Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/entry: Remove exception_enter() from most trap handlers

On 64-bit kernels, we don't need it any more: we handle context
tracking directly on entry from user mode and exit to user mode.

On 32-bit kernels, we don't support context tracking at all, so
these callbacks had no effect.

Note: this doesn't change do_page_fault(). Before we do that,
we need to make sure that there is no code that can page fault
from kernel mode with CONTEXT_USER. The 32-bit fast system call
stack argument code is the only offender I'm aware of right now.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/ae22f4dfebd799c916574089964592be218151f9.1435952415.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Andy Lutomirski; committed by Ingo Molnar.
8c84014f 02bc7768

+27 -69
+2 -2
arch/x86/include/asm/traps.h
··· 112 112 asmlinkage void smp_deferred_error_interrupt(void); 113 113 #endif 114 114 115 - extern enum ctx_state ist_enter(struct pt_regs *regs); 116 - extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state); 115 + extern void ist_enter(struct pt_regs *regs); 116 + extern void ist_exit(struct pt_regs *regs); 117 117 extern void ist_begin_non_atomic(struct pt_regs *regs); 118 118 extern void ist_end_non_atomic(void); 119 119
+2 -3
arch/x86/kernel/cpu/mcheck/mce.c
··· 1029 1029 { 1030 1030 struct mca_config *cfg = &mca_cfg; 1031 1031 struct mce m, *final; 1032 - enum ctx_state prev_state; 1033 1032 int i; 1034 1033 int worst = 0; 1035 1034 int severity; ··· 1054 1055 int flags = MF_ACTION_REQUIRED; 1055 1056 int lmce = 0; 1056 1057 1057 - prev_state = ist_enter(regs); 1058 + ist_enter(regs); 1058 1059 1059 1060 this_cpu_inc(mce_exception_count); 1060 1061 ··· 1226 1227 local_irq_disable(); 1227 1228 ist_end_non_atomic(); 1228 1229 done: 1229 - ist_exit(regs, prev_state); 1230 + ist_exit(regs); 1230 1231 } 1231 1232 EXPORT_SYMBOL_GPL(do_machine_check); 1232 1233
+2 -3
arch/x86/kernel/cpu/mcheck/p5.c
··· 19 19 /* Machine check handler for Pentium class Intel CPUs: */ 20 20 static void pentium_machine_check(struct pt_regs *regs, long error_code) 21 21 { 22 - enum ctx_state prev_state; 23 22 u32 loaddr, hi, lotype; 24 23 25 - prev_state = ist_enter(regs); 24 + ist_enter(regs); 26 25 27 26 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); 28 27 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); ··· 38 39 39 40 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 40 41 41 - ist_exit(regs, prev_state); 42 + ist_exit(regs); 42 43 } 43 44 44 45 /* Set up machine check reporting for processors with Intel style MCE: */
+2 -2
arch/x86/kernel/cpu/mcheck/winchip.c
··· 15 15 /* Machine check handler for WinChip C6: */ 16 16 static void winchip_machine_check(struct pt_regs *regs, long error_code) 17 17 { 18 - enum ctx_state prev_state = ist_enter(regs); 18 + ist_enter(regs); 19 19 20 20 printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); 21 21 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 22 22 23 - ist_exit(regs, prev_state); 23 + ist_exit(regs); 24 24 } 25 25 26 26 /* Set up machine check reporting on the Winchip C6 series */
+19 -59
arch/x86/kernel/traps.c
··· 108 108 preempt_count_dec(); 109 109 } 110 110 111 - enum ctx_state ist_enter(struct pt_regs *regs) 111 + void ist_enter(struct pt_regs *regs) 112 112 { 113 - enum ctx_state prev_state; 114 - 115 113 if (user_mode(regs)) { 116 - /* Other than that, we're just an exception. */ 117 - prev_state = exception_enter(); 114 + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 118 115 } else { 119 116 /* 120 117 * We might have interrupted pretty much anything. In ··· 120 123 * but we need to notify RCU. 121 124 */ 122 125 rcu_nmi_enter(); 123 - prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */ 124 126 } 125 127 126 128 /* 127 - * We are atomic because we're on the IST stack (or we're on x86_32, 128 - * in which case we still shouldn't schedule). 129 - * 130 - * This must be after exception_enter(), because exception_enter() 131 - * won't do anything if in_interrupt() returns true. 129 + * We are atomic because we're on the IST stack; or we're on 130 + * x86_32, in which case we still shouldn't schedule; or we're 131 + * on x86_64 and entered from user mode, in which case we're 132 + * still atomic unless ist_begin_non_atomic is called. 132 133 */ 133 134 preempt_count_add(HARDIRQ_OFFSET); 134 135 135 136 /* This code is a bit fragile. Test it. */ 136 137 rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work"); 137 - 138 - return prev_state; 139 138 } 140 139 141 - void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) 140 + void ist_exit(struct pt_regs *regs) 142 141 { 143 - /* Must be before exception_exit. */ 144 142 preempt_count_sub(HARDIRQ_OFFSET); 145 143 146 - if (user_mode(regs)) 147 - return exception_exit(prev_state); 148 - else 144 + if (!user_mode(regs)) 149 145 rcu_nmi_exit(); 150 146 } 151 147 ··· 152 162 * a double fault, it can be safe to schedule. ist_begin_non_atomic() 153 163 * begins a non-atomic section within an ist_enter()/ist_exit() region. 
154 164 * Callers are responsible for enabling interrupts themselves inside 155 - * the non-atomic section, and callers must call is_end_non_atomic() 165 + * the non-atomic section, and callers must call ist_end_non_atomic() 156 166 * before ist_exit(). 157 167 */ 158 168 void ist_begin_non_atomic(struct pt_regs *regs) ··· 279 289 static void do_error_trap(struct pt_regs *regs, long error_code, char *str, 280 290 unsigned long trapnr, int signr) 281 291 { 282 - enum ctx_state prev_state = exception_enter(); 283 292 siginfo_t info; 284 293 285 294 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); ··· 289 300 do_trap(trapnr, signr, str, regs, error_code, 290 301 fill_trap_info(regs, signr, trapnr, &info)); 291 302 } 292 - 293 - exception_exit(prev_state); 294 303 } 295 304 296 305 #define DO_ERROR(trapnr, signr, str, name) \ ··· 340 353 } 341 354 #endif 342 355 343 - ist_enter(regs); /* Discard prev_state because we won't return. */ 356 + ist_enter(regs); 344 357 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); 345 358 346 359 tsk->thread.error_code = error_code; ··· 360 373 361 374 dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) 362 375 { 363 - enum ctx_state prev_state; 364 376 const struct bndcsr *bndcsr; 365 377 siginfo_t *info; 366 378 367 - prev_state = exception_enter(); 368 379 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 369 380 if (notify_die(DIE_TRAP, "bounds", regs, error_code, 370 381 X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) 371 - goto exit; 382 + return; 372 383 conditional_sti(regs); 373 384 374 385 if (!user_mode(regs)) ··· 423 438 die("bounds", regs, error_code); 424 439 } 425 440 426 - exit: 427 - exception_exit(prev_state); 428 441 return; 442 + 429 443 exit_trap: 430 444 /* 431 445 * This path out is for all the cases where we could not ··· 434 450 * time.. 
435 451 */ 436 452 do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); 437 - exception_exit(prev_state); 438 453 } 439 454 440 455 dotraplinkage void 441 456 do_general_protection(struct pt_regs *regs, long error_code) 442 457 { 443 458 struct task_struct *tsk; 444 - enum ctx_state prev_state; 445 459 446 - prev_state = exception_enter(); 447 460 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 448 461 conditional_sti(regs); 449 462 450 463 if (v8086_mode(regs)) { 451 464 local_irq_enable(); 452 465 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); 453 - goto exit; 466 + return; 454 467 } 455 468 456 469 tsk = current; 457 470 if (!user_mode(regs)) { 458 471 if (fixup_exception(regs)) 459 - goto exit; 472 + return; 460 473 461 474 tsk->thread.error_code = error_code; 462 475 tsk->thread.trap_nr = X86_TRAP_GP; 463 476 if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 464 477 X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) 465 478 die("general protection fault", regs, error_code); 466 - goto exit; 479 + return; 467 480 } 468 481 469 482 tsk->thread.error_code = error_code; ··· 476 495 } 477 496 478 497 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); 479 - exit: 480 - exception_exit(prev_state); 481 498 } 482 499 NOKPROBE_SYMBOL(do_general_protection); 483 500 484 501 /* May run on IST stack. */ 485 502 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) 486 503 { 487 - enum ctx_state prev_state; 488 - 489 504 #ifdef CONFIG_DYNAMIC_FTRACE 490 505 /* 491 506 * ftrace must be first, everything else may cause a recursive crash. 
··· 494 517 if (poke_int3_handler(regs)) 495 518 return; 496 519 497 - prev_state = ist_enter(regs); 520 + ist_enter(regs); 498 521 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 499 522 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 500 523 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, ··· 521 544 preempt_conditional_cli(regs); 522 545 debug_stack_usage_dec(); 523 546 exit: 524 - ist_exit(regs, prev_state); 547 + ist_exit(regs); 525 548 } 526 549 NOKPROBE_SYMBOL(do_int3); 527 550 ··· 597 620 dotraplinkage void do_debug(struct pt_regs *regs, long error_code) 598 621 { 599 622 struct task_struct *tsk = current; 600 - enum ctx_state prev_state; 601 623 int user_icebp = 0; 602 624 unsigned long dr6; 603 625 int si_code; 604 626 605 - prev_state = ist_enter(regs); 627 + ist_enter(regs); 606 628 607 629 get_debugreg(dr6, 6); 608 630 ··· 676 700 debug_stack_usage_dec(); 677 701 678 702 exit: 679 - ist_exit(regs, prev_state); 703 + ist_exit(regs); 680 704 } 681 705 NOKPROBE_SYMBOL(do_debug); 682 706 ··· 728 752 729 753 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) 730 754 { 731 - enum ctx_state prev_state; 732 - 733 - prev_state = exception_enter(); 734 755 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 735 756 math_error(regs, error_code, X86_TRAP_MF); 736 - exception_exit(prev_state); 737 757 } 738 758 739 759 dotraplinkage void 740 760 do_simd_coprocessor_error(struct pt_regs *regs, long error_code) 741 761 { 742 - enum ctx_state prev_state; 743 - 744 - prev_state = exception_enter(); 745 762 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 746 763 math_error(regs, error_code, X86_TRAP_XF); 747 - exception_exit(prev_state); 748 764 } 749 765 750 766 dotraplinkage void ··· 748 780 dotraplinkage void 749 781 do_device_not_available(struct pt_regs *regs, long error_code) 750 782 { 751 - enum ctx_state prev_state; 752 - 753 - prev_state = exception_enter(); 754 783 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 755 784 BUG_ON(use_eager_fpu()); 756 785 ··· 
759 794 760 795 info.regs = regs; 761 796 math_emulate(&info); 762 - exception_exit(prev_state); 763 797 return; 764 798 } 765 799 #endif ··· 766 802 #ifdef CONFIG_X86_32 767 803 conditional_sti(regs); 768 804 #endif 769 - exception_exit(prev_state); 770 805 } 771 806 NOKPROBE_SYMBOL(do_device_not_available); 772 807 ··· 773 810 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) 774 811 { 775 812 siginfo_t info; 776 - enum ctx_state prev_state; 777 813 778 - prev_state = exception_enter(); 779 814 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 780 815 local_irq_enable(); 781 816 ··· 786 825 do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, 787 826 &info); 788 827 } 789 - exception_exit(prev_state); 790 828 } 791 829 #endif 792 830