Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/asm/entry/64: Fix comments

- Fix misleading and slightly incorrect comments in "struct pt_regs"
(four instances).

- Fix incorrect comment atop EMPTY_FRAME macro.

- Explain in more detail what we do with stack layout during hw interrupt.

- Correct comments about "partial stack frame" which are no longer
true.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1423778052-21038-3-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/e1f4429c491fe6ceeddb879dea2786e0f8920f9c.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Denys Vlasenko and committed by Ingo Molnar
e90e147c 76f5df43

+43 -16
+10 -3
arch/x86/include/asm/ptrace.h
··· 31 31 #else /* __i386__ */ 32 32 33 33 struct pt_regs { 34 + /* 35 + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry 36 + * unless syscall needs a complete, fully filled "struct pt_regs". 37 + */ 34 38 unsigned long r15; 35 39 unsigned long r14; 36 40 unsigned long r13; 37 41 unsigned long r12; 38 42 unsigned long bp; 39 43 unsigned long bx; 40 - /* arguments: non interrupts/non tracing syscalls only save up to here*/ 44 + /* These regs are callee-clobbered. Always saved on kernel entry. */ 41 45 unsigned long r11; 42 46 unsigned long r10; 43 47 unsigned long r9; ··· 51 47 unsigned long dx; 52 48 unsigned long si; 53 49 unsigned long di; 50 + /* 51 + * On syscall entry, this is syscall#. On CPU exception, this is error code. 52 + * On hw interrupt, it's IRQ number: 53 + */ 54 54 unsigned long orig_ax; 55 - /* end of arguments */ 56 - /* cpu exception frame or undefined */ 55 + /* Return frame for iretq */ 57 56 unsigned long ip; 58 57 unsigned long cs; 59 58 unsigned long flags;
+11 -4
arch/x86/include/uapi/asm/ptrace-abi.h
··· 25 25 #else /* __i386__ */ 26 26 27 27 #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) 28 + /* 29 + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry 30 + * unless syscall needs a complete, fully filled "struct pt_regs". 31 + */ 28 32 #define R15 0 29 33 #define R14 8 30 34 #define R13 16 31 35 #define R12 24 32 36 #define RBP 32 33 37 #define RBX 40 34 - /* arguments: interrupts/non tracing syscalls only save up to here*/ 38 + /* These regs are callee-clobbered. Always saved on kernel entry. */ 35 39 #define R11 48 36 40 #define R10 56 37 41 #define R9 64 ··· 45 41 #define RDX 96 46 42 #define RSI 104 47 43 #define RDI 112 48 - #define ORIG_RAX 120 /* = ERROR */ 49 - /* end of arguments */ 50 - /* cpu exception frame or undefined in case of fast syscall. */ 44 + /* 45 + * On syscall entry, this is syscall#. On CPU exception, this is error code. 46 + * On hw interrupt, it's IRQ number: 47 + */ 48 + #define ORIG_RAX 120 49 + /* Return frame for iretq */ 51 50 #define RIP 128 52 51 #define CS 136 53 52 #define EFLAGS 144
+10 -3
arch/x86/include/uapi/asm/ptrace.h
··· 41 41 #ifndef __KERNEL__ 42 42 43 43 struct pt_regs { 44 + /* 45 + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry 46 + * unless syscall needs a complete, fully filled "struct pt_regs". 47 + */ 44 48 unsigned long r15; 45 49 unsigned long r14; 46 50 unsigned long r13; 47 51 unsigned long r12; 48 52 unsigned long rbp; 49 53 unsigned long rbx; 50 - /* arguments: non interrupts/non tracing syscalls only save up to here*/ 54 + /* These regs are callee-clobbered. Always saved on kernel entry. */ 51 55 unsigned long r11; 52 56 unsigned long r10; 53 57 unsigned long r9; ··· 61 57 unsigned long rdx; 62 58 unsigned long rsi; 63 59 unsigned long rdi; 60 + /* 61 + * On syscall entry, this is syscall#. On CPU exception, this is error code. 62 + * On hw interrupt, it's IRQ number: 63 + */ 64 64 unsigned long orig_rax; 65 - /* end of arguments */ 66 - /* cpu exception frame or undefined */ 65 + /* Return frame for iretq */ 67 66 unsigned long rip; 68 67 unsigned long cs; 69 68 unsigned long eflags;
+12 -6
arch/x86/kernel/entry_64.S
··· 14 14 * NOTE: This code handles signal-recognition, which happens every time 15 15 * after an interrupt and after each system call. 16 16 * 17 - * Normal syscalls and interrupts don't save a full stack frame, this is 18 - * only done for syscall tracing, signals or fork/exec et.al. 19 - * 20 17 * A note on terminology: 21 18 * - top of stack: Architecture defined interrupt frame from SS to RIP 22 19 * at the top of the kernel process stack. ··· 148 151 .endm 149 152 150 153 /* 151 - * initial frame state for interrupts (and exceptions without error code) 154 + * empty frame 152 155 */ 153 156 .macro EMPTY_FRAME start=1 offset=0 154 157 .if \start ··· 376 379 call syscall_trace_enter_phase2 377 380 378 381 /* 379 - * Reload arg registers from stack in case ptrace changed them. 382 + * Reload registers from stack in case ptrace changed them. 380 383 * We don't reload %rax because syscall_trace_entry_phase2() returned 381 384 * the value it wants us to use in the table lookup. 382 385 */ ··· 626 629 /* 0(%rsp): ~(interrupt number) */ 627 630 .macro interrupt func 628 631 cld 632 + /* 633 + * Since nothing in interrupt handling code touches r12...r15 members 634 + * of "struct pt_regs", and since interrupts can nest, we can save 635 + * four stack slots and simultaneously provide 636 + * an unwind-friendly stack layout by saving "truncated" pt_regs 637 + * exactly up to rbp slot, without these members. 638 + */ 629 639 ALLOC_PT_GPREGS_ON_STACK -RBP 630 640 SAVE_C_REGS -RBP 631 641 /* this goes to 0(%rsp) for unwinder, not for saving the value: */ ··· 645 641 SWAPGS 646 642 1: 647 643 /* 644 + * Save previous stack pointer, optionally switch to interrupt stack. 648 645 * irq_count is used to check if a CPU is already on an interrupt stack 649 646 * or not. 
While this is essentially redundant with preempt_count it is 650 647 * a little cheaper to use a separate counter in the PDA (short of ··· 686 681 /* Restore saved previous stack */ 687 682 popq %rsi 688 683 CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ 684 + /* return code expects complete pt_regs - adjust rsp accordingly: */ 689 685 leaq ARGOFFSET-RBP(%rsi), %rsp 690 686 CFI_DEF_CFA_REGISTER rsp 691 687 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET ··· 698 692 699 693 /* Interrupt came from user space */ 700 694 /* 701 - * Has a correct top of stack, but a partial stack frame 695 + * Has a correct top of stack. 702 696 * %rcx: thread info. Interrupts off. 703 697 */ 704 698 retint_with_reschedule: