Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/asm/entry/64: Always allocate a complete "struct pt_regs" on the kernel stack

The 64-bit entry code used six fewer stack slots by not
saving/restoring registers which are callee-preserved according
to the C ABI, and did not allocate space for them.

Only when syscalls needed a complete "struct pt_regs" was
the complete area allocated and filled in.

As an additional twist, on interrupt entry a "slightly less
truncated pt_regs" trick is used, to make nested interrupt
stacks easier to unwind.

This proved to be a source of significant obfuscation and subtle
bugs. For example, 'stub_fork' had to pop the return address,
extend the struct, save registers, and push return address back.
Ugly. 'ia32_ptregs_common' pops return address and "returns" via
jmp insn, throwing a wrench into CPU return stack cache.

This patch changes the code to always allocate a complete
"struct pt_regs" on the kernel stack. The saving of registers
is still done lazily.

"Partial pt_regs" trick on interrupt stack is retained.

Macros which manipulate "struct pt_regs" on stack are reworked:

- ALLOC_PT_GPREGS_ON_STACK allocates the structure.

- SAVE_C_REGS saves to it those registers which are clobbered
by C code.

- SAVE_EXTRA_REGS saves to it all other registers.

- Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros
reverse it.

'ia32_ptregs_common', 'stub_fork' and friends lost their ugly dance
with the return pointer.

LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.

'error_entry' and 'save_paranoid' now use SAVE_C_REGS +
SAVE_EXTRA_REGS instead of having it open-coded yet again.

Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.

Timing tests did not show a measurable difference in 32-bit
and 64-bit syscalls.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1423778052-21038-2-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/b89763d354aa23e670b9bdf3a40ae320320a7c2e.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Denys Vlasenko and committed by
Ingo Molnar
76f5df43 6e1327bd

+215 -265
+25 -22
arch/x86/ia32/ia32entry.S
··· 62 62 */ 63 63 .macro LOAD_ARGS32 offset, _r9=0 64 64 .if \_r9 65 - movl \offset+16(%rsp),%r9d 65 + movl \offset+R9(%rsp),%r9d 66 66 .endif 67 - movl \offset+40(%rsp),%ecx 68 - movl \offset+48(%rsp),%edx 69 - movl \offset+56(%rsp),%esi 70 - movl \offset+64(%rsp),%edi 67 + movl \offset+RCX(%rsp),%ecx 68 + movl \offset+RDX(%rsp),%edx 69 + movl \offset+RSI(%rsp),%esi 70 + movl \offset+RDI(%rsp),%edi 71 71 movl %eax,%eax /* zero extension */ 72 72 .endm 73 73 ··· 144 144 CFI_REL_OFFSET rip,0 145 145 pushq_cfi %rax 146 146 cld 147 - SAVE_ARGS 0,1,0 147 + ALLOC_PT_GPREGS_ON_STACK 148 + SAVE_C_REGS_EXCEPT_R891011 148 149 /* no need to do an access_ok check here because rbp has been 149 150 32bit zero extended */ 150 151 ASM_STAC ··· 183 182 andl $~0x200,EFLAGS-ARGOFFSET(%rsp) 184 183 movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ 185 184 CFI_REGISTER rip,rdx 186 - RESTORE_ARGS 0,24,0,0,0,0 185 + RESTORE_RSI_RDI 186 + REMOVE_PT_GPREGS_FROM_STACK 3*8 187 187 xorq %r8,%r8 188 188 xorq %r9,%r9 189 189 xorq %r10,%r10 ··· 258 256 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 259 257 jz sysenter_auditsys 260 258 #endif 261 - SAVE_REST 259 + SAVE_EXTRA_REGS 262 260 CLEAR_RREGS 263 261 movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ 264 262 movq %rsp,%rdi /* &pt_regs -> arg1 */ 265 263 call syscall_trace_enter 266 264 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ 267 - RESTORE_REST 265 + RESTORE_EXTRA_REGS 268 266 cmpq $(IA32_NR_syscalls-1),%rax 269 267 ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ 270 268 jmp sysenter_do_call ··· 306 304 * disabled irqs and here we enable it straight after entry: 307 305 */ 308 306 ENABLE_INTERRUPTS(CLBR_NONE) 309 - SAVE_ARGS 8,0,0 307 + ALLOC_PT_GPREGS_ON_STACK 8 308 + SAVE_C_REGS_EXCEPT_RCX_R891011 310 309 movl %eax,%eax /* zero extension */ 311 310 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 312 311 movq 
%rcx,RIP-ARGOFFSET(%rsp) ··· 344 341 jnz sysretl_audit 345 342 sysretl_from_sys_call: 346 343 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 347 - RESTORE_ARGS 0,-ARG_SKIP,0,0,0 344 + RESTORE_RSI_RDI_RDX 348 345 movl RIP-ARGOFFSET(%rsp),%ecx 349 346 CFI_REGISTER rip,rcx 350 347 movl EFLAGS-ARGOFFSET(%rsp),%r11d ··· 375 372 jz cstar_auditsys 376 373 #endif 377 374 xchgl %r9d,%ebp 378 - SAVE_REST 375 + SAVE_EXTRA_REGS 379 376 CLEAR_RREGS 0, r9 380 377 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 381 378 movq %rsp,%rdi /* &pt_regs -> arg1 */ 382 379 call syscall_trace_enter 383 380 LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ 384 - RESTORE_REST 381 + RESTORE_EXTRA_REGS 385 382 xchgl %ebp,%r9d 386 383 cmpq $(IA32_NR_syscalls-1),%rax 387 384 ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ ··· 436 433 cld 437 434 /* note the registers are not zero extended to the sf. 438 435 this could be a problem. */ 439 - SAVE_ARGS 0,1,0 436 + ALLOC_PT_GPREGS_ON_STACK 437 + SAVE_C_REGS_EXCEPT_R891011 440 438 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 441 439 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 442 440 jnz ia32_tracesys ··· 450 446 movq %rax,RAX-ARGOFFSET(%rsp) 451 447 ia32_ret_from_sys_call: 452 448 CLEAR_RREGS -ARGOFFSET 453 - jmp int_ret_from_sys_call 449 + jmp int_ret_from_sys_call 454 450 455 - ia32_tracesys: 456 - SAVE_REST 451 + ia32_tracesys: 452 + SAVE_EXTRA_REGS 457 453 CLEAR_RREGS 458 454 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 459 455 movq %rsp,%rdi /* &pt_regs -> arg1 */ 460 456 call syscall_trace_enter 461 457 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ 462 - RESTORE_REST 458 + RESTORE_EXTRA_REGS 463 459 cmpq $(IA32_NR_syscalls-1),%rax 464 460 ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ 465 461 jmp ia32_do_call ··· 496 492 497 493 ALIGN 498 494 
ia32_ptregs_common: 499 - popq %r11 500 495 CFI_ENDPROC 501 496 CFI_STARTPROC32 simple 502 497 CFI_SIGNAL_FRAME ··· 510 507 /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ 511 508 CFI_REL_OFFSET rsp,RSP-ARGOFFSET 512 509 /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ 513 - SAVE_REST 510 + SAVE_EXTRA_REGS 8 514 511 call *%rax 515 - RESTORE_REST 516 - jmp ia32_sysret /* misbalances the return cache */ 512 + RESTORE_EXTRA_REGS 8 513 + ret 517 514 CFI_ENDPROC 518 515 END(ia32_ptregs_common)
+114 -120
arch/x86/include/asm/calling.h
··· 55 55 * for assembly code: 56 56 */ 57 57 58 - #define R15 0 59 - #define R14 8 60 - #define R13 16 61 - #define R12 24 62 - #define RBP 32 63 - #define RBX 40 58 + /* The layout forms the "struct pt_regs" on the stack: */ 59 + /* 60 + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry 61 + * unless syscall needs a complete, fully filled "struct pt_regs". 62 + */ 63 + #define R15 0*8 64 + #define R14 1*8 65 + #define R13 2*8 66 + #define R12 3*8 67 + #define RBP 4*8 68 + #define RBX 5*8 69 + /* These regs are callee-clobbered. Always saved on kernel entry. */ 70 + #define R11 6*8 71 + #define R10 7*8 72 + #define R9 8*8 73 + #define R8 9*8 74 + #define RAX 10*8 75 + #define RCX 11*8 76 + #define RDX 12*8 77 + #define RSI 13*8 78 + #define RDI 14*8 79 + /* 80 + * On syscall entry, this is syscall#. On CPU exception, this is error code. 81 + * On hw interrupt, it's IRQ number: 82 + */ 83 + #define ORIG_RAX 15*8 84 + /* Return frame for iretq */ 85 + #define RIP 16*8 86 + #define CS 17*8 87 + #define EFLAGS 18*8 88 + #define RSP 19*8 89 + #define SS 20*8 64 90 65 - /* arguments: interrupts/non tracing syscalls only save up to here: */ 66 - #define R11 48 67 - #define R10 56 68 - #define R9 64 69 - #define R8 72 70 - #define RAX 80 71 - #define RCX 88 72 - #define RDX 96 73 - #define RSI 104 74 - #define RDI 112 75 - #define ORIG_RAX 120 /* + error_code */ 76 - /* end of arguments */ 91 + #define ARGOFFSET 0 77 92 78 - /* cpu exception frame or undefined in case of fast syscall: */ 79 - #define RIP 128 80 - #define CS 136 81 - #define EFLAGS 144 82 - #define RSP 152 83 - #define SS 160 84 - 85 - #define ARGOFFSET R11 86 - 87 - .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 88 - subq $9*8+\addskip, %rsp 89 - CFI_ADJUST_CFA_OFFSET 9*8+\addskip 90 - movq_cfi rdi, 8*8 91 - movq_cfi rsi, 7*8 92 - movq_cfi rdx, 6*8 93 - 94 - .if \save_rcx 95 - movq_cfi rcx, 5*8 96 - .endif 97 - 98 - .if \rax_enosys 99 - movq $-ENOSYS, 
4*8(%rsp) 100 - .else 101 - movq_cfi rax, 4*8 102 - .endif 103 - 104 - .if \save_r891011 105 - movq_cfi r8, 3*8 106 - movq_cfi r9, 2*8 107 - movq_cfi r10, 1*8 108 - movq_cfi r11, 0*8 109 - .endif 110 - 93 + .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 94 + subq $15*8+\addskip, %rsp 95 + CFI_ADJUST_CFA_OFFSET 15*8+\addskip 111 96 .endm 112 97 113 - #define ARG_SKIP (9*8) 98 + .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1 99 + .if \r8plus 100 + movq_cfi r11, 6*8+\offset 101 + movq_cfi r10, 7*8+\offset 102 + movq_cfi r9, 8*8+\offset 103 + movq_cfi r8, 9*8+\offset 104 + .endif 105 + .if \rax 106 + movq_cfi rax, 10*8+\offset 107 + .endif 108 + .if \rcx 109 + movq_cfi rcx, 11*8+\offset 110 + .endif 111 + movq_cfi rdx, 12*8+\offset 112 + movq_cfi rsi, 13*8+\offset 113 + movq_cfi rdi, 14*8+\offset 114 + .endm 115 + .macro SAVE_C_REGS offset=0 116 + SAVE_C_REGS_HELPER \offset, 1, 1, 1 117 + .endm 118 + .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 119 + SAVE_C_REGS_HELPER \offset, 0, 0, 1 120 + .endm 121 + .macro SAVE_C_REGS_EXCEPT_R891011 122 + SAVE_C_REGS_HELPER 0, 1, 1, 0 123 + .endm 124 + .macro SAVE_C_REGS_EXCEPT_RCX_R891011 125 + SAVE_C_REGS_HELPER 0, 1, 0, 0 126 + .endm 114 127 115 - .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ 116 - rstor_r8910=1, rstor_rdx=1 128 + .macro SAVE_EXTRA_REGS offset=0 129 + movq_cfi r15, 0*8+\offset 130 + movq_cfi r14, 1*8+\offset 131 + movq_cfi r13, 2*8+\offset 132 + movq_cfi r12, 3*8+\offset 133 + movq_cfi rbp, 4*8+\offset 134 + movq_cfi rbx, 5*8+\offset 135 + .endm 136 + .macro SAVE_EXTRA_REGS_RBP offset=0 137 + movq_cfi rbp, 4*8+\offset 138 + .endm 139 + 140 + .macro RESTORE_EXTRA_REGS offset=0 141 + movq_cfi_restore 0*8+\offset, r15 142 + movq_cfi_restore 1*8+\offset, r14 143 + movq_cfi_restore 2*8+\offset, r13 144 + movq_cfi_restore 3*8+\offset, r12 145 + movq_cfi_restore 4*8+\offset, rbp 146 + movq_cfi_restore 5*8+\offset, rbx 147 + .endm 148 + 149 + .macro RESTORE_C_REGS_HELPER rstor_rax=1, 
rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 117 150 .if \rstor_r11 118 - movq_cfi_restore 0*8, r11 151 + movq_cfi_restore 6*8, r11 119 152 .endif 120 - 121 153 .if \rstor_r8910 122 - movq_cfi_restore 1*8, r10 123 - movq_cfi_restore 2*8, r9 124 - movq_cfi_restore 3*8, r8 154 + movq_cfi_restore 7*8, r10 155 + movq_cfi_restore 8*8, r9 156 + movq_cfi_restore 9*8, r8 125 157 .endif 126 - 127 158 .if \rstor_rax 128 - movq_cfi_restore 4*8, rax 159 + movq_cfi_restore 10*8, rax 129 160 .endif 130 - 131 161 .if \rstor_rcx 132 - movq_cfi_restore 5*8, rcx 162 + movq_cfi_restore 11*8, rcx 133 163 .endif 134 - 135 164 .if \rstor_rdx 136 - movq_cfi_restore 6*8, rdx 165 + movq_cfi_restore 12*8, rdx 137 166 .endif 138 - 139 - movq_cfi_restore 7*8, rsi 140 - movq_cfi_restore 8*8, rdi 141 - 142 - .if ARG_SKIP+\addskip > 0 143 - addq $ARG_SKIP+\addskip, %rsp 144 - CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) 145 - .endif 167 + movq_cfi_restore 13*8, rsi 168 + movq_cfi_restore 14*8, rdi 169 + .endm 170 + .macro RESTORE_C_REGS 171 + RESTORE_C_REGS_HELPER 1,1,1,1,1 172 + .endm 173 + .macro RESTORE_C_REGS_EXCEPT_RAX 174 + RESTORE_C_REGS_HELPER 0,1,1,1,1 175 + .endm 176 + .macro RESTORE_C_REGS_EXCEPT_RCX 177 + RESTORE_C_REGS_HELPER 1,0,1,1,1 178 + .endm 179 + .macro RESTORE_RSI_RDI 180 + RESTORE_C_REGS_HELPER 0,0,0,0,0 181 + .endm 182 + .macro RESTORE_RSI_RDI_RDX 183 + RESTORE_C_REGS_HELPER 0,0,0,0,1 146 184 .endm 147 185 148 - .macro LOAD_ARGS offset, skiprax=0 149 - movq \offset(%rsp), %r11 150 - movq \offset+8(%rsp), %r10 151 - movq \offset+16(%rsp), %r9 152 - movq \offset+24(%rsp), %r8 153 - movq \offset+40(%rsp), %rcx 154 - movq \offset+48(%rsp), %rdx 155 - movq \offset+56(%rsp), %rsi 156 - movq \offset+64(%rsp), %rdi 157 - .if \skiprax 158 - .else 159 - movq \offset+72(%rsp), %rax 160 - .endif 161 - .endm 162 - 163 - #define REST_SKIP (6*8) 164 - 165 - .macro SAVE_REST 166 - subq $REST_SKIP, %rsp 167 - CFI_ADJUST_CFA_OFFSET REST_SKIP 168 - movq_cfi rbx, 5*8 169 - movq_cfi rbp, 
4*8 170 - movq_cfi r12, 3*8 171 - movq_cfi r13, 2*8 172 - movq_cfi r14, 1*8 173 - movq_cfi r15, 0*8 174 - .endm 175 - 176 - .macro RESTORE_REST 177 - movq_cfi_restore 0*8, r15 178 - movq_cfi_restore 1*8, r14 179 - movq_cfi_restore 2*8, r13 180 - movq_cfi_restore 3*8, r12 181 - movq_cfi_restore 4*8, rbp 182 - movq_cfi_restore 5*8, rbx 183 - addq $REST_SKIP, %rsp 184 - CFI_ADJUST_CFA_OFFSET -(REST_SKIP) 185 - .endm 186 - 187 - .macro SAVE_ALL 188 - SAVE_ARGS 189 - SAVE_REST 190 - .endm 191 - 192 - .macro RESTORE_ALL addskip=0 193 - RESTORE_REST 194 - RESTORE_ARGS 1, \addskip 186 + .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 187 + addq $15*8+\addskip, %rsp 188 + CFI_ADJUST_CFA_OFFSET -(15*8+\addskip) 195 189 .endm 196 190 197 191 .macro icebp
+2 -2
arch/x86/include/asm/irqflags.h
··· 171 171 #define ARCH_LOCKDEP_SYS_EXIT_IRQ \ 172 172 TRACE_IRQS_ON; \ 173 173 sti; \ 174 - SAVE_REST; \ 174 + SAVE_EXTRA_REGS; \ 175 175 LOCKDEP_SYS_EXIT; \ 176 - RESTORE_REST; \ 176 + RESTORE_EXTRA_REGS; \ 177 177 cli; \ 178 178 TRACE_IRQS_OFF; 179 179
-1
arch/x86/include/uapi/asm/ptrace-abi.h
··· 49 49 #define EFLAGS 144 50 50 #define RSP 152 51 51 #define SS 160 52 - #define ARGOFFSET R11 53 52 #endif /* __ASSEMBLY__ */ 54 53 55 54 /* top of stack page */
+74 -120
arch/x86/kernel/entry_64.S
··· 26 26 * Some macro usage: 27 27 * - CFI macros are used to generate dwarf2 unwind information for better 28 28 * backtraces. They don't change any code. 29 - * - SAVE_ALL/RESTORE_ALL - Save/restore all registers 30 - * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. 31 - * There are unfortunately lots of special cases where some registers 32 - * not touched. The macro is a big mess that should be cleaned up. 33 - * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. 34 - * Gives a full stack frame. 35 29 * - ENTRY/END Define functions in the symbol table. 36 30 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack 37 31 * frame that is otherwise undefined after a SYSCALL ··· 184 190 .endm 185 191 186 192 /* 187 - * frame that enables calling into C. 193 + * frame that enables passing a complete pt_regs to a C function. 188 194 */ 189 - .macro PARTIAL_FRAME start=1 offset=0 195 + .macro DEFAULT_FRAME start=1 offset=0 190 196 XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET 191 197 CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET 192 198 CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET ··· 197 203 CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET 198 204 CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET 199 205 CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET 200 - .endm 201 - 202 - /* 203 - * frame that enables passing a complete pt_regs to a C function. 
204 - */ 205 - .macro DEFAULT_FRAME start=1 offset=0 206 - PARTIAL_FRAME \start, R11+\offset-R15 207 206 CFI_REL_OFFSET rbx, RBX+\offset 208 207 CFI_REL_OFFSET rbp, RBP+\offset 209 208 CFI_REL_OFFSET r12, R12+\offset ··· 208 221 ENTRY(save_paranoid) 209 222 XCPT_FRAME 1 RDI+8 210 223 cld 211 - movq %rdi, RDI+8(%rsp) 212 - movq %rsi, RSI+8(%rsp) 213 - movq_cfi rdx, RDX+8 214 - movq_cfi rcx, RCX+8 215 - movq_cfi rax, RAX+8 216 - movq %r8, R8+8(%rsp) 217 - movq %r9, R9+8(%rsp) 218 - movq %r10, R10+8(%rsp) 219 - movq %r11, R11+8(%rsp) 220 - movq_cfi rbx, RBX+8 221 - movq %rbp, RBP+8(%rsp) 222 - movq %r12, R12+8(%rsp) 223 - movq %r13, R13+8(%rsp) 224 - movq %r14, R14+8(%rsp) 225 - movq %r15, R15+8(%rsp) 224 + SAVE_C_REGS 8 225 + SAVE_EXTRA_REGS 8 226 226 movl $1,%ebx 227 227 movl $MSR_GS_BASE,%ecx 228 228 rdmsr ··· 238 264 239 265 GET_THREAD_INFO(%rcx) 240 266 241 - RESTORE_REST 267 + RESTORE_EXTRA_REGS 242 268 243 269 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 244 270 jz 1f ··· 250 276 jmp ret_from_sys_call # go to the SYSRET fastpath 251 277 252 278 1: 253 - subq $REST_SKIP, %rsp # leave space for volatiles 254 - CFI_ADJUST_CFA_OFFSET REST_SKIP 255 279 movq %rbp, %rdi 256 280 call *%rbx 257 281 movl $0, RAX(%rsp) 258 - RESTORE_REST 282 + RESTORE_EXTRA_REGS 259 283 jmp int_ret_from_sys_call 260 284 CFI_ENDPROC 261 285 END(ret_from_fork) ··· 311 339 * and short: 312 340 */ 313 341 ENABLE_INTERRUPTS(CLBR_NONE) 314 - SAVE_ARGS 8, 0, rax_enosys=1 342 + ALLOC_PT_GPREGS_ON_STACK 8 343 + SAVE_C_REGS_EXCEPT_RAX_RCX 344 + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 315 345 movq_cfi rax,(ORIG_RAX-ARGOFFSET) 316 - movq %rcx,RIP-ARGOFFSET(%rsp) 346 + movq %rcx,RIP-ARGOFFSET(%rsp) 317 347 CFI_REL_OFFSET rip,RIP-ARGOFFSET 318 348 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 319 349 jnz tracesys ··· 346 372 * sysretq will re-enable interrupts: 347 373 */ 348 374 TRACE_IRQS_ON 375 + RESTORE_C_REGS_EXCEPT_RCX 349 376 movq RIP-ARGOFFSET(%rsp),%rcx 350 377 
CFI_REGISTER rip,rcx 351 - RESTORE_ARGS 1,-ARG_SKIP,0 352 378 /*CFI_REGISTER rflags,r11*/ 353 379 movq PER_CPU_VAR(old_rsp), %rsp 354 380 USERGS_SYSRET64 ··· 361 387 362 388 /* Do syscall tracing */ 363 389 tracesys: 364 - leaq -REST_SKIP(%rsp), %rdi 390 + movq %rsp, %rdi 365 391 movq $AUDIT_ARCH_X86_64, %rsi 366 392 call syscall_trace_enter_phase1 367 393 test %rax, %rax 368 394 jnz tracesys_phase2 /* if needed, run the slow path */ 369 - LOAD_ARGS 0 /* else restore clobbered regs */ 395 + RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ 396 + movq ORIG_RAX-ARGOFFSET(%rsp), %rax 370 397 jmp system_call_fastpath /* and return to the fast path */ 371 398 372 399 tracesys_phase2: 373 - SAVE_REST 400 + SAVE_EXTRA_REGS 374 401 FIXUP_TOP_OF_STACK %rdi 375 402 movq %rsp, %rdi 376 403 movq $AUDIT_ARCH_X86_64, %rsi ··· 383 408 * We don't reload %rax because syscall_trace_entry_phase2() returned 384 409 * the value it wants us to use in the table lookup. 385 410 */ 386 - LOAD_ARGS ARGOFFSET, 1 387 - RESTORE_REST 411 + RESTORE_C_REGS_EXCEPT_RAX 412 + RESTORE_EXTRA_REGS 388 413 #if __SYSCALL_MASK == ~0 389 414 cmpq $__NR_syscall_max,%rax 390 415 #else ··· 435 460 TRACE_IRQS_ON 436 461 ENABLE_INTERRUPTS(CLBR_NONE) 437 462 int_check_syscall_exit_work: 438 - SAVE_REST 463 + SAVE_EXTRA_REGS 439 464 /* Check for syscall exit trace */ 440 465 testl $_TIF_WORK_SYSCALL_EXIT,%edx 441 466 jz int_signal ··· 454 479 call do_notify_resume 455 480 1: movl $_TIF_WORK_MASK,%edi 456 481 int_restore_rest: 457 - RESTORE_REST 482 + RESTORE_EXTRA_REGS 458 483 DISABLE_INTERRUPTS(CLBR_NONE) 459 484 TRACE_IRQS_OFF 460 485 jmp int_with_check ··· 464 489 .macro FORK_LIKE func 465 490 ENTRY(stub_\func) 466 491 CFI_STARTPROC 467 - popq %r11 /* save return address */ 468 - PARTIAL_FRAME 0 469 - SAVE_REST 470 - pushq %r11 /* put it back on stack */ 492 + DEFAULT_FRAME 0, 8 /* offset 8: return address */ 493 + SAVE_EXTRA_REGS 8 471 494 FIXUP_TOP_OF_STACK %r11, 8 472 - DEFAULT_FRAME 0 8 /* offset 
8: return address */ 473 495 call sys_\func 474 496 RESTORE_TOP_OF_STACK %r11, 8 475 - ret $REST_SKIP /* pop extended registers */ 497 + ret 476 498 CFI_ENDPROC 477 499 END(stub_\func) 478 500 .endm ··· 477 505 .macro FIXED_FRAME label,func 478 506 ENTRY(\label) 479 507 CFI_STARTPROC 480 - PARTIAL_FRAME 0 8 /* offset 8: return address */ 508 + DEFAULT_FRAME 0, 8 /* offset 8: return address */ 481 509 FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET 482 510 call \func 483 511 RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET ··· 494 522 ENTRY(stub_execve) 495 523 CFI_STARTPROC 496 524 addq $8, %rsp 497 - PARTIAL_FRAME 0 498 - SAVE_REST 525 + DEFAULT_FRAME 0 526 + SAVE_EXTRA_REGS 499 527 FIXUP_TOP_OF_STACK %r11 500 528 call sys_execve 501 529 movq %rax,RAX(%rsp) 502 - RESTORE_REST 530 + RESTORE_EXTRA_REGS 503 531 jmp int_ret_from_sys_call 504 532 CFI_ENDPROC 505 533 END(stub_execve) ··· 507 535 ENTRY(stub_execveat) 508 536 CFI_STARTPROC 509 537 addq $8, %rsp 510 - PARTIAL_FRAME 0 511 - SAVE_REST 538 + DEFAULT_FRAME 0 539 + SAVE_EXTRA_REGS 512 540 FIXUP_TOP_OF_STACK %r11 513 541 call sys_execveat 514 542 RESTORE_TOP_OF_STACK %r11 515 543 movq %rax,RAX(%rsp) 516 - RESTORE_REST 544 + RESTORE_EXTRA_REGS 517 545 jmp int_ret_from_sys_call 518 546 CFI_ENDPROC 519 547 END(stub_execveat) ··· 525 553 ENTRY(stub_rt_sigreturn) 526 554 CFI_STARTPROC 527 555 addq $8, %rsp 528 - PARTIAL_FRAME 0 529 - SAVE_REST 556 + DEFAULT_FRAME 0 557 + SAVE_EXTRA_REGS 530 558 FIXUP_TOP_OF_STACK %r11 531 559 call sys_rt_sigreturn 532 560 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 533 - RESTORE_REST 561 + RESTORE_EXTRA_REGS 534 562 jmp int_ret_from_sys_call 535 563 CFI_ENDPROC 536 564 END(stub_rt_sigreturn) ··· 539 567 ENTRY(stub_x32_rt_sigreturn) 540 568 CFI_STARTPROC 541 569 addq $8, %rsp 542 - PARTIAL_FRAME 0 543 - SAVE_REST 570 + DEFAULT_FRAME 0 571 + SAVE_EXTRA_REGS 544 572 FIXUP_TOP_OF_STACK %r11 545 573 call sys32_x32_rt_sigreturn 546 574 movq %rax,RAX(%rsp) # fixme, this could be done at 
the higher layer 547 - RESTORE_REST 575 + RESTORE_EXTRA_REGS 548 576 jmp int_ret_from_sys_call 549 577 CFI_ENDPROC 550 578 END(stub_x32_rt_sigreturn) ··· 552 580 ENTRY(stub_x32_execve) 553 581 CFI_STARTPROC 554 582 addq $8, %rsp 555 - PARTIAL_FRAME 0 556 - SAVE_REST 583 + DEFAULT_FRAME 0 584 + SAVE_EXTRA_REGS 557 585 FIXUP_TOP_OF_STACK %r11 558 586 call compat_sys_execve 559 587 RESTORE_TOP_OF_STACK %r11 560 588 movq %rax,RAX(%rsp) 561 - RESTORE_REST 589 + RESTORE_EXTRA_REGS 562 590 jmp int_ret_from_sys_call 563 591 CFI_ENDPROC 564 592 END(stub_x32_execve) ··· 566 594 ENTRY(stub_x32_execveat) 567 595 CFI_STARTPROC 568 596 addq $8, %rsp 569 - PARTIAL_FRAME 0 570 - SAVE_REST 597 + DEFAULT_FRAME 0 598 + SAVE_EXTRA_REGS 571 599 FIXUP_TOP_OF_STACK %r11 572 600 call compat_sys_execveat 573 601 RESTORE_TOP_OF_STACK %r11 574 602 movq %rax,RAX(%rsp) 575 - RESTORE_REST 603 + RESTORE_EXTRA_REGS 576 604 jmp int_ret_from_sys_call 577 605 CFI_ENDPROC 578 606 END(stub_x32_execveat) ··· 628 656 629 657 /* 0(%rsp): ~(interrupt number) */ 630 658 .macro interrupt func 631 - /* reserve pt_regs for scratch regs and rbp */ 632 - subq $ORIG_RAX-RBP, %rsp 633 - CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP 634 659 cld 635 - /* start from rbp in pt_regs and jump over */ 636 - movq_cfi rdi, (RDI-RBP) 637 - movq_cfi rsi, (RSI-RBP) 638 - movq_cfi rdx, (RDX-RBP) 639 - movq_cfi rcx, (RCX-RBP) 640 - movq_cfi rax, (RAX-RBP) 641 - movq_cfi r8, (R8-RBP) 642 - movq_cfi r9, (R9-RBP) 643 - movq_cfi r10, (R10-RBP) 644 - movq_cfi r11, (R11-RBP) 660 + ALLOC_PT_GPREGS_ON_STACK -RBP 661 + SAVE_C_REGS -RBP 662 + /* this goes to 0(%rsp) for unwinder, not for saving the value: */ 663 + SAVE_EXTRA_REGS_RBP -RBP 645 664 646 - /* Save rbp so that we can unwind from get_irq_regs() */ 647 - movq_cfi rbp, 0 665 + leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ 648 666 649 - /* Save previous stack value */ 650 - movq %rsp, %rsi 651 - 652 - leaq -RBP(%rsp),%rdi /* arg1 for handler */ 653 - testl $3, CS-RBP(%rsi) 
667 + testl $3, CS-RBP(%rsp) 654 668 je 1f 655 669 SWAPGS 670 + 1: 656 671 /* 657 672 * irq_count is used to check if a CPU is already on an interrupt stack 658 673 * or not. While this is essentially redundant with preempt_count it is 659 674 * a little cheaper to use a separate counter in the PDA (short of 660 675 * moving irq_enter into assembly, which would be too much work) 661 676 */ 662 - 1: incl PER_CPU_VAR(irq_count) 677 + movq %rsp, %rsi 678 + incl PER_CPU_VAR(irq_count) 663 679 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp 664 680 CFI_DEF_CFA_REGISTER rsi 665 - 666 - /* Store previous stack value */ 667 681 pushq %rsi 668 682 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ 669 683 0x77 /* DW_OP_breg7 */, 0, \ ··· 758 800 */ 759 801 irq_return_via_sysret: 760 802 CFI_REMEMBER_STATE 761 - RESTORE_ARGS 1,8,1 803 + RESTORE_C_REGS 804 + REMOVE_PT_GPREGS_FROM_STACK 8 762 805 movq (RSP-RIP)(%rsp),%rsp 763 806 USERGS_SYSRET64 764 807 CFI_RESTORE_STATE ··· 775 816 */ 776 817 TRACE_IRQS_IRETQ 777 818 restore_args: 778 - RESTORE_ARGS 1,8,1 819 + RESTORE_C_REGS 820 + REMOVE_PT_GPREGS_FROM_STACK 8 779 821 780 822 irq_return: 781 823 INTERRUPT_RETURN ··· 847 887 jz retint_swapgs 848 888 TRACE_IRQS_ON 849 889 ENABLE_INTERRUPTS(CLBR_NONE) 850 - SAVE_REST 890 + SAVE_EXTRA_REGS 851 891 movq $-1,ORIG_RAX(%rsp) 852 892 xorl %esi,%esi # oldset 853 893 movq %rsp,%rdi # &pt_regs 854 894 call do_notify_resume 855 - RESTORE_REST 895 + RESTORE_EXTRA_REGS 856 896 DISABLE_INTERRUPTS(CLBR_NONE) 857 897 TRACE_IRQS_OFF 858 898 GET_THREAD_INFO(%rcx) ··· 979 1019 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 980 1020 .endif 981 1021 982 - subq $ORIG_RAX-R15, %rsp 983 - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1022 + ALLOC_PT_GPREGS_ON_STACK 984 1023 985 1024 .if \paranoid 986 1025 .if \paranoid == 1 ··· 1228 1269 addq $0x30,%rsp 1229 1270 CFI_ADJUST_CFA_OFFSET -0x30 1230 1271 pushq_cfi $-1 /* orig_ax = -1 => not a system call */ 1231 - SAVE_ALL 1272 + ALLOC_PT_GPREGS_ON_STACK 1273 + 
SAVE_C_REGS 1274 + SAVE_EXTRA_REGS 1232 1275 jmp error_exit 1233 1276 CFI_ENDPROC 1234 1277 END(xen_failsafe_callback) ··· 1282 1321 jnz paranoid_restore 1283 1322 TRACE_IRQS_IRETQ 0 1284 1323 SWAPGS_UNSAFE_STACK 1285 - RESTORE_ALL 8 1324 + RESTORE_EXTRA_REGS 1325 + RESTORE_C_REGS 1326 + REMOVE_PT_GPREGS_FROM_STACK 8 1286 1327 INTERRUPT_RETURN 1287 1328 paranoid_restore: 1288 1329 TRACE_IRQS_IRETQ_DEBUG 0 1289 - RESTORE_ALL 8 1330 + RESTORE_EXTRA_REGS 1331 + RESTORE_C_REGS 1332 + REMOVE_PT_GPREGS_FROM_STACK 8 1290 1333 INTERRUPT_RETURN 1291 1334 CFI_ENDPROC 1292 1335 END(paranoid_exit) ··· 1304 1339 CFI_ADJUST_CFA_OFFSET 15*8 1305 1340 /* oldrax contains error code */ 1306 1341 cld 1307 - movq %rdi, RDI+8(%rsp) 1308 - movq %rsi, RSI+8(%rsp) 1309 - movq %rdx, RDX+8(%rsp) 1310 - movq %rcx, RCX+8(%rsp) 1311 - movq %rax, RAX+8(%rsp) 1312 - movq %r8, R8+8(%rsp) 1313 - movq %r9, R9+8(%rsp) 1314 - movq %r10, R10+8(%rsp) 1315 - movq %r11, R11+8(%rsp) 1316 - movq_cfi rbx, RBX+8 1317 - movq %rbp, RBP+8(%rsp) 1318 - movq %r12, R12+8(%rsp) 1319 - movq %r13, R13+8(%rsp) 1320 - movq %r14, R14+8(%rsp) 1321 - movq %r15, R15+8(%rsp) 1342 + SAVE_C_REGS 8 1343 + SAVE_EXTRA_REGS 8 1322 1344 xorl %ebx,%ebx 1323 1345 testl $3,CS+8(%rsp) 1324 1346 je error_kernelspace ··· 1354 1402 ENTRY(error_exit) 1355 1403 DEFAULT_FRAME 1356 1404 movl %ebx,%eax 1357 - RESTORE_REST 1405 + RESTORE_EXTRA_REGS 1358 1406 DISABLE_INTERRUPTS(CLBR_NONE) 1359 1407 TRACE_IRQS_OFF 1360 1408 GET_THREAD_INFO(%rcx) ··· 1573 1621 * so that we repeat another NMI. 1574 1622 */ 1575 1623 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1576 - subq $ORIG_RAX-R15, %rsp 1577 - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1624 + ALLOC_PT_GPREGS_ON_STACK 1625 + 1578 1626 /* 1579 1627 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit 1580 1628 * as we should not be calling schedule in NMI context. 
··· 1613 1661 nmi_swapgs: 1614 1662 SWAPGS_UNSAFE_STACK 1615 1663 nmi_restore: 1664 + RESTORE_EXTRA_REGS 1665 + RESTORE_C_REGS 1616 1666 /* Pop the extra iret frame at once */ 1617 - RESTORE_ALL 6*8 1667 + REMOVE_PT_GPREGS_FROM_STACK 6*8 1618 1668 1619 1669 /* Clear the NMI executing stack variable */ 1620 1670 movq $0, 5*8(%rsp)