Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/entry: Move C entry and exit code to arch/x86/entry/common.c

The entry and exit C helpers were confusingly scattered between
ptrace.c and signal.c, even though they aren't specific to
ptrace or signal handling. Move them together in a new file.

This change just moves code around. It doesn't change anything.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/324d686821266544d8572423cc281f961da445f4.1435952415.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Andy Lutomirski and committed by
Ingo Molnar
1f484aa6 e727c7d7

+257 -228
+1
arch/x86/entry/Makefile
··· 2 2 # Makefile for the x86 low level entry code 3 3 # 4 4 obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o 5 + obj-y += common.o 5 6 6 7 obj-y += vdso/ 7 8 obj-y += vsyscall/
+253
arch/x86/entry/common.c
/*
 * common.c - C code for kernel entry and exit
 * Copyright (c) 2015 Andrew Lutomirski
 * GPL v2
 *
 * Based on asm and ptrace code by many authors.  The code here originated
 * in ptrace.c and signal.c.
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/export.h>
#include <linux/context_tracking.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>

#include <asm/desc.h>
#include <asm/traps.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/*
 * Emit an audit_syscall_entry() record, reading the first four syscall
 * arguments from the register slots used by the given audit arch
 * (x86-64 convention vs. the ia32 convention).
 */
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
	if (arch == AUDIT_ARCH_X86_64) {
		audit_syscall_entry(regs->orig_ax, regs->di,
				    regs->si, regs->dx, regs->r10);
	} else
#endif
	{
		audit_syscall_entry(regs->orig_ax, regs->bx,
				    regs->cx, regs->dx, regs->si);
	}
}

/*
 * We can return 0 to resume the syscall or anything else to go to phase
 * 2.  If we resume the syscall, we need to put something appropriate in
 * regs->orig_ax.
 *
 * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
 * are fully functional.
 *
 * For phase 2's benefit, our return value is:
 * 0:			resume the syscall
 * 1:			go to phase 2; no seccomp phase 2 needed
 * anything else:	go to phase 2; pass return value to seccomp
 */
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
{
	unsigned long ret = 0;
	u32 work;

	BUG_ON(regs != task_pt_regs(current));

	work = ACCESS_ONCE(current_thread_info()->flags) &
		_TIF_WORK_SYSCALL_ENTRY;

	/*
	 * If TIF_NOHZ is set, we are required to call user_exit() before
	 * doing anything that could touch RCU.
	 */
	if (work & _TIF_NOHZ) {
		user_exit();
		work &= ~_TIF_NOHZ;
	}

#ifdef CONFIG_SECCOMP
	/*
	 * Do seccomp first -- it should minimize exposure of other
	 * code, and keeping seccomp fast is probably more valuable
	 * than the rest of this.
	 */
	if (work & _TIF_SECCOMP) {
		struct seccomp_data sd;

		sd.arch = arch;
		sd.nr = regs->orig_ax;
		sd.instruction_pointer = regs->ip;
#ifdef CONFIG_X86_64
		if (arch == AUDIT_ARCH_X86_64) {
			sd.args[0] = regs->di;
			sd.args[1] = regs->si;
			sd.args[2] = regs->dx;
			sd.args[3] = regs->r10;
			sd.args[4] = regs->r8;
			sd.args[5] = regs->r9;
		} else
#endif
		{
			sd.args[0] = regs->bx;
			sd.args[1] = regs->cx;
			sd.args[2] = regs->dx;
			sd.args[3] = regs->si;
			sd.args[4] = regs->di;
			sd.args[5] = regs->bp;
		}

		/* The return-value convention documented above relies on these. */
		BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
		BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);

		ret = seccomp_phase1(&sd);
		if (ret == SECCOMP_PHASE1_SKIP) {
			/* Skip the syscall: give it an invalid nr and resume. */
			regs->orig_ax = -1;
			ret = 0;
		} else if (ret != SECCOMP_PHASE1_OK) {
			return ret;  /* Go directly to phase 2 */
		}

		work &= ~_TIF_SECCOMP;
	}
#endif

	/* Do our best to finish without phase 2. */
	if (work == 0)
		return ret;  /* seccomp and/or nohz only (ret == 0 here) */

#ifdef CONFIG_AUDITSYSCALL
	if (work == _TIF_SYSCALL_AUDIT) {
		/*
		 * If there is no more work to be done except auditing,
		 * then audit in phase 1.  Phase 2 always audits, so, if
		 * we audit here, then we can't go on to phase 2.
		 */
		do_audit_syscall_entry(regs, arch);
		return 0;
	}
#endif

	return 1;  /* Something is enabled that we can't handle in phase 1 */
}

/* Returns the syscall nr to run (which should match regs->orig_ax). */
long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
				unsigned long phase1_result)
{
	long ret = 0;
	u32 work = ACCESS_ONCE(current_thread_info()->flags) &
		_TIF_WORK_SYSCALL_ENTRY;

	BUG_ON(regs != task_pt_regs(current));

	/*
	 * If we stepped into a sysenter/syscall insn, it trapped in
	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
	 * If user-mode had set TF itself, then it's still clear from
	 * do_debug() and we need to set it again to restore the user
	 * state.  If we entered on the slow path, TF was already set.
	 */
	if (work & _TIF_SINGLESTEP)
		regs->flags |= X86_EFLAGS_TF;

#ifdef CONFIG_SECCOMP
	/*
	 * Call seccomp_phase2 before running the other hooks so that
	 * they can see any changes made by a seccomp tracer.
	 */
	if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
		/* seccomp failures shouldn't expose any additional code. */
		return -1;
	}
#endif

	if (unlikely(work & _TIF_SYSCALL_EMU))
		ret = -1L;

	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
	    tracehook_report_syscall_entry(regs))
		ret = -1L;

	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
		trace_sys_enter(regs, regs->orig_ax);

	do_audit_syscall_entry(regs, arch);

	return ret ?: regs->orig_ax;
}

/*
 * Combined slow-path entry hook: run phase 1 and, only if phase 1 says
 * more work remains, phase 2.  Returns the syscall nr to run (or -1 to
 * skip the syscall, per phase 2's return value).
 */
long syscall_trace_enter(struct pt_regs *regs)
{
	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);

	if (phase1_result == 0)
		return regs->orig_ax;
	else
		return syscall_trace_enter_phase2(regs, arch, phase1_result);
}

void syscall_trace_leave(struct pt_regs *regs)
{
	bool step;

	/*
	 * We may come here right after calling schedule_user()
	 * or do_notify_resume(), in which case we can be in RCU
	 * user mode.
	 */
	user_exit();

	audit_syscall_exit(regs);

	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
		trace_sys_exit(regs, regs->ax);

	/*
	 * If TIF_SYSCALL_EMU is set, we only get here because of
	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
	 * We already reported this syscall instruction in
	 * syscall_trace_enter().
	 */
	step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
		!test_thread_flag(TIF_SYSCALL_EMU);
	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
		tracehook_report_syscall_exit(regs, step);

	user_enter();
}

/*
 * notification of userspace execution resumption
 * - triggered by the TIF_WORK_MASK flags
 */
__visible void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
	user_exit();

	if (thread_info_flags & _TIF_UPROBE)
		uprobe_notify_resume(regs);

	/* deal with pending signal delivery */
	if (thread_info_flags & _TIF_SIGPENDING)
		do_signal(regs);

	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
		clear_thread_flag(TIF_NOTIFY_RESUME);
		tracehook_notify_resume(regs);
	}
	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
		fire_user_return_notifiers();

	user_enter();
}
+1
arch/x86/include/asm/signal.h
··· 30 30 #endif /* __ASSEMBLY__ */ 31 31 #include <uapi/asm/signal.h> 32 32 #ifndef __ASSEMBLY__ 33 + extern void do_signal(struct pt_regs *regs); 33 34 extern void do_notify_resume(struct pt_regs *, void *, __u32); 34 35 35 36 #define __ARCH_HAS_SA_RESTORER
+1 -201
arch/x86/kernel/ptrace.c
··· 37 37 #include <asm/proto.h> 38 38 #include <asm/hw_breakpoint.h> 39 39 #include <asm/traps.h> 40 + #include <asm/syscall.h> 40 41 41 42 #include "tls.h" 42 - 43 - #define CREATE_TRACE_POINTS 44 - #include <trace/events/syscalls.h> 45 43 46 44 enum x86_regset { 47 45 REGSET_GENERAL, ··· 1441 1443 fill_sigtrap_info(tsk, regs, error_code, si_code, &info); 1442 1444 /* Send us the fake SIGTRAP */ 1443 1445 force_sig_info(SIGTRAP, &info, tsk); 1444 - } 1445 - 1446 - static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) 1447 - { 1448 - #ifdef CONFIG_X86_64 1449 - if (arch == AUDIT_ARCH_X86_64) { 1450 - audit_syscall_entry(regs->orig_ax, regs->di, 1451 - regs->si, regs->dx, regs->r10); 1452 - } else 1453 - #endif 1454 - { 1455 - audit_syscall_entry(regs->orig_ax, regs->bx, 1456 - regs->cx, regs->dx, regs->si); 1457 - } 1458 - } 1459 - 1460 - /* 1461 - * We can return 0 to resume the syscall or anything else to go to phase 1462 - * 2. If we resume the syscall, we need to put something appropriate in 1463 - * regs->orig_ax. 1464 - * 1465 - * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax 1466 - * are fully functional. 1467 - * 1468 - * For phase 2's benefit, our return value is: 1469 - * 0: resume the syscall 1470 - * 1: go to phase 2; no seccomp phase 2 needed 1471 - * anything else: go to phase 2; pass return value to seccomp 1472 - */ 1473 - unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) 1474 - { 1475 - unsigned long ret = 0; 1476 - u32 work; 1477 - 1478 - BUG_ON(regs != task_pt_regs(current)); 1479 - 1480 - work = ACCESS_ONCE(current_thread_info()->flags) & 1481 - _TIF_WORK_SYSCALL_ENTRY; 1482 - 1483 - /* 1484 - * If TIF_NOHZ is set, we are required to call user_exit() before 1485 - * doing anything that could touch RCU. 
1486 - */ 1487 - if (work & _TIF_NOHZ) { 1488 - user_exit(); 1489 - work &= ~_TIF_NOHZ; 1490 - } 1491 - 1492 - #ifdef CONFIG_SECCOMP 1493 - /* 1494 - * Do seccomp first -- it should minimize exposure of other 1495 - * code, and keeping seccomp fast is probably more valuable 1496 - * than the rest of this. 1497 - */ 1498 - if (work & _TIF_SECCOMP) { 1499 - struct seccomp_data sd; 1500 - 1501 - sd.arch = arch; 1502 - sd.nr = regs->orig_ax; 1503 - sd.instruction_pointer = regs->ip; 1504 - #ifdef CONFIG_X86_64 1505 - if (arch == AUDIT_ARCH_X86_64) { 1506 - sd.args[0] = regs->di; 1507 - sd.args[1] = regs->si; 1508 - sd.args[2] = regs->dx; 1509 - sd.args[3] = regs->r10; 1510 - sd.args[4] = regs->r8; 1511 - sd.args[5] = regs->r9; 1512 - } else 1513 - #endif 1514 - { 1515 - sd.args[0] = regs->bx; 1516 - sd.args[1] = regs->cx; 1517 - sd.args[2] = regs->dx; 1518 - sd.args[3] = regs->si; 1519 - sd.args[4] = regs->di; 1520 - sd.args[5] = regs->bp; 1521 - } 1522 - 1523 - BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); 1524 - BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); 1525 - 1526 - ret = seccomp_phase1(&sd); 1527 - if (ret == SECCOMP_PHASE1_SKIP) { 1528 - regs->orig_ax = -1; 1529 - ret = 0; 1530 - } else if (ret != SECCOMP_PHASE1_OK) { 1531 - return ret; /* Go directly to phase 2 */ 1532 - } 1533 - 1534 - work &= ~_TIF_SECCOMP; 1535 - } 1536 - #endif 1537 - 1538 - /* Do our best to finish without phase 2. */ 1539 - if (work == 0) 1540 - return ret; /* seccomp and/or nohz only (ret == 0 here) */ 1541 - 1542 - #ifdef CONFIG_AUDITSYSCALL 1543 - if (work == _TIF_SYSCALL_AUDIT) { 1544 - /* 1545 - * If there is no more work to be done except auditing, 1546 - * then audit in phase 1. Phase 2 always audits, so, if 1547 - * we audit here, then we can't go on to phase 2. 
1548 - */ 1549 - do_audit_syscall_entry(regs, arch); 1550 - return 0; 1551 - } 1552 - #endif 1553 - 1554 - return 1; /* Something is enabled that we can't handle in phase 1 */ 1555 - } 1556 - 1557 - /* Returns the syscall nr to run (which should match regs->orig_ax). */ 1558 - long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, 1559 - unsigned long phase1_result) 1560 - { 1561 - long ret = 0; 1562 - u32 work = ACCESS_ONCE(current_thread_info()->flags) & 1563 - _TIF_WORK_SYSCALL_ENTRY; 1564 - 1565 - BUG_ON(regs != task_pt_regs(current)); 1566 - 1567 - /* 1568 - * If we stepped into a sysenter/syscall insn, it trapped in 1569 - * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. 1570 - * If user-mode had set TF itself, then it's still clear from 1571 - * do_debug() and we need to set it again to restore the user 1572 - * state. If we entered on the slow path, TF was already set. 1573 - */ 1574 - if (work & _TIF_SINGLESTEP) 1575 - regs->flags |= X86_EFLAGS_TF; 1576 - 1577 - #ifdef CONFIG_SECCOMP 1578 - /* 1579 - * Call seccomp_phase2 before running the other hooks so that 1580 - * they can see any changes made by a seccomp tracer. 1581 - */ 1582 - if (phase1_result > 1 && seccomp_phase2(phase1_result)) { 1583 - /* seccomp failures shouldn't expose any additional code. */ 1584 - return -1; 1585 - } 1586 - #endif 1587 - 1588 - if (unlikely(work & _TIF_SYSCALL_EMU)) 1589 - ret = -1L; 1590 - 1591 - if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && 1592 - tracehook_report_syscall_entry(regs)) 1593 - ret = -1L; 1594 - 1595 - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) 1596 - trace_sys_enter(regs, regs->orig_ax); 1597 - 1598 - do_audit_syscall_entry(regs, arch); 1599 - 1600 - return ret ?: regs->orig_ax; 1601 - } 1602 - 1603 - long syscall_trace_enter(struct pt_regs *regs) 1604 - { 1605 - u32 arch = is_ia32_task() ? 
AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; 1606 - unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); 1607 - 1608 - if (phase1_result == 0) 1609 - return regs->orig_ax; 1610 - else 1611 - return syscall_trace_enter_phase2(regs, arch, phase1_result); 1612 - } 1613 - 1614 - void syscall_trace_leave(struct pt_regs *regs) 1615 - { 1616 - bool step; 1617 - 1618 - /* 1619 - * We may come here right after calling schedule_user() 1620 - * or do_notify_resume(), in which case we can be in RCU 1621 - * user mode. 1622 - */ 1623 - user_exit(); 1624 - 1625 - audit_syscall_exit(regs); 1626 - 1627 - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) 1628 - trace_sys_exit(regs, regs->ax); 1629 - 1630 - /* 1631 - * If TIF_SYSCALL_EMU is set, we only get here because of 1632 - * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). 1633 - * We already reported this syscall instruction in 1634 - * syscall_trace_enter(). 1635 - */ 1636 - step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && 1637 - !test_thread_flag(TIF_SYSCALL_EMU); 1638 - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) 1639 - tracehook_report_syscall_exit(regs, step); 1640 - 1641 - user_enter(); 1642 1446 }
+1 -27
arch/x86/kernel/signal.c
··· 700 700 * want to handle. Thus you cannot kill init even with a SIGKILL even by 701 701 * mistake. 702 702 */ 703 - static void do_signal(struct pt_regs *regs) 703 + void do_signal(struct pt_regs *regs) 704 704 { 705 705 struct ksignal ksig; 706 706 ··· 733 733 * back. 734 734 */ 735 735 restore_saved_sigmask(); 736 - } 737 - 738 - /* 739 - * notification of userspace execution resumption 740 - * - triggered by the TIF_WORK_MASK flags 741 - */ 742 - __visible void 743 - do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 744 - { 745 - user_exit(); 746 - 747 - if (thread_info_flags & _TIF_UPROBE) 748 - uprobe_notify_resume(regs); 749 - 750 - /* deal with pending signal delivery */ 751 - if (thread_info_flags & _TIF_SIGPENDING) 752 - do_signal(regs); 753 - 754 - if (thread_info_flags & _TIF_NOTIFY_RESUME) { 755 - clear_thread_flag(TIF_NOTIFY_RESUME); 756 - tracehook_notify_resume(regs); 757 - } 758 - if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) 759 - fire_user_return_notifiers(); 760 - 761 - user_enter(); 762 736 } 763 737 764 738 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)