Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * single_step_syscall.c - single-steps various x86 syscalls
4 * Copyright (c) 2014-2015 Andrew Lutomirski
5 *
6 * This is a very simple series of tests that makes system calls with
7 * the TF flag set. This exercises some nasty kernel code in the
8 * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
9 * immediately issues #DB from CPL 0. This requires special handling in
10 * the kernel.
11 */
12
13#define _GNU_SOURCE
14
15#include <sys/time.h>
16#include <time.h>
17#include <stdlib.h>
18#include <sys/syscall.h>
19#include <unistd.h>
20#include <stdio.h>
21#include <string.h>
22#include <inttypes.h>
23#include <sys/mman.h>
24#include <sys/signal.h>
25#include <sys/ucontext.h>
26#include <asm/ldt.h>
27#include <err.h>
28#include <setjmp.h>
29#include <stddef.h>
30#include <stdbool.h>
31#include <sys/ptrace.h>
32#include <sys/user.h>
33
/*
 * Install @handler as the three-argument (SA_SIGINFO) handler for @sig.
 *
 * @flags is OR-ed into sa_flags next to SA_SIGINFO, and no additional
 * signals are blocked while the handler runs.  A sigaction() failure is
 * fatal: err() reports it and the process exits with status 1.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	/* Members not named here start out zeroed. */
	struct sigaction action = { .sa_flags = SA_SIGINFO | flags };

	action.sa_sigaction = handler;
	sigemptyset(&action.sa_mask);

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
45
/*
 * Number of SIGTRAPs counted by sigtrap() since check_result() last
 * reset it to zero.
 */
static volatile sig_atomic_t sig_traps;
47
/*
 * Per-architecture definitions:
 *   REG_IP         - index of the instruction pointer in uc_mcontext.gregs[]
 *   WIDTH          - operand-size suffix appended to asm mnemonics
 *   INT80_CLOBBERS - clobber-list entries for the "int $0x80" asm statement
 */
#if defined(__x86_64__)
# define REG_IP		REG_RIP
# define WIDTH		"q"
# define INT80_CLOBBERS	"r8", "r9", "r10", "r11"
#else
# define REG_IP		REG_EIP
# define WIDTH		"l"
# define INT80_CLOBBERS
#endif
57
/*
 * Return the current value of the flags register (EFLAGS / RFLAGS).
 *
 * The flags are pushed onto the stack and popped into a register.  On
 * x86_64 the System V ABI lets the compiler keep live data in the 128-byte
 * red zone below %rsp, which a bare PUSHF would overwrite, so %rsp is moved
 * past the red zone around the push/pop.  LEA performs the adjustment
 * because, unlike ADD/SUB, it does not modify the flags.
 *
 * The result must live in a register ("=r"): an %rsp-relative memory
 * operand would be addressed incorrectly while %rsp is shifted.  The
 * "memory" clobber stops the compiler from moving memory accesses across
 * the read.
 */
static unsigned long get_eflags(void)
{
	unsigned long eflags;

	__asm__ __volatile__ (
#ifdef __x86_64__
		"leaq -128(%%rsp), %%rsp\n\t"
		"pushfq\n\t"
		"popq %0\n\t"
		"leaq 128(%%rsp), %%rsp"
#else
		"pushfl\n\t"
		"popl %0"
#endif
		: "=r" (eflags) : : "memory");

	return eflags;
}
64
/*
 * Load @eflags into the flags register (EFLAGS / RFLAGS).
 *
 * The value is pushed onto the stack and popped with POPF.  On x86_64 the
 * System V ABI lets the compiler keep live data in the 128-byte red zone
 * below %rsp, which a bare PUSH would overwrite, so %rsp is moved past the
 * red zone around the push/popf.  LEA performs the adjustment because,
 * unlike ADD/SUB, it does not modify the flags that were just loaded.
 *
 * The input must live in a register ("r"): an %rsp-relative memory operand
 * would be addressed incorrectly while %rsp is shifted.  The "memory"
 * clobber stops the compiler from moving memory accesses across the write.
 */
static void set_eflags(unsigned long eflags)
{
	__asm__ __volatile__ (
#ifdef __x86_64__
		"leaq -128(%%rsp), %%rsp\n\t"
		"pushq %0\n\t"
		"popfq\n\t"
		"leaq 128(%%rsp), %%rsp"
#else
		"pushl %0\n\t"
		"popfl"
#endif
		: : "r" (eflags) : "flags", "memory");
}
70
/* Mask for the TF flag (bit 8) that this test sets to single-step code. */
#define X86_EFLAGS_TF (1UL << 8)
72
73static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
74{
75 ucontext_t *ctx = (ucontext_t*)ctx_void;
76
77 if (get_eflags() & X86_EFLAGS_TF) {
78 set_eflags(get_eflags() & ~X86_EFLAGS_TF);
79 printf("[WARN]\tSIGTRAP handler had TF set\n");
80 _exit(1);
81 }
82
83 sig_traps++;
84
85 if (sig_traps == 10000 || sig_traps == 10001) {
86 printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
87 (int)sig_traps,
88 (unsigned long)info->si_addr,
89 (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
90 }
91}
92
93static void check_result(void)
94{
95 unsigned long new_eflags = get_eflags();
96 set_eflags(new_eflags & ~X86_EFLAGS_TF);
97
98 if (!sig_traps) {
99 printf("[FAIL]\tNo SIGTRAP\n");
100 exit(1);
101 }
102
103 if (!(new_eflags & X86_EFLAGS_TF)) {
104 printf("[FAIL]\tTF was cleared\n");
105 exit(1);
106 }
107
108 printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
109 sig_traps = 0;
110}
111
112int main()
113{
114#ifdef CAN_BUILD_32
115 int tmp;
116#endif
117
118 sethandler(SIGTRAP, sigtrap, 0);
119
120 printf("[RUN]\tSet TF and check nop\n");
121 set_eflags(get_eflags() | X86_EFLAGS_TF);
122 asm volatile ("nop");
123 check_result();
124
125#ifdef __x86_64__
126 printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
127 set_eflags(get_eflags() | X86_EFLAGS_TF);
128 extern unsigned char post_nop[];
129 asm volatile ("pushf" WIDTH "\n\t"
130 "pop" WIDTH " %%r11\n\t"
131 "nop\n\t"
132 "post_nop:"
133 : : "c" (post_nop) : "r11");
134 check_result();
135#endif
136#ifdef CAN_BUILD_32
137 printf("[RUN]\tSet TF and check int80\n");
138 set_eflags(get_eflags() | X86_EFLAGS_TF);
139 asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
140 : INT80_CLOBBERS);
141 check_result();
142#endif
143
144 /*
145 * This test is particularly interesting if fast syscalls use
146 * SYSENTER: it triggers a nasty design flaw in SYSENTER.
147 * Specifically, SYSENTER does not clear TF, so either SYSENTER
148 * or the next instruction traps at CPL0. (Of course, Intel
149 * mostly forgot to document exactly what happens here.) So we
150 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
151 * no stack. The only sane way the kernel can possibly handle
152 * it is to clear TF on return from the #DB handler, but this
153 * happens way too early to set TF in the saved pt_regs, so the
154 * kernel has to do something clever to avoid losing track of
155 * the TF bit.
156 *
157 * Needless to say, we've had bugs in this area.
158 */
159 syscall(SYS_getpid); /* Force symbol binding without TF set. */
160 printf("[RUN]\tSet TF and check a fast syscall\n");
161 set_eflags(get_eflags() | X86_EFLAGS_TF);
162 syscall(SYS_getpid);
163 check_result();
164
165 /* Now make sure that another fast syscall doesn't set TF again. */
166 printf("[RUN]\tFast syscall with TF cleared\n");
167 fflush(stdout); /* Force a syscall */
168 if (get_eflags() & X86_EFLAGS_TF) {
169 printf("[FAIL]\tTF is now set\n");
170 exit(1);
171 }
172 if (sig_traps) {
173 printf("[FAIL]\tGot SIGTRAP\n");
174 exit(1);
175 }
176 printf("[OK]\tNothing unexpected happened\n");
177
178 return 0;
179}