Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests: Add benchmark for syscall user dispatch

This is the patch I'm using to evaluate the impact syscall user dispatch
has on native syscalls (syscalls not redirected to userspace) when
enabled for the process and submitting syscalls through the unblocked
dispatch selector. It works by running a step to define a baseline of
the cost of executing sysinfo, then enabling SUD, and rerunning that
step.

On my test machine, an AMD Ryzen 5 1500X, I have the following results
with the latest version of syscall user dispatch patches.

root@olga:~# syscall_user_dispatch/sud_benchmark
Calibrating test set to last ~5 seconds...
test iterations = 37500000
Avg syscall time 134ns.
Caught sys_ff00
trapped_call_count 1, native_call_count 0.
Avg syscall time 147ns.
Interception overhead: 9.7% (+13ns).

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20201127193238.821364-7-krisman@collabora.com

authored by

Gabriel Krisman Bertazi and committed by
Thomas Gleixner
d87ae0fa 179ef035

+201 -1
+1 -1
tools/testing/selftests/syscall_user_dispatch/Makefile
··· 5 5 6 6 CFLAGS += -Wall -I$(LINUX_HDR_PATH) 7 7 8 - TEST_GEN_PROGS := sud_test 8 + TEST_GEN_PROGS := sud_test sud_benchmark 9 9 include ../lib.mk
+200
tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020 Collabora Ltd.
 *
 * Benchmark and test syscall user dispatch
 *
 * The program times sysinfo() in a loop to obtain a baseline, enables
 * syscall user dispatch with the selector left unblocked for the timed
 * loop, times the same loop again and prints the difference.
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <sys/sysinfo.h>
#include <sys/prctl.h>
#include <sys/syscall.h>

/* Fallback definitions for headers that predate syscall user dispatch. */
#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH	59
# define PR_SYS_DISPATCH_OFF	0
# define PR_SYS_DISPATCH_ON	1
#endif

#ifdef __NR_syscalls
# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
#else
# define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
#endif

/*
 * To test returning from a sigsys with selector blocked, the test
 * requires some per-architecture support (i.e. knowledge about the
 * signal trampoline address).  On i386, we know it is on the vdso, and
 * a small trampoline is open-coded for x86_64.  Other architectures
 * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
 * out of the box, but don't enable them until they support syscall user
 * dispatch.
 */
#if defined(__x86_64__) || defined(__i386__)
#define TEST_BLOCKED_RETURN
#endif

/*
 * Bounds of the region handed to prctl() in main().  On x86_64 they are
 * the assembler labels emitted inside handle_sigsys(); elsewhere both
 * are zero.
 */
#ifdef __x86_64__
void* (syscall_dispatcher_start)(void);
void* (syscall_dispatcher_end)(void);
#else
unsigned long syscall_dispatcher_start = 0;
unsigned long syscall_dispatcher_end = 0;
#endif

/* Updated by handle_sigsys(): calls to MAGIC_SYSCALL_1 vs. anything else. */
unsigned long trapped_call_count = 0;
unsigned long native_call_count = 0;

/* Selector byte whose address is registered through prctl() in main(). */
char selector;
#define SYSCALL_BLOCK   (selector = PR_SYS_DISPATCH_ON)
#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)

/* Number of sysinfo() calls per timed step. */
#define CALIBRATION_STEP 100000
/* Target duration, in seconds, of one perf_syscall() run. */
#define CALIBRATE_TO_SECS 5
/* Number of steps per perf_syscall() run; set by calibrate_set(). */
int factor;

/*
 * Run CALIBRATION_STEP sysinfo() calls back to back.
 *
 * Returns the elapsed CLOCK_MONOTONIC time of the whole step, in seconds.
 */
static double one_sysinfo_step(void)
{
	struct timespec t1, t2;
	int i;
	struct sysinfo info;

	clock_gettime(CLOCK_MONOTONIC, &t1);
	for (i = 0; i < CALIBRATION_STEP; i++)
		sysinfo(&info);
	clock_gettime(CLOCK_MONOTONIC, &t2);
	return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
}

/*
 * Choose 'factor' so that a perf_syscall() run lasts about
 * CALIBRATE_TO_SECS seconds: steps are executed until one second has
 * accumulated, and every step adds CALIBRATE_TO_SECS to 'factor'.
 */
static void calibrate_set(void)
{
	double elapsed = 0;

	printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);

	while (elapsed < 1) {
		elapsed += one_sysinfo_step();
		factor += CALIBRATE_TO_SECS;
	}

	printf("test iterations = %d\n", CALIBRATION_STEP * factor);
}

/*
 * Run 'factor' timed steps.
 *
 * Returns the average time of a single sysinfo() call, in seconds.
 */
static double perf_syscall(void)
{
	/* Same type as 'factor' so the loop bound is not a mixed-sign compare. */
	int i;
	double partial = 0;

	for (i = 0; i < factor; ++i)
		partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
	return partial;
}

/*
 * SIGSYS handler: report the intercepted syscall number and count it as
 * either the expected MAGIC_SYSCALL_1 or an unexpected native syscall.
 *
 * The selector is unblocked on entry so the handler's own syscalls are
 * not dispatched again.  With TEST_BLOCKED_RETURN it is blocked again
 * before leaving, which on x86_64 is done by the open-coded trampoline
 * at the end of the function instead of a normal return.
 */
static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
{
	char buf[1024];
	int len;

	SYSCALL_UNBLOCK;

	/* printf and friends are not signal-safe. */
	len = snprintf(buf, sizeof(buf), "Caught sys_%x\n", info->si_syscall);
	write(1, buf, len);

	if (info->si_syscall == MAGIC_SYSCALL_1)
		trapped_call_count++;
	else
		native_call_count++;

#ifdef TEST_BLOCKED_RETURN
	SYSCALL_BLOCK;
#endif

#ifdef __x86_64__
	/*
	 * Open-coded signal return: load syscall number 0xf (rt_sigreturn
	 * on x86_64), tear down this function's frame and issue the
	 * syscall from between the two labels, i.e. from inside the region
	 * registered with prctl() in main().
	 * NOTE(review): leaveq + add $0x8 presumably relies on this
	 * function being built with a frame pointer - confirm before
	 * changing compiler flags.
	 */
	__asm__ volatile("movq $0xf, %rax");
	__asm__ volatile("leaveq");
	__asm__ volatile("add $0x8, %rsp");
	__asm__ volatile("syscall_dispatcher_start:");
	__asm__ volatile("syscall");
	__asm__ volatile("nop"); /* Landing pad within dispatcher area */
	__asm__ volatile("syscall_dispatcher_end:");
#endif
}

/*
 * Measure the baseline sysinfo() cost, enable syscall user dispatch,
 * check that a blocked bad syscall is trapped exactly as expected, then
 * measure sysinfo() again with the selector unblocked and print the
 * overhead.
 *
 * Returns 0 on success; exits with status -1 on any failure.
 */
int main(void)
{
	struct sigaction act;
	double time1, time2;
	int ret;
	sigset_t mask;

	memset(&act, 0, sizeof(act));
	sigemptyset(&mask);

	act.sa_sigaction = handle_sigsys;
	act.sa_flags = SA_SIGINFO;
	act.sa_mask = mask;

	calibrate_set();

	/* Baseline: dispatch not enabled yet. */
	time1 = perf_syscall();
	printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);

	ret = sigaction(SIGSYS, &act, NULL);
	if (ret) {
		/* perror() appends ": <strerror>" itself. */
		perror("Error sigaction");
		exit(-1);
	}

	fprintf(stderr, "Enabling syscall trapping.\n");

	if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
		  syscall_dispatcher_start,
		  (syscall_dispatcher_end - syscall_dispatcher_start + 1),
		  &selector)) {
		perror("prctl failed");
		exit(-1);
	}

	/* A blocked selector must turn the bad syscall into a SIGSYS. */
	SYSCALL_BLOCK;
	syscall(MAGIC_SYSCALL_1);

#ifdef TEST_BLOCKED_RETURN
	if (selector == PR_SYS_DISPATCH_OFF) {
		fprintf(stderr, "Failed to return with selector blocked.\n");
		exit(-1);
	}
#endif

	SYSCALL_UNBLOCK;

	if (!trapped_call_count) {
		fprintf(stderr, "syscall trapping does not work.\n");
		exit(-1);
	}

	/* Dispatch enabled, selector unblocked: syscalls run natively. */
	time2 = perf_syscall();

	if (native_call_count) {
		/* Not an errno failure, so report it like the checks above. */
		fprintf(stderr, "syscall trapping intercepted more syscalls than expected\n");
		exit(-1);
	}

	printf("trapped_call_count %lu, native_call_count %lu.\n",
	       trapped_call_count, native_call_count);
	printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
	printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
	       100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
	return 0;
}