Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v5.8-rc2 576 lines 14 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 */ 2 3#define _GNU_SOURCE 4 5#include <stdio.h> 6#include <sys/time.h> 7#include <time.h> 8#include <stdlib.h> 9#include <sys/syscall.h> 10#include <unistd.h> 11#include <dlfcn.h> 12#include <string.h> 13#include <inttypes.h> 14#include <signal.h> 15#include <sys/ucontext.h> 16#include <errno.h> 17#include <err.h> 18#include <sched.h> 19#include <stdbool.h> 20#include <setjmp.h> 21#include <sys/uio.h> 22 23#ifdef __x86_64__ 24# define VSYS(x) (x) 25#else 26# define VSYS(x) 0 27#endif 28 29#ifndef SYS_getcpu 30# ifdef __x86_64__ 31# define SYS_getcpu 309 32# else 33# define SYS_getcpu 318 34# endif 35#endif 36 37/* max length of lines in /proc/self/maps - anything longer is skipped here */ 38#define MAPS_LINE_LEN 128 39 40static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 41 int flags) 42{ 43 struct sigaction sa; 44 memset(&sa, 0, sizeof(sa)); 45 sa.sa_sigaction = handler; 46 sa.sa_flags = SA_SIGINFO | flags; 47 sigemptyset(&sa.sa_mask); 48 if (sigaction(sig, &sa, 0)) 49 err(1, "sigaction"); 50} 51 52/* vsyscalls and vDSO */ 53bool vsyscall_map_r = false, vsyscall_map_x = false; 54 55typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); 56const gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); 57gtod_t vdso_gtod; 58 59typedef int (*vgettime_t)(clockid_t, struct timespec *); 60vgettime_t vdso_gettime; 61 62typedef long (*time_func_t)(time_t *t); 63const time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); 64time_func_t vdso_time; 65 66typedef long (*getcpu_t)(unsigned *, unsigned *, void *); 67const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); 68getcpu_t vdso_getcpu; 69 70static void init_vdso(void) 71{ 72 void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); 73 if (!vdso) 74 vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); 75 if (!vdso) { 76 printf("[WARN]\tfailed to find vDSO\n"); 77 return; 78 } 79 80 vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); 81 if (!vdso_gtod) 82 printf("[WARN]\tfailed to find gettimeofday in vDSO\n"); 83 84 vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); 85 if (!vdso_gettime) 86 printf("[WARN]\tfailed to find clock_gettime in vDSO\n"); 87 88 vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); 89 if (!vdso_time) 90 printf("[WARN]\tfailed to find time in vDSO\n"); 91 92 vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); 93 if (!vdso_getcpu) { 94 /* getcpu() was never wired up in the 32-bit vDSO. */ 95 printf("[%s]\tfailed to find getcpu in vDSO\n", 96 sizeof(long) == 8 ? "WARN" : "NOTE"); 97 } 98} 99 100static int init_vsys(void) 101{ 102#ifdef __x86_64__ 103 int nerrs = 0; 104 FILE *maps; 105 char line[MAPS_LINE_LEN]; 106 bool found = false; 107 108 maps = fopen("/proc/self/maps", "r"); 109 if (!maps) { 110 printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); 111 vsyscall_map_r = true; 112 return 0; 113 } 114 115 while (fgets(line, MAPS_LINE_LEN, maps)) { 116 char r, x; 117 void *start, *end; 118 char name[MAPS_LINE_LEN]; 119 120 /* sscanf() is safe here as strlen(name) >= strlen(line) */ 121 if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", 122 &start, &end, &r, &x, name) != 5) 123 continue; 124 125 if (strcmp(name, "[vsyscall]")) 126 continue; 127 128 printf("\tvsyscall map: %s", line); 129 130 if (start != (void *)0xffffffffff600000 || 131 end != (void *)0xffffffffff601000) { 132 printf("[FAIL]\taddress range is nonsense\n"); 133 nerrs++; 134 } 135 136 printf("\tvsyscall permissions are %c-%c\n", r, x); 137 vsyscall_map_r = (r == 'r'); 138 vsyscall_map_x = (x == 'x'); 139 140 found = true; 141 break; 142 } 143 144 fclose(maps); 145 146 if (!found) { 147 printf("\tno vsyscall map in /proc/self/maps\n"); 148 vsyscall_map_r = false; 149 vsyscall_map_x = false; 150 } 151 152 return nerrs; 153#else 154 return 0; 155#endif 156} 157 158/* syscalls */ 159static inline long sys_gtod(struct timeval *tv, struct timezone *tz) 160{ 161 return syscall(SYS_gettimeofday, tv, tz); 162} 163 164static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) 165{ 166 return syscall(SYS_clock_gettime, id, ts); 167} 168 169static inline long sys_time(time_t *t) 170{ 171 return syscall(SYS_time, t); 172} 173 174static inline long sys_getcpu(unsigned * cpu, unsigned * node, 175 void* cache) 176{ 177 return syscall(SYS_getcpu, cpu, node, cache); 178} 179 180static jmp_buf jmpbuf; 181static volatile unsigned long segv_err; 182 183static void sigsegv(int sig, siginfo_t *info, void *ctx_void) 184{ 185 ucontext_t *ctx = (ucontext_t *)ctx_void; 186 187 segv_err = ctx->uc_mcontext.gregs[REG_ERR]; 188 siglongjmp(jmpbuf, 1); 189} 190 191static double tv_diff(const struct timeval *a, const struct timeval *b) 192{ 193 return (double)(a->tv_sec - b->tv_sec) + 194 (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; 195} 196 197static int check_gtod(const struct timeval *tv_sys1, 198 const struct timeval *tv_sys2, 199 const struct timezone *tz_sys, 200 const char *which, 201 const struct timeval *tv_other, 202 const struct timezone *tz_other) 203{ 204 int nerrs = 0; 205 double d1, d2; 206 207 if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) { 208 printf("[FAIL] %s tz mismatch\n", which); 209 nerrs++; 210 } 211 212 d1 = tv_diff(tv_other, tv_sys1); 213 d2 = tv_diff(tv_sys2, tv_other); 214 printf("\t%s time offsets: %lf %lf\n", which, d1, d2); 215 216 if (d1 < 0 || d2 < 0) { 217 printf("[FAIL]\t%s time was inconsistent with the syscall\n", which); 218 nerrs++; 219 } else { 220 printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which); 221 } 222 223 return nerrs; 224} 225 226static int test_gtod(void) 227{ 228 struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; 229 struct timezone tz_sys, tz_vdso, tz_vsys; 230 long ret_vdso = -1; 231 long ret_vsys = -1; 232 int nerrs = 0; 233 234 printf("[RUN]\ttest gettimeofday()\n"); 235 236 if (sys_gtod(&tv_sys1, &tz_sys) != 0) 237 err(1, "syscall gettimeofday"); 238 if (vdso_gtod) 239 ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); 240 if (vsyscall_map_x) 241 ret_vsys = vgtod(&tv_vsys, &tz_vsys); 242 if (sys_gtod(&tv_sys2, &tz_sys) != 0) 243 err(1, "syscall gettimeofday"); 244 245 if (vdso_gtod) { 246 if (ret_vdso == 0) { 247 nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); 248 } else { 249 printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso); 250 nerrs++; 251 } 252 } 253 254 if (vsyscall_map_x) { 255 if (ret_vsys == 0) { 256 nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); 257 } else { 258 printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys); 259 nerrs++; 260 } 261 } 262 263 return nerrs; 264} 265 266static int test_time(void) { 267 int nerrs = 0; 268 269 printf("[RUN]\ttest time()\n"); 270 long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; 271 long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; 272 t_sys1 = sys_time(&t2_sys1); 273 if (vdso_time) 274 t_vdso = vdso_time(&t2_vdso); 275 if (vsyscall_map_x) 276 t_vsys = vtime(&t2_vsys); 277 t_sys2 = sys_time(&t2_sys2); 278 if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { 279 printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2); 280 nerrs++; 281 return nerrs; 282 } 283 284 if (vdso_time) { 285 if (t_vdso < 0 || t_vdso != t2_vdso) { 286 printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso); 287 nerrs++; 288 } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { 289 printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2); 290 nerrs++; 291 } else { 292 printf("[OK]\tvDSO time() is okay\n"); 293 } 294 } 295 296 if (vsyscall_map_x) { 297 if (t_vsys < 0 || t_vsys != t2_vsys) { 298 printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); 299 nerrs++; 300 } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { 301 printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2); 302 nerrs++; 303 } else { 304 printf("[OK]\tvsyscall time() is okay\n"); 305 } 306 } 307 308 return nerrs; 309} 310 311static int test_getcpu(int cpu) 312{ 313 int nerrs = 0; 314 long ret_sys, ret_vdso = -1, ret_vsys = -1; 315 316 printf("[RUN]\tgetcpu() on CPU %d\n", cpu); 317 318 cpu_set_t cpuset; 319 CPU_ZERO(&cpuset); 320 CPU_SET(cpu, &cpuset); 321 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { 322 printf("[SKIP]\tfailed to force CPU %d\n", cpu); 323 return nerrs; 324 } 325 326 unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; 327 unsigned node = 0; 328 bool have_node = false; 329 ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); 330 if (vdso_getcpu) 331 ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); 332 if (vsyscall_map_x) 333 ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); 334 335 if (ret_sys == 0) { 336 if (cpu_sys != cpu) { 337 printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu); 338 nerrs++; 339 } 340 341 have_node = true; 342 node = node_sys; 343 } 344 345 if (vdso_getcpu) { 346 if (ret_vdso) { 347 printf("[FAIL]\tvDSO getcpu() failed\n"); 348 nerrs++; 349 } else { 350 if (!have_node) { 351 have_node = true; 352 node = node_vdso; 353 } 354 355 if (cpu_vdso != cpu) { 356 printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu); 357 nerrs++; 358 } else { 359 printf("[OK]\tvDSO reported correct CPU\n"); 360 } 361 362 if (node_vdso != node) { 363 printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node); 364 nerrs++; 365 } else { 366 printf("[OK]\tvDSO reported correct node\n"); 367 } 368 } 369 } 370 371 if (vsyscall_map_x) { 372 if (ret_vsys) { 373 printf("[FAIL]\tvsyscall getcpu() failed\n"); 374 nerrs++; 375 } else { 376 if (!have_node) { 377 have_node = true; 378 node = node_vsys; 379 } 380 381 if (cpu_vsys != cpu) { 382 printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu); 383 nerrs++; 384 } else { 385 printf("[OK]\tvsyscall reported correct CPU\n"); 386 } 387 388 if (node_vsys != node) { 389 printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node); 390 nerrs++; 391 } else { 392 printf("[OK]\tvsyscall reported correct node\n"); 393 } 394 } 395 } 396 397 return nerrs; 398} 399 400static int test_vsys_r(void) 401{ 402#ifdef __x86_64__ 403 printf("[RUN]\tChecking read access to the vsyscall page\n"); 404 bool can_read; 405 if (sigsetjmp(jmpbuf, 1) == 0) { 406 *(volatile int *)0xffffffffff600000; 407 can_read = true; 408 } else { 409 can_read = false; 410 } 411 412 if (can_read && !vsyscall_map_r) { 413 printf("[FAIL]\tWe have read access, but we shouldn't\n"); 414 return 1; 415 } else if (!can_read && vsyscall_map_r) { 416 printf("[FAIL]\tWe don't have read access, but we should\n"); 417 return 1; 418 } else if (can_read) { 419 printf("[OK]\tWe have read access\n"); 420 } else { 421 printf("[OK]\tWe do not have read access: #PF(0x%lx)\n", 422 segv_err); 423 } 424#endif 425 426 return 0; 427} 428 429static int test_vsys_x(void) 430{ 431#ifdef __x86_64__ 432 if (vsyscall_map_x) { 433 /* We already tested this adequately. */ 434 return 0; 435 } 436 437 printf("[RUN]\tMake sure that vsyscalls really page fault\n"); 438 439 bool can_exec; 440 if (sigsetjmp(jmpbuf, 1) == 0) { 441 vgtod(NULL, NULL); 442 can_exec = true; 443 } else { 444 can_exec = false; 445 } 446 447 if (can_exec) { 448 printf("[FAIL]\tExecuting the vsyscall did not page fault\n"); 449 return 1; 450 } else if (segv_err & (1 << 4)) { /* INSTR */ 451 printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n", 452 segv_err); 453 } else { 454 printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n", 455 segv_err); 456 return 1; 457 } 458#endif 459 460 return 0; 461} 462 463static int test_process_vm_readv(void) 464{ 465#ifdef __x86_64__ 466 char buf[4096]; 467 struct iovec local, remote; 468 int ret; 469 470 printf("[RUN]\tprocess_vm_readv() from vsyscall page\n"); 471 472 local.iov_base = buf; 473 local.iov_len = 4096; 474 remote.iov_base = (void *)0xffffffffff600000; 475 remote.iov_len = 4096; 476 ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0); 477 if (ret != 4096) { 478 printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno); 479 return 0; 480 } 481 482 if (vsyscall_map_r) { 483 if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) { 484 printf("[OK]\tIt worked and read correct data\n"); 485 } else { 486 printf("[FAIL]\tIt worked but returned incorrect data\n"); 487 return 1; 488 } 489 } 490#endif 491 492 return 0; 493} 494 495#ifdef __x86_64__ 496#define X86_EFLAGS_TF (1UL << 8) 497static volatile sig_atomic_t num_vsyscall_traps; 498 499static unsigned long get_eflags(void) 500{ 501 unsigned long eflags; 502 asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags)); 503 return eflags; 504} 505 506static void set_eflags(unsigned long eflags) 507{ 508 asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags"); 509} 510 511static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 512{ 513 ucontext_t *ctx = (ucontext_t *)ctx_void; 514 unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP]; 515 516 if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0) 517 num_vsyscall_traps++; 518} 519 520static int test_emulation(void) 521{ 522 time_t tmp; 523 bool is_native; 524 525 if (!vsyscall_map_x) 526 return 0; 527 528 printf("[RUN]\tchecking that vsyscalls are emulated\n"); 529 sethandler(SIGTRAP, sigtrap, 0); 530 set_eflags(get_eflags() | X86_EFLAGS_TF); 531 vtime(&tmp); 532 set_eflags(get_eflags() & ~X86_EFLAGS_TF); 533 534 /* 535 * If vsyscalls are emulated, we expect a single trap in the 536 * vsyscall page -- the call instruction will trap with RIP 537 * pointing to the entry point before emulation takes over. 538 * In native mode, we expect two traps, since whatever code 539 * the vsyscall page contains will be more than just a ret 540 * instruction. 541 */ 542 is_native = (num_vsyscall_traps > 1); 543 544 printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n", 545 (is_native ? "FAIL" : "OK"), 546 (is_native ? "native" : "emulated"), 547 (int)num_vsyscall_traps); 548 549 return is_native; 550} 551#endif 552 553int main(int argc, char **argv) 554{ 555 int nerrs = 0; 556 557 init_vdso(); 558 nerrs += init_vsys(); 559 560 nerrs += test_gtod(); 561 nerrs += test_time(); 562 nerrs += test_getcpu(0); 563 nerrs += test_getcpu(1); 564 565 sethandler(SIGSEGV, sigsegv, 0); 566 nerrs += test_vsys_r(); 567 nerrs += test_vsys_x(); 568 569 nerrs += test_process_vm_readv(); 570 571#ifdef __x86_64__ 572 nerrs += test_emulation(); 573#endif 574 575 return nerrs ? 1 : 0; 576}