/* Snapshot taken at v6.5-rc7 (originally 388 lines, 10 kB). */
1/* 2 * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16/* 17 * Create a process without mappings by unmapping everything at once and 18 * holding it with ptrace(2). See what happens to 19 * 20 * /proc/${pid}/maps 21 * /proc/${pid}/numa_maps 22 * /proc/${pid}/smaps 23 * /proc/${pid}/smaps_rollup 24 */ 25#undef NDEBUG 26#include <assert.h> 27#include <errno.h> 28#include <stdint.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32#include <fcntl.h> 33#include <sys/mman.h> 34#include <sys/ptrace.h> 35#include <sys/resource.h> 36#include <sys/types.h> 37#include <sys/wait.h> 38#include <unistd.h> 39 40/* 41 * 0: vsyscall VMA doesn't exist vsyscall=none 42 * 1: vsyscall VMA is --xp vsyscall=xonly 43 * 2: vsyscall VMA is r-xp vsyscall=emulate 44 */ 45static volatile int g_vsyscall; 46static const char *g_proc_pid_maps_vsyscall; 47static const char *g_proc_pid_smaps_vsyscall; 48 49static const char proc_pid_maps_vsyscall_0[] = ""; 50static const char proc_pid_maps_vsyscall_1[] = 51"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; 52static const char proc_pid_maps_vsyscall_2[] = 53"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; 54 55static const char 
proc_pid_smaps_vsyscall_0[] = ""; 56 57static const char proc_pid_smaps_vsyscall_1[] = 58"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" 59"Size: 4 kB\n" 60"KernelPageSize: 4 kB\n" 61"MMUPageSize: 4 kB\n" 62"Rss: 0 kB\n" 63"Pss: 0 kB\n" 64"Pss_Dirty: 0 kB\n" 65"Shared_Clean: 0 kB\n" 66"Shared_Dirty: 0 kB\n" 67"Private_Clean: 0 kB\n" 68"Private_Dirty: 0 kB\n" 69"Referenced: 0 kB\n" 70"Anonymous: 0 kB\n" 71"LazyFree: 0 kB\n" 72"AnonHugePages: 0 kB\n" 73"ShmemPmdMapped: 0 kB\n" 74"FilePmdMapped: 0 kB\n" 75"Shared_Hugetlb: 0 kB\n" 76"Private_Hugetlb: 0 kB\n" 77"Swap: 0 kB\n" 78"SwapPss: 0 kB\n" 79"Locked: 0 kB\n" 80"THPeligible: 0\n" 81/* 82 * "ProtectionKey:" field is conditional. It is possible to check it as well, 83 * but I don't have such machine. 84 */ 85; 86 87static const char proc_pid_smaps_vsyscall_2[] = 88"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" 89"Size: 4 kB\n" 90"KernelPageSize: 4 kB\n" 91"MMUPageSize: 4 kB\n" 92"Rss: 0 kB\n" 93"Pss: 0 kB\n" 94"Pss_Dirty: 0 kB\n" 95"Shared_Clean: 0 kB\n" 96"Shared_Dirty: 0 kB\n" 97"Private_Clean: 0 kB\n" 98"Private_Dirty: 0 kB\n" 99"Referenced: 0 kB\n" 100"Anonymous: 0 kB\n" 101"LazyFree: 0 kB\n" 102"AnonHugePages: 0 kB\n" 103"ShmemPmdMapped: 0 kB\n" 104"FilePmdMapped: 0 kB\n" 105"Shared_Hugetlb: 0 kB\n" 106"Private_Hugetlb: 0 kB\n" 107"Swap: 0 kB\n" 108"SwapPss: 0 kB\n" 109"Locked: 0 kB\n" 110"THPeligible: 0\n" 111/* 112 * "ProtectionKey:" field is conditional. It is possible to check it as well, 113 * but I'm too tired. 114 */ 115; 116 117static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) 118{ 119 _exit(EXIT_FAILURE); 120} 121 122static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) 123{ 124 _exit(g_vsyscall); 125} 126 127/* 128 * vsyscall page can't be unmapped, probe it directly. 
129 */ 130static void vsyscall(void) 131{ 132 pid_t pid; 133 int wstatus; 134 135 pid = fork(); 136 if (pid < 0) { 137 fprintf(stderr, "fork, errno %d\n", errno); 138 exit(1); 139 } 140 if (pid == 0) { 141 setrlimit(RLIMIT_CORE, &(struct rlimit){}); 142 143 /* Hide "segfault at ffffffffff600000" messages. */ 144 struct sigaction act = {}; 145 act.sa_flags = SA_SIGINFO; 146 act.sa_sigaction = sigaction_SIGSEGV_vsyscall; 147 sigaction(SIGSEGV, &act, NULL); 148 149 g_vsyscall = 0; 150 /* gettimeofday(NULL, NULL); */ 151 uint64_t rax = 0xffffffffff600000; 152 asm volatile ( 153 "call *%[rax]" 154 : [rax] "+a" (rax) 155 : "D" (NULL), "S" (NULL) 156 : "rcx", "r11" 157 ); 158 159 g_vsyscall = 1; 160 *(volatile int *)0xffffffffff600000UL; 161 162 g_vsyscall = 2; 163 exit(g_vsyscall); 164 } 165 waitpid(pid, &wstatus, 0); 166 if (WIFEXITED(wstatus)) { 167 g_vsyscall = WEXITSTATUS(wstatus); 168 } else { 169 fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus); 170 exit(1); 171 } 172} 173 174static int test_proc_pid_maps(pid_t pid) 175{ 176 char buf[4096]; 177 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); 178 int fd = open(buf, O_RDONLY); 179 if (fd == -1) { 180 perror("open /proc/${pid}/maps"); 181 return EXIT_FAILURE; 182 } else { 183 ssize_t rv = read(fd, buf, sizeof(buf)); 184 close(fd); 185 if (g_vsyscall == 0) { 186 assert(rv == 0); 187 } else { 188 size_t len = strlen(g_proc_pid_maps_vsyscall); 189 assert(rv == len); 190 assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); 191 } 192 return EXIT_SUCCESS; 193 } 194} 195 196static int test_proc_pid_numa_maps(pid_t pid) 197{ 198 char buf[4096]; 199 snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid); 200 int fd = open(buf, O_RDONLY); 201 if (fd == -1) { 202 if (errno == ENOENT) { 203 /* 204 * /proc/${pid}/numa_maps is under CONFIG_NUMA, 205 * it doesn't necessarily exist. 
206 */ 207 return EXIT_SUCCESS; 208 } 209 perror("open /proc/${pid}/numa_maps"); 210 return EXIT_FAILURE; 211 } else { 212 ssize_t rv = read(fd, buf, sizeof(buf)); 213 close(fd); 214 assert(rv == 0); 215 return EXIT_SUCCESS; 216 } 217} 218 219static int test_proc_pid_smaps(pid_t pid) 220{ 221 char buf[4096]; 222 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); 223 int fd = open(buf, O_RDONLY); 224 if (fd == -1) { 225 if (errno == ENOENT) { 226 /* 227 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR, 228 * it doesn't necessarily exist. 229 */ 230 return EXIT_SUCCESS; 231 } 232 perror("open /proc/${pid}/smaps"); 233 return EXIT_FAILURE; 234 } else { 235 ssize_t rv = read(fd, buf, sizeof(buf)); 236 close(fd); 237 if (g_vsyscall == 0) { 238 assert(rv == 0); 239 } else { 240 size_t len = strlen(g_proc_pid_maps_vsyscall); 241 /* TODO "ProtectionKey:" */ 242 assert(rv > len); 243 assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); 244 } 245 return EXIT_SUCCESS; 246 } 247} 248 249static const char g_smaps_rollup[] = 250"00000000-00000000 ---p 00000000 00:00 0 [rollup]\n" 251"Rss: 0 kB\n" 252"Pss: 0 kB\n" 253"Pss_Dirty: 0 kB\n" 254"Pss_Anon: 0 kB\n" 255"Pss_File: 0 kB\n" 256"Pss_Shmem: 0 kB\n" 257"Shared_Clean: 0 kB\n" 258"Shared_Dirty: 0 kB\n" 259"Private_Clean: 0 kB\n" 260"Private_Dirty: 0 kB\n" 261"Referenced: 0 kB\n" 262"Anonymous: 0 kB\n" 263"LazyFree: 0 kB\n" 264"AnonHugePages: 0 kB\n" 265"ShmemPmdMapped: 0 kB\n" 266"FilePmdMapped: 0 kB\n" 267"Shared_Hugetlb: 0 kB\n" 268"Private_Hugetlb: 0 kB\n" 269"Swap: 0 kB\n" 270"SwapPss: 0 kB\n" 271"Locked: 0 kB\n" 272; 273 274static int test_proc_pid_smaps_rollup(pid_t pid) 275{ 276 char buf[4096]; 277 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); 278 int fd = open(buf, O_RDONLY); 279 if (fd == -1) { 280 if (errno == ENOENT) { 281 /* 282 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR, 283 * it doesn't necessarily exist. 
284 */ 285 return EXIT_SUCCESS; 286 } 287 perror("open /proc/${pid}/smaps_rollup"); 288 return EXIT_FAILURE; 289 } else { 290 ssize_t rv = read(fd, buf, sizeof(buf)); 291 close(fd); 292 assert(rv == sizeof(g_smaps_rollup) - 1); 293 assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0); 294 return EXIT_SUCCESS; 295 } 296} 297 298int main(void) 299{ 300 int rv = EXIT_SUCCESS; 301 302 vsyscall(); 303 304 switch (g_vsyscall) { 305 case 0: 306 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0; 307 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0; 308 break; 309 case 1: 310 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1; 311 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1; 312 break; 313 case 2: 314 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2; 315 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2; 316 break; 317 default: 318 abort(); 319 } 320 321 pid_t pid = fork(); 322 if (pid == -1) { 323 perror("fork"); 324 return EXIT_FAILURE; 325 } else if (pid == 0) { 326 rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL); 327 if (rv != 0) { 328 if (errno == EPERM) { 329 fprintf(stderr, 330"Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n" 331 ); 332 kill(getppid(), SIGTERM); 333 return EXIT_FAILURE; 334 } 335 perror("ptrace PTRACE_TRACEME"); 336 return EXIT_FAILURE; 337 } 338 339 /* 340 * Hide "segfault at ..." messages. Signal handler won't run. 341 */ 342 struct sigaction act = {}; 343 act.sa_flags = SA_SIGINFO; 344 act.sa_sigaction = sigaction_SIGSEGV; 345 sigaction(SIGSEGV, &act, NULL); 346 347#ifdef __amd64__ 348 munmap(NULL, ((size_t)1 << 47) - 4096); 349#else 350#error "implement 'unmap everything'" 351#endif 352 return EXIT_FAILURE; 353 } else { 354 /* 355 * TODO find reliable way to signal parent that munmap(2) completed. 356 * Child can't do it directly because it effectively doesn't exist 357 * anymore. 
Looking at child's VM files isn't 100% reliable either: 358 * due to a bug they may not become empty or empty-like. 359 */ 360 sleep(1); 361 362 if (rv == EXIT_SUCCESS) { 363 rv = test_proc_pid_maps(pid); 364 } 365 if (rv == EXIT_SUCCESS) { 366 rv = test_proc_pid_numa_maps(pid); 367 } 368 if (rv == EXIT_SUCCESS) { 369 rv = test_proc_pid_smaps(pid); 370 } 371 if (rv == EXIT_SUCCESS) { 372 rv = test_proc_pid_smaps_rollup(pid); 373 } 374 /* 375 * TODO test /proc/${pid}/statm, task_statm() 376 * ->start_code, ->end_code aren't updated by munmap(). 377 * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything. 378 */ 379 380 /* Cut the rope. */ 381 int wstatus; 382 waitpid(pid, &wstatus, 0); 383 assert(WIFSTOPPED(wstatus)); 384 assert(WSTOPSIG(wstatus) == SIGSEGV); 385 } 386 387 return rv; 388}