at master 308 lines 7.6 kB view raw
1// SPDX-License-Identifier: LGPL-2.1 2/* 3 * rseq.c 4 * 5 * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; only 10 * version 2.1 of the License. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 */ 17 18#define _GNU_SOURCE 19#include <errno.h> 20#include <sched.h> 21#include <stdio.h> 22#include <stdlib.h> 23#include <string.h> 24#include <unistd.h> 25#include <syscall.h> 26#include <assert.h> 27#include <signal.h> 28#include <limits.h> 29#include <dlfcn.h> 30#include <stddef.h> 31#include <sys/auxv.h> 32#include <linux/auxvec.h> 33 34#include <linux/compiler.h> 35 36#include "kselftest.h" 37#include "rseq.h" 38 39/* 40 * Define weak versions to play nice with binaries that are statically linked 41 * against a libc that doesn't support registering its own rseq. 42 */ 43extern __weak ptrdiff_t __rseq_offset; 44extern __weak unsigned int __rseq_size; 45extern __weak unsigned int __rseq_flags; 46 47static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; 48static const unsigned int *libc_rseq_size_p = &__rseq_size; 49static const unsigned int *libc_rseq_flags_p = &__rseq_flags; 50 51/* Offset from the thread pointer to the rseq area. */ 52ptrdiff_t rseq_offset; 53 54/* 55 * Size of the registered rseq area. 0 if the registration was 56 * unsuccessful. 57 */ 58unsigned int rseq_size = -1U; 59 60/* Flags used during rseq registration. */ 61unsigned int rseq_flags; 62 63static int rseq_ownership; 64 65/* Allocate a large area for the TLS. */ 66#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 67 68/* Original struct rseq feature size is 20 bytes. */ 69#define ORIG_RSEQ_FEATURE_SIZE 20 70 71/* Original struct rseq allocation size is 32 bytes. */ 72#define ORIG_RSEQ_ALLOC_SIZE 32 73 74/* 75 * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an 76 * rseq registration that is larger than the current rseq ABI. 77 */ 78union rseq_tls { 79 struct rseq_abi abi; 80 char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE]; 81}; 82 83static 84__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = { 85 .abi = { 86 .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, 87 }, 88}; 89 90static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, 91 int flags, uint32_t sig) 92{ 93 return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); 94} 95 96static int sys_getcpu(unsigned *cpu, unsigned *node) 97{ 98 return syscall(__NR_getcpu, cpu, node, NULL); 99} 100 101bool rseq_available(void) 102{ 103 int rc; 104 105 rc = sys_rseq(NULL, 0, 0, 0); 106 if (rc != -1) 107 abort(); 108 switch (errno) { 109 case ENOSYS: 110 return false; 111 case EINVAL: 112 return true; 113 default: 114 abort(); 115 } 116} 117 118/* The rseq areas need to be at least 32 bytes. */ 119static 120unsigned int get_rseq_min_alloc_size(void) 121{ 122 unsigned int alloc_size = rseq_size; 123 124 if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) 125 alloc_size = ORIG_RSEQ_ALLOC_SIZE; 126 return alloc_size; 127} 128 129/* 130 * Return the feature size supported by the kernel. 131 * 132 * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): 133 * 134 * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) 135 * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). 136 * 137 * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. 138 */ 139static 140unsigned int get_rseq_kernel_feature_size(void) 141{ 142 unsigned long auxv_rseq_feature_size, auxv_rseq_align; 143 144 auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); 145 assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); 146 147 auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); 148 assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); 149 if (auxv_rseq_feature_size) 150 return auxv_rseq_feature_size; 151 else 152 return ORIG_RSEQ_FEATURE_SIZE; 153} 154 155int rseq_register_current_thread(void) 156{ 157 int rc; 158 159 if (!rseq_ownership) { 160 /* Treat libc's ownership as a successful registration. */ 161 return 0; 162 } 163 rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); 164 if (rc) { 165 /* 166 * After at least one thread has registered successfully 167 * (rseq_size > 0), the registration of other threads should 168 * never fail. 169 */ 170 if (RSEQ_READ_ONCE(rseq_size) > 0) { 171 /* Incoherent success/failure within process. */ 172 abort(); 173 } 174 return -1; 175 } 176 assert(rseq_current_cpu_raw() >= 0); 177 178 /* 179 * The first thread to register sets the rseq_size to mimic the libc 180 * behavior. 181 */ 182 if (RSEQ_READ_ONCE(rseq_size) == 0) { 183 RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); 184 } 185 186 return 0; 187} 188 189int rseq_unregister_current_thread(void) 190{ 191 int rc; 192 193 if (!rseq_ownership) { 194 /* Treat libc's ownership as a successful unregistration. */ 195 return 0; 196 } 197 rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); 198 if (rc) 199 return -1; 200 return 0; 201} 202 203static __attribute__((constructor)) 204void rseq_init(void) 205{ 206 /* 207 * If the libc's registered rseq size isn't already valid, it may be 208 * because the binary is dynamically linked and not necessarily due to 209 * libc not having registered a restartable sequence. Try to find the 210 * symbols if that's the case. 211 */ 212 if (!libc_rseq_size_p || !*libc_rseq_size_p) { 213 libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); 214 libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); 215 libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); 216 } 217 if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && 218 *libc_rseq_size_p != 0) { 219 unsigned int libc_rseq_size; 220 221 /* rseq registration owned by glibc */ 222 rseq_offset = *libc_rseq_offset_p; 223 libc_rseq_size = *libc_rseq_size_p; 224 rseq_flags = *libc_rseq_flags_p; 225 226 /* 227 * Previous versions of glibc expose the value 228 * 32 even though the kernel only supported 20 229 * bytes initially. Therefore treat 32 as a 230 * special-case. glibc 2.40 exposes a 20 bytes 231 * __rseq_size without using getauxval(3) to 232 * query the supported size, while still allocating a 32 233 * bytes area. Also treat 20 as a special-case. 234 * 235 * Special-cases are handled by using the following 236 * value as active feature set size: 237 * 238 * rseq_size = min(32, get_rseq_kernel_feature_size()) 239 */ 240 switch (libc_rseq_size) { 241 case ORIG_RSEQ_FEATURE_SIZE: 242 fallthrough; 243 case ORIG_RSEQ_ALLOC_SIZE: 244 { 245 unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); 246 247 if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) 248 rseq_size = rseq_kernel_feature_size; 249 else 250 rseq_size = ORIG_RSEQ_ALLOC_SIZE; 251 break; 252 } 253 default: 254 /* Otherwise just use the __rseq_size from libc as rseq_size. */ 255 rseq_size = libc_rseq_size; 256 break; 257 } 258 return; 259 } 260 rseq_ownership = 1; 261 262 /* Calculate the offset of the rseq area from the thread pointer. */ 263 rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer(); 264 265 /* rseq flags are deprecated, always set to 0. */ 266 rseq_flags = 0; 267 268 /* 269 * Set the size to 0 until at least one thread registers to mimic the 270 * libc behavior. 271 */ 272 rseq_size = 0; 273} 274 275static __attribute__((destructor)) 276void rseq_exit(void) 277{ 278 if (!rseq_ownership) 279 return; 280 rseq_offset = 0; 281 rseq_size = -1U; 282 rseq_ownership = 0; 283} 284 285int32_t rseq_fallback_current_cpu(void) 286{ 287 int32_t cpu; 288 289 cpu = sched_getcpu(); 290 if (cpu < 0) { 291 perror("sched_getcpu()"); 292 abort(); 293 } 294 return cpu; 295} 296 297int32_t rseq_fallback_current_node(void) 298{ 299 uint32_t cpu_id, node_id; 300 int ret; 301 302 ret = sys_getcpu(&cpu_id, &node_id); 303 if (ret) { 304 perror("sys_getcpu()"); 305 return ret; 306 } 307 return (int32_t) node_id; 308}