this repo has no description
at fixPythonPipStalling 409 lines 13 kB view raw
1/* 2This file is part of Darling. 3 4Copyright (C) 2015-2018 Lubos Dolezel 5 6Darling is free software: you can redistribute it and/or modify 7it under the terms of the GNU General Public License as published by 8the Free Software Foundation, either version 3 of the License, or 9(at your option) any later version. 10 11Darling is distributed in the hope that it will be useful, 12but WITHOUT ANY WARRANTY; without even the implied warranty of 13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14GNU General Public License for more details. 15 16You should have received a copy of the GNU General Public License 17along with Darling. If not, see <http://www.gnu.org/licenses/>. 18*/ 19 20#include "threads.h" 21#include <pthread.h> 22#include <sys/mman.h> 23#include <semaphore.h> 24#include <string.h> 25#include <stdbool.h> 26#include <stdlib.h> 27#include <signal.h> 28#include <unistd.h> 29#include <sys/syscall.h> 30#include <setjmp.h> 31#include <sys/syscall.h> 32#include <sys/socket.h> 33#include <stdio.h> 34#include <fcntl.h> 35 36#include "dthreads.h" 37 38#include <darlingserver/rpc.h> 39 40extern int __mldr_create_rpc_socket(void); 41extern void __mldr_close_rpc_socket(int socket); 42 43// The point of this file is build macOS threads on top of native libc's threads, 44// otherwise it would not be possible to make native calls from these threads. 45 46static __thread jmp_buf t_jmpbuf; 47static __thread void* t_freeaddr; 48static __thread size_t t_freesize; 49static __thread int t_server_socket = -1; 50static __thread darling_thread_create_callbacks_t t_callbacks = NULL; 51 52typedef void (*thread_ep)(void**, int, ...); 53struct arg_struct 54{ 55 thread_ep entry_point; 56 uintptr_t real_entry_point; 57 uintptr_t arg1; // `user_arg` for normal threads; `keventlist` for workqueues 58 uintptr_t arg2; // `stack_addr` for normal threads; `flags` for workqueues 59 uintptr_t arg3; // `flags` for normal threads; `nkevents` for workqueues 60 union { 61 void* _backwards_compat; // kept around to avoid modifying assembly 62 int port; 63 }; 64 unsigned long pth_obj_size; 65 void* pth; 66 darling_thread_create_callbacks_t callbacks; 67 uintptr_t stack_bottom; 68 uintptr_t stack_addr; 69 bool is_workqueue; 70}; 71 72static void* darling_thread_entry(void* p); 73 74#ifndef PTHREAD_STACK_MIN 75# define PTHREAD_STACK_MIN 16384 76#endif 77 78#define DEFAULT_DTHREAD_GUARD_SIZE 0x1000 79 80static inline void *align_16(uintptr_t ptr) { 81 return (void *) ((uintptr_t) ptr & ~(uintptr_t) 15); 82} 83 84static dthread_t dthread_structure_init(dthread_t dthread, size_t guard_size, void* stack_addr, size_t stack_size, void* base_addr, size_t total_size) { 85 // the pthread signature is the address of the pthread XORed with the "pointer munge" token passed in by the kernel 86 // since the LKM doesn't pass in a token, it's always zero, so the signature is equal to just the address 87 dthread->sig = (uintptr_t)dthread; 88 89 dthread->tsd[DTHREAD_TSD_SLOT_PTHREAD_SELF] = dthread; 90 dthread->tsd[DTHREAD_TSD_SLOT_ERRNO] = &dthread->err_no; 91 dthread->tsd[DTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void*)(uintptr_t)(DTHREAD_DEFAULT_PRIORITY); 92 dthread->tsd[DTHREAD_TSD_SLOT_PTR_MUNGE] = 0; 93 dthread->tl_has_custom_stack = 0; 94 dthread->lock = (darwin_os_unfair_lock){0}; 95 96 dthread->stackaddr = stack_addr; 97 dthread->stackbottom = (char*)stack_addr - stack_size; 98 dthread->freeaddr = base_addr; 99 dthread->freesize = total_size; 100 dthread->guardsize = guard_size; 101 102 dthread->cancel_state = DTHREAD_CANCEL_ENABLE | DTHREAD_CANCEL_DEFERRED; 103 104 // technically, these next values are defaults; we don't have a way to get more info from the user 105 // 106 // it's not too important since the only cases where we initialize the dthread structure ourselves is when we're working with workqueues, 107 // and those initialize their own dthread structures when they get them 108 109 dthread->tl_joinable = 1; 110 dthread->inherit = DTHREAD_INHERIT_SCHED; 111 dthread->tl_policy = DARWIN_POLICY_TIMESHARE; 112 113 return dthread; 114}; 115 116static dthread_t dthread_structure_allocate(size_t stack_size, size_t guard_size, void** stack_addr) { 117 size_t total_size = guard_size + stack_size + sizeof(struct _dthread); 118 119 // allocate our stack, guard page, and dthread structure 120 void* base_addr = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 121 122 // protect our guard page 123 mprotect(base_addr, guard_size, PROT_NONE); 124 125 /** 126 * memory layout of newly allocated block: 127 * 128 * [base_addr] [base_addr + total_size] 129 * -------------------------------------------- 130 * | guard page | stack | dthread | 131 */ 132 133 // stack_addr points to the top of the stack (i.e. the highest address) 134 *stack_addr = ((char*)base_addr) + stack_size + guard_size; 135 136 // the dthread sits above the stack 137 // (and by "above", i mean the lowest address of the dthread is the highest address of the stack) 138 dthread_t dthread = (dthread_t)*stack_addr; 139 // zero-out the entrire dthread structure 140 memset(dthread, 0, sizeof(struct _dthread)); 141 142 return dthread_structure_init(dthread, guard_size, *stack_addr, stack_size, base_addr, total_size); 143}; 144 145void* __darling_thread_create(unsigned long stack_size, unsigned long pth_obj_size, 146 void* entry_point, uintptr_t real_entry_point, 147 uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, 148 darling_thread_create_callbacks_t callbacks, void* pth) 149{ 150 struct arg_struct args = { 151 .entry_point = (thread_ep)entry_point, 152 .real_entry_point = real_entry_point, 153 .arg1 = arg1, 154 .arg2 = arg2, 155 .arg3 = arg3, 156 .port = 0, 157 .pth_obj_size = pth_obj_size, 158 .pth = NULL, // set later on 159 .callbacks = callbacks, 160 .stack_addr = 0, // set later on 161 .is_workqueue = real_entry_point == 0, // our `workq_kernreturn` sets `real_entry_point` to NULL; `bsdthread_create` actually passes a value 162 }; 163 pthread_attr_t attr; 164 pthread_t nativeLibcThread; 165 166 pthread_attr_init(&attr); 167 //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); 168 // pthread_attr_setstacksize(&attr, stack_size); 169 170 // in some cases, we're already given a pthread object, stack, and guard page; 171 // in those cases, just use what we're given (it also contains more information) 172 // 173 // otherwise, allocate them ourselves 174 if (pth == NULL || args.is_workqueue) { 175 pth = dthread_structure_allocate(stack_size, DEFAULT_DTHREAD_GUARD_SIZE, (void**)&args.stack_addr); 176 } else if (!args.is_workqueue) { 177 // `arg2` is `stack_addr` for normal threads 178 args.stack_addr = arg2; 179 } 180 181 args.stack_bottom = args.stack_addr - stack_size; 182 183 // pthread_attr_setstack is buggy. The documentation states we should provide the lowest 184 // address of the stack, yet some versions regard it as the highest address instead. 185 // Therefore it's better to just make the pthread stack as small as possible and then switch 186 // to our own stack instead. 187 //pthread_attr_setstack(&attr, ((char*)pth) + pth_obj_size, stack_size - pth_obj_size - 0x1000); 188 189 // std::cout << "Allocated stack at " << pth << ", size " << stack_size << std::endl; 190 191 pthread_attr_setstacksize(&attr, 4096); 192 193 //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); 194 195 args.pth = pth; 196 pthread_create(&nativeLibcThread, &attr, darling_thread_entry, &args); 197 pthread_attr_destroy(&attr); 198 199 while (args.pth != NULL) 200 sched_yield(); 201 202 return pth; 203} 204 205static void* darling_thread_entry(void* p) 206{ 207 struct arg_struct* in_args = (struct arg_struct*) p; 208 struct arg_struct args; 209 210 memcpy(&args, in_args, sizeof(args)); 211 212 dthread_t dthread = args.pth; 213 uintptr_t* flags = args.is_workqueue ? &args.arg2 : &args.arg3; 214 215 // create a new dserver RPC socket 216 int new_rpc_fd = __mldr_create_rpc_socket(); 217 if (new_rpc_fd < 0) { 218 // we can't do anything if we don't get our own separate connection to darlingserver 219 fprintf(stderr, "Failed to create socket\n"); 220 abort(); 221 } 222 223 // guard the new RPC FD 224 args.callbacks->rpc_guard(new_rpc_fd); 225 226 // the socket is ready; assign it now 227 t_server_socket = new_rpc_fd; 228 t_callbacks = args.callbacks; 229 230 // libpthread now expects the kernel to set the TSD 231 // so, since we're pretending to be the kernel handling threads... 232 args.callbacks->thread_set_tsd_base(&dthread->tsd[0], 0); 233 *flags |= args.is_workqueue ? DWQ_FLAG_THREAD_TSD_BASE_SET : DTHREAD_START_TSD_BASE_SET; 234 235 // let's check-in with darlingserver on this new thread 236 int dummy_stack_variable; 237 // the lifetime pipe fd is ignored as the process should already have been registered 238 if (dserver_rpc_explicit_checkin(t_server_socket, false, &dummy_stack_variable, -1) < 0) { 239 // we can't do ANYTHING if darlingserver doesn't acknowledge us successfully 240 abort(); 241 } 242 243 int thread_self_port = args.callbacks->thread_self_trap(); 244 dthread->tsd[DTHREAD_TSD_SLOT_MACH_THREAD_SELF] = (void*)(intptr_t)thread_self_port; 245 args.port = thread_self_port; 246 247 in_args->pth = NULL; 248 249 if (setjmp(t_jmpbuf)) 250 { 251 // Terminate the Linux thread 252 munmap(t_freeaddr, t_freesize); 253 pthread_detach(pthread_self()); 254 return NULL; 255 } 256 257 void *stack_ptr = align_16(args.stack_addr); 258 259 // No additional function calls should occur beyond this point. Otherwise, we will risk our 260 // registers being call-clobbered. I recommend reading the following doc for more details: 261 // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html 262#if __x86_64__ 263 register void* arg1 asm("rdi") = args.pth; 264 register int arg2 asm("esi") = args.port; 265 register uintptr_t arg3 asm("rdx") = args.real_entry_point; 266 register uintptr_t arg4 asm("rcx") = args.arg1; 267 register uintptr_t arg5 asm("r8") = args.arg2; 268 register uintptr_t arg6 asm("r9") = args.arg3; 269#elif __i386__ 270 uintptr_t arg3 = args.real_entry_point; 271#endif 272 273 if (arg3 == 0) { 274 arg3 = (long) args.stack_bottom; 275 } 276 277#ifdef __x86_64__ 278 asm volatile( 279 // Zero out the frame base register. 280 "xorq %%rbp, %%rbp\n" 281 // Switch to the new stack. 282 "movq %[stack_ptr], %%rsp\n" 283 // Push a fake return address. 284 "pushq $0\n" 285 // Jump to the entry point. 286 "jmp *%[entry_point]" :: 287 288 // Function arguments 289 "r"(arg1),"r"(arg2),"r"(arg3),"r"(arg4),"r"(arg5),"r"(arg6), 290 291 [entry_point] "r"(args.entry_point), 292 [stack_ptr] "r"(stack_ptr) 293 ); 294#elif defined(__i386__) // args in eax, ebx, ecx, edx, edi, esi 295 __asm__ __volatile__ ( 296 // Zero out the frame base register. 297 "xorl %%ebp, %%ebp\n" 298 // Switch to the new stack. 299 "movl %[stack_ptr], %%esp\n" 300 // Make sure stack is 16 aligned (before we push the fake return address) 301 "sub $8, %%esp\n" 302 // Unlike x86_64, all function arguments must be stored in the stack 303 "pushl 16(%[args])\n" // 6th argument | args.arg3 304 "pushl 12(%[args])\n" // 5th argument | args.arg2 305 "pushl 8(%[args])\n" // 4th argument | args.arg1 306 "pushl %[arg3]\n" // 3rd argument | args3 307 "pushl 20(%[args])\n" // 2nd argument | args.port 308 "pushl 28(%[args])\n" // 1st argument | args.pth 309 // Push a fake return address. 310 "pushl $0\n" 311 // Jump to the entry point. 312 "jmp *%[entry_point]" :: 313 314 // Function arguments to push to the stack. 315 [args] "r"(&args), [arg3]"r"(arg3), 316 317 [entry_point] "r"(args.entry_point), 318 [stack_ptr] "r"(stack_ptr) 319 ); 320#else 321 #error Not implemented 322 // args.entry_point(args.pth, args.port, args.real_entry_point, args.arg1, args.arg2, args.arg3); 323#endif 324 __builtin_unreachable(); 325} 326 327int __darling_thread_terminate(void* stackaddr, 328 unsigned long freesize, unsigned long pthobj_size) 329{ 330 int checkout_result = 0; 331 332 if (t_server_socket != -1) { 333 checkout_result = dserver_rpc_explicit_checkout(t_server_socket, -1, false); 334 } else { 335 checkout_result = dserver_rpc_checkout(-1, false); 336 } 337 338 if (checkout_result < 0) { 339 // failing to check-out is not fatal. 340 // it's not ideal, but it's not fatal. 341 #define CHECKOUT_FAILURE_MESSAGE "Failed to checkout" 342 if (t_server_socket != -1) { 343 dserver_rpc_explicit_kprintf(t_server_socket, CHECKOUT_FAILURE_MESSAGE, sizeof(CHECKOUT_FAILURE_MESSAGE) - 1); 344 } else { 345 dserver_rpc_kprintf(CHECKOUT_FAILURE_MESSAGE, sizeof(CHECKOUT_FAILURE_MESSAGE) - 1); 346 } 347 } 348 349 // close the RPC FD (if necessary) 350 // it should already have been unguarded by our caller 351 if (t_server_socket != -1) { 352 __mldr_close_rpc_socket(t_server_socket); 353 } 354 355 if (getpid() == syscall(SYS_gettid)) 356 { 357 // dispatch_main() calls pthread_exit(NULL) on the main thread, 358 // which turns our process into a zombie on Linux. 359 // Let's just hang around forever. 360 sigset_t mask; 361 memset(&mask, 0, sizeof(mask)); 362 363 while (1) 364 sigsuspend(&mask); 365 } 366 367 t_freeaddr = stackaddr; 368 t_freesize = freesize; 369 370 longjmp(t_jmpbuf, 1); 371 372 __builtin_unreachable(); 373} 374 375extern void* __mldr_main_stack_top; 376 377void* __darling_thread_get_stack(void) 378{ 379 return __mldr_main_stack_top; 380} 381 382extern int __dserver_main_thread_socket_fd; 383 384int __darling_thread_rpc_socket(void) { 385 if (t_server_socket == -1) { 386 if (getpid() == syscall(SYS_gettid)) { 387 // this is the main thread 388 t_server_socket = __dserver_main_thread_socket_fd; 389 } else { 390 // threads should already have a per-thread socket assigned when they're created 391 abort(); 392 } 393 } 394 return t_server_socket; 395}; 396 397void __darling_thread_rpc_socket_refresh(void) { 398 int new_rpc_fd = __mldr_create_rpc_socket(); 399 if (new_rpc_fd < 0) { 400 abort(); 401 } 402 403 t_server_socket = new_rpc_fd; 404 405 // if this is the main thread, also update the socket used by mldr 406 if (getpid() == syscall(SYS_gettid)) { 407 __dserver_main_thread_socket_fd = t_server_socket; 408 } 409};