this repo has no description
1/*
2This file is part of Darling.
3
4Copyright (C) 2015-2018 Lubos Dolezel
5
6Darling is free software: you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation, either version 3 of the License, or
9(at your option) any later version.
10
11Darling is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with Darling. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20#include "threads.h"
21#include <pthread.h>
22#include <sys/mman.h>
23#include <semaphore.h>
24#include <string.h>
25#include <stdbool.h>
26#include <stdlib.h>
27#include <signal.h>
28#include <unistd.h>
29#include <sys/syscall.h>
30#include <setjmp.h>
31#include <sys/syscall.h>
32#include <sys/socket.h>
33#include <stdio.h>
34#include <fcntl.h>
35
36#include "dthreads.h"
37
38#include <darlingserver/rpc.h>
39
// RPC socket helpers defined outside this file.
//
// `__mldr_create_rpc_socket` returns a new file descriptor used to talk to darlingserver;
// every caller in this file treats a negative return value as failure.
extern int __mldr_create_rpc_socket(void);
// Releases a descriptor obtained from `__mldr_create_rpc_socket`
// (presumably by closing it; the implementation is not visible here).
extern void __mldr_close_rpc_socket(int socket);
42
// The point of this file is to build macOS threads on top of the native libc's threads;
// otherwise it would not be possible to make native calls from these threads.
45
// Per-thread state.

// Context saved by setjmp() in darling_thread_entry(); __darling_thread_terminate()
// longjmp()s back to it so the thread can finish on the stack it was started on.
static __thread jmp_buf t_jmpbuf;
// Region that darling_thread_entry() munmap()s after the longjmp;
// both values are filled in by __darling_thread_terminate().
static __thread void* t_freeaddr;
static __thread size_t t_freesize;
// This thread's darlingserver RPC socket, or -1 if none has been assigned yet.
static __thread int t_server_socket = -1;
// Callbacks handed to __darling_thread_create() for this thread; stored by darling_thread_entry().
static __thread darling_thread_create_callbacks_t t_callbacks = NULL;
51
// Signature of the trampoline that darling_thread_entry() jumps to once it has switched stacks.
typedef void (*thread_ep)(void**, int, ...);

// Startup arguments handed from __darling_thread_create() to darling_thread_entry().
//
// WARNING: the i386 inline assembly in darling_thread_entry() reads fields at hard-coded
// byte offsets (8, 12, 16, 20 and 28 for `arg1`, `arg2`, `arg3`, `port` and `pth`),
// so the order and size of the leading members must not change.
struct arg_struct
{
	thread_ep entry_point; // trampoline jumped to on the new stack
	uintptr_t real_entry_point; // 0 for workqueue threads (see `is_workqueue`)
	uintptr_t arg1; // `user_arg` for normal threads; `keventlist` for workqueues
	uintptr_t arg2; // `stack_addr` for normal threads; `flags` for workqueues
	uintptr_t arg3; // `flags` for normal threads; `nkevents` for workqueues
	union {
		void* _backwards_compat; // kept around to avoid modifying assembly
		int port; // filled in by darling_thread_entry() from `thread_self_trap()`
	};
	unsigned long pth_obj_size;
	// dthread/pthread object for the new thread. The new thread sets this member to NULL
	// in the creator's copy to signal that it has finished copying the structure.
	void* pth;
	darling_thread_create_callbacks_t callbacks;
	uintptr_t stack_bottom; // `stack_addr - stack_size`, computed in __darling_thread_create()
	uintptr_t stack_addr; // top (highest address) of the stack the thread switches to
	bool is_workqueue; // true when `real_entry_point` is 0
};
71
// Start routine passed to pthread_create(); defined further down in this file.
static void* darling_thread_entry(void* p);

// Fallback value for environments whose headers do not define PTHREAD_STACK_MIN.
// NOTE(review): nothing in this file as shown references this macro.
#ifndef PTHREAD_STACK_MIN
# define PTHREAD_STACK_MIN 16384
#endif

// Guard region size (in bytes) used when __darling_thread_create() allocates the stack itself.
#define DEFAULT_DTHREAD_GUARD_SIZE 0x1000
79
// Round an address down to the nearest multiple of 16.
//
// Addresses that are already 16-byte aligned are returned unchanged.
// darling_thread_entry() uses this to align the stack top before switching to it.
static inline void *align_16(uintptr_t ptr) {
	const uintptr_t misalignment = ptr % 16;

	return (void *)(ptr - misalignment);
}
83
84static dthread_t dthread_structure_init(dthread_t dthread, size_t guard_size, void* stack_addr, size_t stack_size, void* base_addr, size_t total_size) {
85 // the pthread signature is the address of the pthread XORed with the "pointer munge" token passed in by the kernel
86 // since the LKM doesn't pass in a token, it's always zero, so the signature is equal to just the address
87 dthread->sig = (uintptr_t)dthread;
88
89 dthread->tsd[DTHREAD_TSD_SLOT_PTHREAD_SELF] = dthread;
90 dthread->tsd[DTHREAD_TSD_SLOT_ERRNO] = &dthread->err_no;
91 dthread->tsd[DTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void*)(uintptr_t)(DTHREAD_DEFAULT_PRIORITY);
92 dthread->tsd[DTHREAD_TSD_SLOT_PTR_MUNGE] = 0;
93 dthread->tl_has_custom_stack = 0;
94 dthread->lock = (darwin_os_unfair_lock){0};
95
96 dthread->stackaddr = stack_addr;
97 dthread->stackbottom = (char*)stack_addr - stack_size;
98 dthread->freeaddr = base_addr;
99 dthread->freesize = total_size;
100 dthread->guardsize = guard_size;
101
102 dthread->cancel_state = DTHREAD_CANCEL_ENABLE | DTHREAD_CANCEL_DEFERRED;
103
104 // technically, these next values are defaults; we don't have a way to get more info from the user
105 //
106 // it's not too important since the only cases where we initialize the dthread structure ourselves is when we're working with workqueues,
107 // and those initialize their own dthread structures when they get them
108
109 dthread->tl_joinable = 1;
110 dthread->inherit = DTHREAD_INHERIT_SCHED;
111 dthread->tl_policy = DARWIN_POLICY_TIMESHARE;
112
113 return dthread;
114};
115
116static dthread_t dthread_structure_allocate(size_t stack_size, size_t guard_size, void** stack_addr) {
117 size_t total_size = guard_size + stack_size + sizeof(struct _dthread);
118
119 // allocate our stack, guard page, and dthread structure
120 void* base_addr = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
121
122 // protect our guard page
123 mprotect(base_addr, guard_size, PROT_NONE);
124
125 /**
126 * memory layout of newly allocated block:
127 *
128 * [base_addr] [base_addr + total_size]
129 * --------------------------------------------
130 * | guard page | stack | dthread |
131 */
132
133 // stack_addr points to the top of the stack (i.e. the highest address)
134 *stack_addr = ((char*)base_addr) + stack_size + guard_size;
135
136 // the dthread sits above the stack
137 // (and by "above", i mean the lowest address of the dthread is the highest address of the stack)
138 dthread_t dthread = (dthread_t)*stack_addr;
139 // zero-out the entrire dthread structure
140 memset(dthread, 0, sizeof(struct _dthread));
141
142 return dthread_structure_init(dthread, guard_size, *stack_addr, stack_size, base_addr, total_size);
143};
144
145void* __darling_thread_create(unsigned long stack_size, unsigned long pth_obj_size,
146 void* entry_point, uintptr_t real_entry_point,
147 uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
148 darling_thread_create_callbacks_t callbacks, void* pth)
149{
150 struct arg_struct args = {
151 .entry_point = (thread_ep)entry_point,
152 .real_entry_point = real_entry_point,
153 .arg1 = arg1,
154 .arg2 = arg2,
155 .arg3 = arg3,
156 .port = 0,
157 .pth_obj_size = pth_obj_size,
158 .pth = NULL, // set later on
159 .callbacks = callbacks,
160 .stack_addr = 0, // set later on
161 .is_workqueue = real_entry_point == 0, // our `workq_kernreturn` sets `real_entry_point` to NULL; `bsdthread_create` actually passes a value
162 };
163 pthread_attr_t attr;
164 pthread_t nativeLibcThread;
165
166 pthread_attr_init(&attr);
167 //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
168 // pthread_attr_setstacksize(&attr, stack_size);
169
170 // in some cases, we're already given a pthread object, stack, and guard page;
171 // in those cases, just use what we're given (it also contains more information)
172 //
173 // otherwise, allocate them ourselves
174 if (pth == NULL || args.is_workqueue) {
175 pth = dthread_structure_allocate(stack_size, DEFAULT_DTHREAD_GUARD_SIZE, (void**)&args.stack_addr);
176 } else if (!args.is_workqueue) {
177 // `arg2` is `stack_addr` for normal threads
178 args.stack_addr = arg2;
179 }
180
181 args.stack_bottom = args.stack_addr - stack_size;
182
183 // pthread_attr_setstack is buggy. The documentation states we should provide the lowest
184 // address of the stack, yet some versions regard it as the highest address instead.
185 // Therefore it's better to just make the pthread stack as small as possible and then switch
186 // to our own stack instead.
187 //pthread_attr_setstack(&attr, ((char*)pth) + pth_obj_size, stack_size - pth_obj_size - 0x1000);
188
189 // std::cout << "Allocated stack at " << pth << ", size " << stack_size << std::endl;
190
191 pthread_attr_setstacksize(&attr, 4096);
192
193 //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
194
195 args.pth = pth;
196 pthread_create(&nativeLibcThread, &attr, darling_thread_entry, &args);
197 pthread_attr_destroy(&attr);
198
199 while (args.pth != NULL)
200 sched_yield();
201
202 return pth;
203}
204
// Start routine of every native thread created by __darling_thread_create().
//
// `p` points to the creator's `struct arg_struct`. This function copies it, gives the
// thread its own darlingserver connection, sets up the TSD and thread port, releases the
// creator, and finally switches to the Darwin stack and jumps to `args.entry_point`.
//
// It only returns (with NULL) after __darling_thread_terminate() has longjmp()ed back
// to the setjmp() below.
static void* darling_thread_entry(void* p)
{
	struct arg_struct* in_args = (struct arg_struct*) p;
	struct arg_struct args;

	// take a private copy: `in_args` lives on the creator's stack and is only
	// guaranteed to stay valid until we clear `in_args->pth` further down
	memcpy(&args, in_args, sizeof(args));

	dthread_t dthread = args.pth;
	// the flags argument is `arg2` for workqueue threads and `arg3` for normal threads
	uintptr_t* flags = args.is_workqueue ? &args.arg2 : &args.arg3;

	// create a new dserver RPC socket
	int new_rpc_fd = __mldr_create_rpc_socket();
	if (new_rpc_fd < 0) {
		// we can't do anything if we don't get our own separate connection to darlingserver
		fprintf(stderr, "Failed to create socket\n");
		abort();
	}

	// guard the new RPC FD
	args.callbacks->rpc_guard(new_rpc_fd);

	// the socket is ready; assign it now
	t_server_socket = new_rpc_fd;
	t_callbacks = args.callbacks;

	// libpthread now expects the kernel to set the TSD
	// so, since we're pretending to be the kernel handling threads...
	args.callbacks->thread_set_tsd_base(&dthread->tsd[0], 0);
	// tell the trampoline (via its flags argument) that the TSD base is already set
	*flags |= args.is_workqueue ? DWQ_FLAG_THREAD_TSD_BASE_SET : DTHREAD_START_TSD_BASE_SET;

	// let's check-in with darlingserver on this new thread
	int dummy_stack_variable;
	// the lifetime pipe fd is ignored as the process should already have been registered
	if (dserver_rpc_explicit_checkin(t_server_socket, false, &dummy_stack_variable, -1) < 0) {
		// we can't do ANYTHING if darlingserver doesn't acknowledge us successfully
		abort();
	}

	// record this thread's port both in the TSD and as the trampoline's second argument
	int thread_self_port = args.callbacks->thread_self_trap();
	dthread->tsd[DTHREAD_TSD_SLOT_MACH_THREAD_SELF] = (void*)(intptr_t)thread_self_port;
	args.port = thread_self_port;

	// release the creator, which polls this field in __darling_thread_create();
	// `in_args` must not be touched after this store
	in_args->pth = NULL;

	// setjmp() returns 0 now; it returns non-zero when __darling_thread_terminate()
	// longjmp()s here after storing the region to free in t_freeaddr/t_freesize
	if (setjmp(t_jmpbuf))
	{
		// Terminate the Linux thread
		munmap(t_freeaddr, t_freesize);
		// nobody joins this thread, so detach it to let libc reclaim its resources
		pthread_detach(pthread_self());
		return NULL;
	}

	void *stack_ptr = align_16(args.stack_addr);

	// No additional function calls should occur beyond this point. Otherwise, we will risk our
	// registers being call-clobbered. I recommend reading the following doc for more details:
	// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html
#if __x86_64__
	// pin each trampoline argument to the register named in its asm("...") specifier
	register void* arg1 asm("rdi") = args.pth;
	register int arg2 asm("esi") = args.port;
	register uintptr_t arg3 asm("rdx") = args.real_entry_point;
	register uintptr_t arg4 asm("rcx") = args.arg1;
	register uintptr_t arg5 asm("r8") = args.arg2;
	register uintptr_t arg6 asm("r9") = args.arg3;
#elif __i386__
	uintptr_t arg3 = args.real_entry_point;
#endif

	// `real_entry_point` is 0 for workqueue threads; in that case the third argument
	// carries the stack bottom instead
	if (arg3 == 0) {
		arg3 = (long) args.stack_bottom;
	}

#ifdef __x86_64__
	asm volatile(
		// Zero out the frame base register.
		"xorq %%rbp, %%rbp\n"
		// Switch to the new stack.
		"movq %[stack_ptr], %%rsp\n"
		// Push a fake return address.
		"pushq $0\n"
		// Jump to the entry point.
		"jmp *%[entry_point]" ::

		// Function arguments
		"r"(arg1),"r"(arg2),"r"(arg3),"r"(arg4),"r"(arg5),"r"(arg6),

		[entry_point] "r"(args.entry_point),
		[stack_ptr] "r"(stack_ptr)
	);
#elif defined(__i386__) // args in eax, ebx, ecx, edx, edi, esi
	// the numeric displacements below are byte offsets into `struct arg_struct`;
	// they must be kept in sync with that structure's layout
	__asm__ __volatile__ (
		// Zero out the frame base register.
		"xorl %%ebp, %%ebp\n"
		// Switch to the new stack.
		"movl %[stack_ptr], %%esp\n"
		// Make sure stack is 16 aligned (before we push the fake return address)
		"sub $8, %%esp\n"
		// Unlike x86_64, all function arguments must be stored in the stack
		"pushl 16(%[args])\n" // 6th argument | args.arg3
		"pushl 12(%[args])\n" // 5th argument | args.arg2
		"pushl 8(%[args])\n" // 4th argument | args.arg1
		"pushl %[arg3]\n" // 3rd argument | args3
		"pushl 20(%[args])\n" // 2nd argument | args.port
		"pushl 28(%[args])\n" // 1st argument | args.pth
		// Push a fake return address.
		"pushl $0\n"
		// Jump to the entry point.
		"jmp *%[entry_point]" ::

		// Function arguments to push to the stack.
		[args] "r"(&args), [arg3]"r"(arg3),

		[entry_point] "r"(args.entry_point),
		[stack_ptr] "r"(stack_ptr)
	);
#else
	#error Not implemented
	// args.entry_point(args.pth, args.port, args.real_entry_point, args.arg1, args.arg2, args.arg3);
#endif
	// the asm above jumps away and never falls through
	__builtin_unreachable();
}
326
// Tear down the calling thread.
//
//   stackaddr    start of the region to unmap once the thread is off its Darwin stack
//   freesize     size in bytes of that region
//   pthobj_size  unused by this function
//
// Checks out from darlingserver, closes this thread's RPC socket, and then either
//   - parks forever in sigsuspend() if called on the main thread, or
//   - longjmp()s back into darling_thread_entry(), which unmaps the region and ends
//     the native thread.
// Neither path returns to the caller, so no value is ever actually returned.
//
// Note that `t_server_socket` is deliberately re-read before each use instead of being
// cached. NOTE(review): the non-explicit RPC helpers may assign it as a side effect
// (see __darling_thread_rpc_socket()) — confirm before refactoring.
int __darling_thread_terminate(void* stackaddr,
				unsigned long freesize, unsigned long pthobj_size)
{
	int checkout_result = 0;

	// use this thread's own socket when it has one; otherwise fall back to the
	// variant that does not take an explicit socket
	if (t_server_socket != -1) {
		checkout_result = dserver_rpc_explicit_checkout(t_server_socket, -1, false);
	} else {
		checkout_result = dserver_rpc_checkout(-1, false);
	}

	if (checkout_result < 0) {
		// failing to check-out is not fatal.
		// it's not ideal, but it's not fatal.
		#define CHECKOUT_FAILURE_MESSAGE "Failed to checkout"
		// the length passed excludes the terminating NUL
		if (t_server_socket != -1) {
			dserver_rpc_explicit_kprintf(t_server_socket, CHECKOUT_FAILURE_MESSAGE, sizeof(CHECKOUT_FAILURE_MESSAGE) - 1);
		} else {
			dserver_rpc_kprintf(CHECKOUT_FAILURE_MESSAGE, sizeof(CHECKOUT_FAILURE_MESSAGE) - 1);
		}
	}

	// close the RPC FD (if necessary)
	// it should already have been unguarded by our caller
	if (t_server_socket != -1) {
		__mldr_close_rpc_socket(t_server_socket);
	}

	// on Linux the main thread is the one whose thread ID equals the process ID
	if (getpid() == syscall(SYS_gettid))
	{
		// dispatch_main() calls pthread_exit(NULL) on the main thread,
		// which turns our process into a zombie on Linux.
		// Let's just hang around forever.
		sigset_t mask;
		memset(&mask, 0, sizeof(mask));

		while (1)
			sigsuspend(&mask);
	}

	// hand the region to free over to darling_thread_entry(), which performs the
	// munmap() after the longjmp() below
	t_freeaddr = stackaddr;
	t_freesize = freesize;

	longjmp(t_jmpbuf, 1);

	__builtin_unreachable();
}
374
// Defined outside this file (presumably the top of the main thread's stack as recorded by mldr).
extern void* __mldr_main_stack_top;

// Return the current value of `__mldr_main_stack_top`.
void* __darling_thread_get_stack(void)
{
	void* const main_stack_top = __mldr_main_stack_top;

	return main_stack_top;
}
381
382extern int __dserver_main_thread_socket_fd;
383
384int __darling_thread_rpc_socket(void) {
385 if (t_server_socket == -1) {
386 if (getpid() == syscall(SYS_gettid)) {
387 // this is the main thread
388 t_server_socket = __dserver_main_thread_socket_fd;
389 } else {
390 // threads should already have a per-thread socket assigned when they're created
391 abort();
392 }
393 }
394 return t_server_socket;
395};
396
397void __darling_thread_rpc_socket_refresh(void) {
398 int new_rpc_fd = __mldr_create_rpc_socket();
399 if (new_rpc_fd < 0) {
400 abort();
401 }
402
403 t_server_socket = new_rpc_fd;
404
405 // if this is the main thread, also update the socket used by mldr
406 if (getpid() == syscall(SYS_gettid)) {
407 __dserver_main_thread_socket_fd = t_server_socket;
408 }
409};