Serenity Operating System
1/*
2 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
3 * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <AK/Singleton.h>
9#include <Kernel/Debug.h>
10#include <Kernel/Memory/InodeVMObject.h>
11#include <Kernel/Memory/MemoryManager.h>
12#include <Kernel/Process.h>
13
14namespace Kernel {
15
16static Singleton<SpinlockProtected<HashMap<GlobalFutexKey, NonnullLockRefPtr<FutexQueue>>, LockRank::None>> s_global_futex_queues;
17
18void Process::clear_futex_queues_on_exec()
19{
20 s_global_futex_queues->with([this](auto& queues) {
21 auto const* address_space = this->address_space().with([](auto& space) { return space.ptr(); });
22 queues.remove_all_matching([address_space](auto& futex_key, auto& futex_queue) {
23 if ((futex_key.raw.offset & futex_key_private_flag) == 0)
24 return false;
25 if (futex_key.private_.address_space != address_space)
26 return false;
27 bool did_wake_all;
28 futex_queue->wake_all(did_wake_all);
29 VERIFY(did_wake_all); // No one should be left behind...
30 return true;
31 });
32 });
33}
34
35ErrorOr<GlobalFutexKey> Process::get_futex_key(FlatPtr user_address, bool shared)
36{
37 if (user_address & 0b11) // user_address points to a u32, so must be 4byte aligned
38 return EINVAL;
39
40 auto range = Memory::VirtualRange { VirtualAddress(user_address), sizeof(u32) };
41
42 if (!Kernel::Memory::is_user_range(range))
43 return EFAULT;
44
45 if (!shared) { // If this is thread-shared, we can skip searching the matching region
46 return GlobalFutexKey {
47 .private_ = {
48 .address_space = this->address_space().with([](auto& space) { return space.ptr(); }),
49 .user_address = user_address | futex_key_private_flag,
50 }
51 };
52 }
53
54 return address_space().with([&](auto& space) -> ErrorOr<GlobalFutexKey> {
55 auto* matching_region = space->find_region_containing(range);
56 if (!matching_region)
57 return EFAULT;
58
59 // The user wants to share this futex, but if the address doesn't point to a shared resource, there's not
60 // much sharing to be done, so let's mark this as private
61 if (!matching_region->is_shared()) {
62 return GlobalFutexKey {
63 .private_ = {
64 .address_space = space.ptr(),
65 .user_address = user_address | futex_key_private_flag,
66 }
67 };
68 }
69
70 // This address is backed by a shared VMObject, if it's an AnonymousVMObject, it can be shared between processes
71 // via forking, and shared regions that are cloned during a fork retain their original AnonymousVMObject.
72 // On the other hand, if it's a SharedInodeVMObject, it can be shared by two processes mapping the same file as
73 // MAP_SHARED, but since they are deduplicated based on the inode, in all cases the VMObject pointer should be
74 // a unique global identifier.
75 // NOTE: This assumes that a program will not unmap the only region keeping the vmobject alive while waiting on it,
76 // if it does, it will get stuck waiting forever until interrupted by a signal, but since that use case is defined as
77 // a programmer error, we are fine with it.
78
79 auto const& vmobject = matching_region->vmobject();
80 if (vmobject.is_inode())
81 VERIFY(vmobject.is_shared_inode());
82
83 return GlobalFutexKey {
84 .shared = {
85 .vmobject = &vmobject,
86 .offset = matching_region->offset_in_vmobject_from_vaddr(range.base()) }
87 };
88 });
89}
90
91ErrorOr<FlatPtr> Process::sys$futex(Userspace<Syscall::SC_futex_params const*> user_params)
92{
93 VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this);
94 auto params = TRY(copy_typed_from_user(user_params));
95
96 Thread::BlockTimeout timeout;
97 u32 cmd = params.futex_op & FUTEX_CMD_MASK;
98
99 bool use_realtime_clock = (params.futex_op & FUTEX_CLOCK_REALTIME) != 0;
100 if (use_realtime_clock && cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET) {
101 return ENOSYS;
102 }
103
104 bool shared = (params.futex_op & FUTEX_PRIVATE_FLAG) == 0;
105
106 switch (cmd) {
107 case FUTEX_WAIT:
108 case FUTEX_WAIT_BITSET:
109 case FUTEX_REQUEUE:
110 case FUTEX_CMP_REQUEUE: {
111 if (params.timeout) {
112 auto timeout_time = TRY(copy_time_from_user(params.timeout));
113 bool is_absolute = cmd != FUTEX_WAIT;
114 clockid_t clock_id = use_realtime_clock ? CLOCK_REALTIME_COARSE : CLOCK_MONOTONIC_COARSE;
115 timeout = Thread::BlockTimeout(is_absolute, &timeout_time, nullptr, clock_id);
116 }
117 if (cmd == FUTEX_WAIT_BITSET && params.val3 == FUTEX_BITSET_MATCH_ANY)
118 cmd = FUTEX_WAIT;
119 break;
120 case FUTEX_WAKE_BITSET:
121 if (params.val3 == FUTEX_BITSET_MATCH_ANY)
122 cmd = FUTEX_WAKE;
123 break;
124 }
125 }
126
127 auto find_futex_queue = [&](GlobalFutexKey futex_key, bool create_if_not_found, bool* did_create = nullptr) -> ErrorOr<LockRefPtr<FutexQueue>> {
128 VERIFY(!create_if_not_found || did_create != nullptr);
129 return s_global_futex_queues->with([&](auto& queues) -> ErrorOr<LockRefPtr<FutexQueue>> {
130 auto it = queues.find(futex_key);
131 if (it != queues.end())
132 return it->value;
133 if (!create_if_not_found)
134 return nullptr;
135 *did_create = true;
136 auto futex_queue = TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) FutexQueue));
137 auto result = TRY(queues.try_set(futex_key, futex_queue));
138 VERIFY(result == AK::HashSetResult::InsertedNewEntry);
139 return futex_queue;
140 });
141 };
142
143 auto remove_futex_queue = [&](GlobalFutexKey futex_key) {
144 return s_global_futex_queues->with([&](auto& queues) {
145 auto it = queues.find(futex_key);
146 if (it == queues.end())
147 return;
148 if (it->value->try_remove())
149 queues.remove(it);
150 });
151 };
152
153 auto do_wake = [&](FlatPtr user_address, u32 count, Optional<u32> const& bitmask) -> ErrorOr<int> {
154 if (count == 0)
155 return 0;
156 auto futex_key = TRY(get_futex_key(user_address, shared));
157 auto futex_queue = TRY(find_futex_queue(futex_key, false));
158 if (!futex_queue)
159 return 0;
160 bool is_empty;
161 u32 woke_count = futex_queue->wake_n(count, bitmask, is_empty);
162 if (is_empty) {
163 // If there are no more waiters, we want to get rid of the futex!
164 remove_futex_queue(futex_key);
165 }
166 return (int)woke_count;
167 };
168
169 auto user_address = FlatPtr(params.userspace_address);
170 auto user_address2 = FlatPtr(params.userspace_address2);
171
172 auto do_wait = [&](u32 bitset) -> ErrorOr<FlatPtr> {
173 bool did_create;
174 LockRefPtr<FutexQueue> futex_queue;
175 auto futex_key = TRY(get_futex_key(user_address, shared));
176 do {
177 auto user_value = user_atomic_load_relaxed(params.userspace_address);
178 if (!user_value.has_value())
179 return EFAULT;
180 if (user_value.value() != params.val) {
181 dbgln_if(FUTEX_DEBUG, "futex wait: EAGAIN. user value: {:p} @ {:p} != val: {}", user_value.value(), params.userspace_address, params.val);
182 return EAGAIN;
183 }
184 atomic_thread_fence(AK::MemoryOrder::memory_order_acquire);
185
186 did_create = false;
187 futex_queue = TRY(find_futex_queue(futex_key, true, &did_create));
188 VERIFY(futex_queue);
189 // We need to try again if we didn't create this queue and the existing queue
190 // was removed before we were able to queue an imminent wait.
191 } while (!did_create && !futex_queue->queue_imminent_wait());
192
193 // We must not hold the lock before blocking. But we have a reference
194 // to the FutexQueue so that we can keep it alive.
195
196 Thread::BlockResult block_result = futex_queue->wait_on(timeout, bitset);
197
198 if (futex_queue->is_empty_and_no_imminent_waits()) {
199 // If there are no more waiters, we want to get rid of the futex!
200 remove_futex_queue(futex_key);
201 }
202 if (block_result == Thread::BlockResult::InterruptedByTimeout) {
203 return ETIMEDOUT;
204 }
205 return 0;
206 };
207
208 auto do_requeue = [&](Optional<u32> val3) -> ErrorOr<FlatPtr> {
209 auto user_value = user_atomic_load_relaxed(params.userspace_address);
210 if (!user_value.has_value())
211 return EFAULT;
212 if (val3.has_value() && val3.value() != user_value.value())
213 return EAGAIN;
214 atomic_thread_fence(AK::MemoryOrder::memory_order_acquire);
215
216 auto futex_key = TRY(get_futex_key(user_address, shared));
217 auto futex_queue = TRY(find_futex_queue(futex_key, false));
218 if (!futex_queue)
219 return 0;
220
221 LockRefPtr<FutexQueue> target_futex_queue;
222 bool is_empty = false;
223 bool is_target_empty = false;
224 auto futex_key2 = TRY(get_futex_key(user_address2, shared));
225 auto woken_or_requeued = TRY(futex_queue->wake_n_requeue(
226 params.val, [&]() -> ErrorOr<FutexQueue*> {
227 // NOTE: futex_queue's lock is being held while this callback is called
228 // The reason we're doing this in a callback is that we don't want to always
229 // create a target queue, only if we actually have anything to move to it!
230 target_futex_queue = TRY(find_futex_queue(futex_key2, true));
231 return target_futex_queue.ptr();
232 },
233 params.val2, is_empty, is_target_empty));
234 if (is_empty)
235 remove_futex_queue(futex_key);
236 if (is_target_empty && target_futex_queue)
237 remove_futex_queue(futex_key2);
238 return woken_or_requeued;
239 };
240
241 switch (cmd) {
242 case FUTEX_WAIT:
243 return do_wait(0);
244
245 case FUTEX_WAKE:
246 return TRY(do_wake(user_address, params.val, {}));
247
248 case FUTEX_WAKE_OP: {
249 Optional<u32> oldval;
250 u32 op_arg = _FUTEX_OP_ARG(params.val3);
251 auto op = _FUTEX_OP(params.val3);
252 if (op & FUTEX_OP_ARG_SHIFT) {
253 op_arg = 1 << op_arg;
254 op &= FUTEX_OP_ARG_SHIFT;
255 }
256 atomic_thread_fence(AK::MemoryOrder::memory_order_release);
257 switch (op) {
258 case FUTEX_OP_SET:
259 oldval = user_atomic_exchange_relaxed(params.userspace_address2, op_arg);
260 break;
261 case FUTEX_OP_ADD:
262 oldval = user_atomic_fetch_add_relaxed(params.userspace_address2, op_arg);
263 break;
264 case FUTEX_OP_OR:
265 oldval = user_atomic_fetch_or_relaxed(params.userspace_address2, op_arg);
266 break;
267 case FUTEX_OP_ANDN:
268 oldval = user_atomic_fetch_and_not_relaxed(params.userspace_address2, op_arg);
269 break;
270 case FUTEX_OP_XOR:
271 oldval = user_atomic_fetch_xor_relaxed(params.userspace_address2, op_arg);
272 break;
273 default:
274 return EINVAL;
275 }
276 if (!oldval.has_value())
277 return EFAULT;
278 atomic_thread_fence(AK::MemoryOrder::memory_order_acquire);
279 auto result = TRY(do_wake(user_address, params.val, {}));
280 if (params.val2 > 0) {
281 bool compare_result;
282 switch (_FUTEX_CMP(params.val3)) {
283 case FUTEX_OP_CMP_EQ:
284 compare_result = (oldval.value() == _FUTEX_CMP_ARG(params.val3));
285 break;
286 case FUTEX_OP_CMP_NE:
287 compare_result = (oldval.value() != _FUTEX_CMP_ARG(params.val3));
288 break;
289 case FUTEX_OP_CMP_LT:
290 compare_result = (oldval.value() < _FUTEX_CMP_ARG(params.val3));
291 break;
292 case FUTEX_OP_CMP_LE:
293 compare_result = (oldval.value() <= _FUTEX_CMP_ARG(params.val3));
294 break;
295 case FUTEX_OP_CMP_GT:
296 compare_result = (oldval.value() > _FUTEX_CMP_ARG(params.val3));
297 break;
298 case FUTEX_OP_CMP_GE:
299 compare_result = (oldval.value() >= _FUTEX_CMP_ARG(params.val3));
300 break;
301 default:
302 return EINVAL;
303 }
304 if (compare_result)
305 result += TRY(do_wake(user_address2, params.val2, {}));
306 }
307 return result;
308 }
309
310 case FUTEX_REQUEUE:
311 return do_requeue({});
312
313 case FUTEX_CMP_REQUEUE:
314 return do_requeue(params.val3);
315
316 case FUTEX_WAIT_BITSET:
317 VERIFY(params.val3 != FUTEX_BITSET_MATCH_ANY); // we should have turned it into FUTEX_WAIT
318 if (params.val3 == 0)
319 return EINVAL;
320 return do_wait(params.val3);
321
322 case FUTEX_WAKE_BITSET:
323 VERIFY(params.val3 != FUTEX_BITSET_MATCH_ANY); // we should have turned it into FUTEX_WAKE
324 if (params.val3 == 0)
325 return EINVAL;
326 return TRY(do_wake(user_address, params.val, params.val3));
327 }
328 return ENOSYS;
329}
330
331}