Serenity Operating System
1/*
2 * Copyright (c) 2021, Gunnar Beutner <gbeutner@serenityos.org>
3 * Copyright (c) 2021, Sergey Bugaev <bugaevc@serenityos.org>
4 * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
5 *
6 * SPDX-License-Identifier: BSD-2-Clause
7 */
8
9#include <AK/Assertions.h>
10#include <AK/Atomic.h>
11#include <AK/DeprecatedString.h>
12#include <AK/HashMap.h>
13#include <AK/ScopeGuard.h>
14#include <AK/Types.h>
15#include <bits/pthread_cancel.h>
16#include <errno.h>
17#include <fcntl.h>
18#include <pthread.h>
19#include <semaphore.h>
20#include <serenity.h>
21#include <string.h>
22#include <sys/file.h>
23#include <sys/mman.h>
24#include <sys/stat.h>
25
26static constexpr u32 SEM_MAGIC = 0x78951230;
27
28// Whether sem_wait() or sem_post() is responsible for waking any sleeping
29// threads.
30static constexpr u32 POST_WAKES = 1 << 31;
31
32static constexpr auto sem_path_prefix = "/tmp/semaphore/"sv;
33static constexpr auto SEM_NAME_MAX = PATH_MAX - sem_path_prefix.length();
34static ErrorOr<DeprecatedString> sem_name_to_path(char const* name)
35{
36 if (name[0] != '/')
37 return EINVAL;
38 ++name;
39
40 auto name_length = strnlen(name, SEM_NAME_MAX);
41 if (name[name_length])
42 return ENAMETOOLONG;
43
44 auto name_view = StringView { name, name_length };
45 if (name_view.contains('/'))
46 return EINVAL;
47
48 StringBuilder builder;
49 TRY(builder.try_append(sem_path_prefix));
50 TRY(builder.try_append(name_view));
51 return builder.to_deprecated_string();
52}
53
54struct NamedSemaphore {
55 size_t times_opened { 0 };
56 dev_t dev { 0 };
57 ino_t ino { 0 };
58 sem_t* sem { nullptr };
59};
60
61static HashMap<DeprecatedString, NamedSemaphore> s_named_semaphores;
62static pthread_mutex_t s_sem_mutex = PTHREAD_MUTEX_INITIALIZER;
63static pthread_once_t s_sem_once = PTHREAD_ONCE_INIT;
64
65// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_open.html
66sem_t* sem_open(char const* name, int flags, ...)
67{
68 auto path_or_error = sem_name_to_path(name);
69 if (path_or_error.is_error()) {
70 errno = path_or_error.error().code();
71 return SEM_FAILED;
72 }
73 auto path = path_or_error.release_value();
74
75 if (flags & ~(O_CREAT | O_EXCL)) {
76 errno = EINVAL;
77 return SEM_FAILED;
78 }
79
80 mode_t mode = 0;
81 unsigned int value = 0;
82 if (flags & O_CREAT) {
83 va_list ap;
84 va_start(ap, flags);
85 mode = va_arg(ap, unsigned int);
86 value = va_arg(ap, unsigned int);
87 va_end(ap);
88 }
89
90 // Ensure we are not in the middle of modifying this structure while a child is being forked, which will cause the child to end up with a partially-modified entry
91 pthread_once(&s_sem_once, []() {
92 pthread_atfork([]() { pthread_mutex_lock(&s_sem_mutex); }, []() { pthread_mutex_unlock(&s_sem_mutex); }, []() { pthread_mutex_unlock(&s_sem_mutex); });
93 });
94
95 pthread_mutex_lock(&s_sem_mutex);
96 ScopeGuard unlock_guard = [] { pthread_mutex_unlock(&s_sem_mutex); };
97
98 int fd = open(path.characters(), O_RDWR | O_CLOEXEC | flags, mode);
99 if (fd == -1)
100 return SEM_FAILED;
101
102 ScopeGuard close_guard = [&fd] {
103 if (fd != -1)
104 close(fd);
105 };
106
107 if (flock(fd, LOCK_EX) == -1)
108 return SEM_FAILED;
109
110 struct stat statbuf;
111 if (fstat(fd, &statbuf) == -1)
112 return SEM_FAILED;
113
114 auto existing_semaphore = s_named_semaphores.get(path);
115 if (existing_semaphore.has_value()) {
116 // If the file did not exist (aka if O_CREAT && O_EXCL but no EEXIST), or if the inode was replaced, remove the entry and start from scratch
117 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL) || existing_semaphore->dev != statbuf.st_dev || existing_semaphore->ino != statbuf.st_ino) {
118 s_named_semaphores.remove(path);
119 } else { // otherwise, this is valid pre-existing named semaphore, so just increase the count and return it
120 existing_semaphore->times_opened++;
121 return existing_semaphore->sem;
122 }
123 }
124
125 // If the file is smaller than the size, it's an uninitialized semaphore, so let's write an initial value
126 if (statbuf.st_size < (off_t)sizeof(sem_t)) {
127 sem_t init_sem;
128 init_sem.magic = SEM_MAGIC;
129 init_sem.value = value;
130 init_sem.flags = SEM_FLAG_PROCESS_SHARED | SEM_FLAG_NAMED;
131 if (write(fd, &init_sem, sizeof(sem_t)) != sizeof(sem_t))
132 return SEM_FAILED;
133 }
134
135 if (flock(fd, LOCK_UN) == -1)
136 return SEM_FAILED;
137
138 auto* sem = (sem_t*)mmap(nullptr, sizeof(sem_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
139 if (sem == MAP_FAILED)
140 return SEM_FAILED;
141
142 ArmedScopeGuard munmap_guard = [&sem] {
143 munmap(sem, sizeof(sem_t));
144 };
145
146 if (sem->magic != SEM_MAGIC) {
147 errno = EINVAL;
148 return SEM_FAILED;
149 }
150
151 auto result = s_named_semaphores.try_set(move(path), { .times_opened = 1, .dev = statbuf.st_dev, .ino = statbuf.st_ino, .sem = sem });
152 if (result.is_error()) {
153 errno = result.error().code();
154 return SEM_FAILED;
155 }
156
157 munmap_guard.disarm();
158 return sem;
159}
160
161// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_close.html
162int sem_close(sem_t* sem)
163{
164 if (sem->magic != SEM_MAGIC) {
165 errno = EINVAL;
166 return -1;
167 }
168
169 if ((sem->flags & SEM_FLAG_NAMED) == 0) {
170 errno = EINVAL;
171 return -1;
172 }
173
174 pthread_mutex_lock(&s_sem_mutex);
175 ScopeGuard unlock_guard = [] { pthread_mutex_unlock(&s_sem_mutex); };
176
177 auto it = s_named_semaphores.begin();
178 for (; it != s_named_semaphores.end(); ++it) {
179 if (it->value.sem != sem)
180 continue;
181 auto is_last = --it->value.times_opened == 0;
182 if (is_last) {
183 munmap(it->value.sem, sizeof(sem_t));
184 s_named_semaphores.remove(it);
185 }
186 return 0;
187 }
188
189 errno = EINVAL;
190 return -1;
191}
192
193// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_unlink.html
194int sem_unlink(char const* name)
195{
196 auto path_or_error = sem_name_to_path(name);
197 if (path_or_error.is_error()) {
198 errno = path_or_error.error().code();
199 return -1;
200 }
201 auto path = path_or_error.release_value();
202
203 return unlink(path.characters());
204}
205
206// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_init.html
207int sem_init(sem_t* sem, int process_shared, unsigned int value)
208{
209 if (value > SEM_VALUE_MAX) {
210 errno = EINVAL;
211 return -1;
212 }
213
214 sem->magic = SEM_MAGIC;
215 sem->value = value;
216 sem->flags = process_shared ? SEM_FLAG_PROCESS_SHARED : 0;
217 return 0;
218}
219
220// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_destroy.html
221int sem_destroy(sem_t* sem)
222{
223 if (sem->magic != SEM_MAGIC) {
224 errno = EINVAL;
225 return -1;
226 }
227
228 if (sem->flags & SEM_FLAG_NAMED) {
229 errno = EINVAL;
230 return -1;
231 }
232
233 sem->magic = 0;
234 return 0;
235}
236
237// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_getvalue.html
238int sem_getvalue(sem_t* sem, int* sval)
239{
240 if (sem->magic != SEM_MAGIC) {
241 errno = EINVAL;
242 return -1;
243 }
244
245 u32 value = AK::atomic_load(&sem->value, AK::memory_order_relaxed);
246 *sval = value & ~POST_WAKES;
247 return 0;
248}
249
250// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_post.html
251int sem_post(sem_t* sem)
252{
253 if (sem->magic != SEM_MAGIC) {
254 errno = EINVAL;
255 return -1;
256 }
257
258 u32 value = AK::atomic_fetch_add(&sem->value, 1u, AK::memory_order_release);
259 // Fast path: no need to wake.
260 if (!(value & POST_WAKES)) [[likely]]
261 return 0;
262
263 // Pass the responsibility for waking more threads if more slots become
264 // available later to sem_wait() in the thread we're about to wake, as
265 // opposed to further sem_post() calls that free up those slots.
266 value = AK::atomic_fetch_and(&sem->value, ~POST_WAKES, AK::memory_order_relaxed);
267 // Check if another sem_post() call has handled it already.
268 if (!(value & POST_WAKES)) [[likely]]
269 return 0;
270 int rc = futex_wake(&sem->value, 1, sem->flags & SEM_FLAG_PROCESS_SHARED);
271 VERIFY(rc >= 0);
272 return 0;
273}
274
275// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_trywait.html
276int sem_trywait(sem_t* sem)
277{
278 if (sem->magic != SEM_MAGIC) {
279 errno = EINVAL;
280 return -1;
281 }
282
283 u32 value = AK::atomic_load(&sem->value, AK::memory_order_relaxed);
284 u32 count = value & ~POST_WAKES;
285 if (count == 0) {
286 errno = EAGAIN;
287 return -1;
288 }
289 // Decrement the count without touching the flag.
290 u32 desired = (count - 1) | (value & POST_WAKES);
291 bool exchanged = AK::atomic_compare_exchange_strong(&sem->value, value, desired, AK::memory_order_acquire);
292 if (exchanged) [[likely]] {
293 return 0;
294 } else {
295 errno = EAGAIN;
296 return -1;
297 }
298}
299
300// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_wait.html
301int sem_wait(sem_t* sem)
302{
303 if (sem->magic != SEM_MAGIC) {
304 errno = EINVAL;
305 return -1;
306 }
307
308 return sem_timedwait(sem, nullptr);
309}
310
311// https://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_timedwait.html
312int sem_timedwait(sem_t* sem, const struct timespec* abstime)
313{
314 __pthread_maybe_cancel();
315
316 if (sem->magic != SEM_MAGIC) {
317 errno = EINVAL;
318 return -1;
319 }
320
321 u32 value = AK::atomic_load(&sem->value, AK::memory_order_relaxed);
322 bool responsible_for_waking = false;
323 bool process_shared = sem->flags & SEM_FLAG_PROCESS_SHARED;
324
325 while (true) {
326 u32 count = value & ~POST_WAKES;
327 if (count > 0) [[likely]] {
328 // It looks like there are some free slots.
329 u32 whether_post_wakes = value & POST_WAKES;
330 bool going_to_wake = false;
331 if (responsible_for_waking && !whether_post_wakes) {
332 // If we have ourselves been woken up previously, and the
333 // POST_WAKES flag is not set, that means some more slots might
334 // be available now, and it's us who has to wake up additional
335 // threads.
336 if (count > 1) [[unlikely]]
337 going_to_wake = true;
338 // Pass the responsibility for waking up further threads back to
339 // sem_post() calls. In particular, we don't want the threads
340 // we're about to wake to try to wake anyone else.
341 whether_post_wakes = POST_WAKES;
342 }
343 // Now, try to commit this.
344 u32 desired = (count - 1) | whether_post_wakes;
345 bool exchanged = AK::atomic_compare_exchange_strong(&sem->value, value, desired, AK::memory_order_acquire);
346 if (!exchanged) [[unlikely]]
347 // Re-evaluate.
348 continue;
349 if (going_to_wake) [[unlikely]] {
350 int rc = futex_wake(&sem->value, count - 1, process_shared);
351 VERIFY(rc >= 0);
352 }
353 return 0;
354 }
355 // We're probably going to sleep, so attempt to set the flag. We do not
356 // commit to sleeping yet, though, as setting the flag may fail and
357 // cause us to reevaluate what we're doing.
358 if (value == 0) {
359 bool exchanged = AK::atomic_compare_exchange_strong(&sem->value, value, POST_WAKES, AK::memory_order_relaxed);
360 if (!exchanged) [[unlikely]]
361 // Re-evaluate.
362 continue;
363 value = POST_WAKES;
364 }
365 // At this point, we're committed to sleeping.
366 responsible_for_waking = true;
367 futex_wait(&sem->value, value, abstime, CLOCK_REALTIME, process_shared);
368 // This is the state we will probably see upon being waked:
369 value = 1;
370 }
371}