Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <AK/Demangle.h>
28#include <AK/FileSystemPath.h>
29#include <AK/ScopeGuard.h>
30#include <AK/StdLibExtras.h>
31#include <AK/StringBuilder.h>
32#include <AK/Time.h>
33#include <AK/Types.h>
34#include <Kernel/Arch/i386/CPU.h>
35#include <Kernel/Devices/BlockDevice.h>
36#include <Kernel/Devices/KeyboardDevice.h>
37#include <Kernel/Devices/NullDevice.h>
38#include <Kernel/Devices/PCSpeaker.h>
39#include <Kernel/Devices/PIT.h>
40#include <Kernel/Devices/RandomDevice.h>
41#include <Kernel/FileSystem/Custody.h>
42#include <Kernel/FileSystem/DevPtsFS.h>
43#include <Kernel/FileSystem/Ext2FileSystem.h>
44#include <Kernel/FileSystem/FIFO.h>
45#include <Kernel/FileSystem/FileDescription.h>
46#include <Kernel/FileSystem/InodeWatcher.h>
47#include <Kernel/FileSystem/ProcFS.h>
48#include <Kernel/FileSystem/TmpFS.h>
49#include <Kernel/FileSystem/VirtualFileSystem.h>
50#include <Kernel/Heap/kmalloc.h>
51#include <Kernel/KBufferBuilder.h>
52#include <Kernel/KSyms.h>
53#include <Kernel/KernelInfoPage.h>
54#include <Kernel/Module.h>
55#include <Kernel/Multiboot.h>
56#include <Kernel/Net/Socket.h>
57#include <Kernel/PerformanceEventBuffer.h>
58#include <Kernel/Process.h>
59#include <Kernel/ProcessTracer.h>
60#include <Kernel/Profiling.h>
61#include <Kernel/RTC.h>
62#include <Kernel/Random.h>
63#include <Kernel/Scheduler.h>
64#include <Kernel/SharedBuffer.h>
65#include <Kernel/Syscall.h>
66#include <Kernel/TTY/MasterPTY.h>
67#include <Kernel/TTY/TTY.h>
68#include <Kernel/Thread.h>
69#include <Kernel/VM/InodeVMObject.h>
70#include <Kernel/VM/PageDirectory.h>
71#include <Kernel/VM/PurgeableVMObject.h>
72#include <LibBareMetal/IO.h>
73#include <LibBareMetal/Output/Console.h>
74#include <LibBareMetal/StdLib.h>
75#include <LibC/errno_numbers.h>
76#include <LibC/limits.h>
77#include <LibC/signal_numbers.h>
78#include <LibELF/ELFLoader.h>
79
80//#define PROCESS_DEBUG
81//#define DEBUG_POLL_SELECT
82//#define DEBUG_IO
83//#define TASK_DEBUG
84//#define FORK_DEBUG
85//#define EXEC_DEBUG
86//#define SIGNAL_DEBUG
87//#define SHARED_BUFFER_DEBUG
88
89namespace Kernel {
90
91static void create_signal_trampolines();
92static void create_kernel_info_page();
93
94Process* Process::current;
95
96static pid_t next_pid;
97InlineLinkedList<Process>* g_processes;
98static String* s_hostname;
99static Lock* s_hostname_lock;
100static VirtualAddress s_info_page_address_for_userspace;
101static VirtualAddress s_info_page_address_for_kernel;
102VirtualAddress g_return_to_ring3_from_signal_trampoline;
103HashMap<String, OwnPtr<Module>>* g_modules;
104
105pid_t Process::allocate_pid()
106{
107 InterruptDisabler disabler;
108 return next_pid++;
109}
110
111void Process::initialize()
112{
113 g_modules = new HashMap<String, OwnPtr<Module>>;
114
115 next_pid = 0;
116 g_processes = new InlineLinkedList<Process>;
117 s_hostname = new String("courage");
118 s_hostname_lock = new Lock;
119
120 create_signal_trampolines();
121 create_kernel_info_page();
122}
123
124void Process::update_info_page_timestamp(const timeval& tv)
125{
126 auto* info_page = (KernelInfoPage*)s_info_page_address_for_kernel.as_ptr();
127 info_page->serial++;
128 const_cast<timeval&>(info_page->now) = tv;
129}
130
131Vector<pid_t> Process::all_pids()
132{
133 Vector<pid_t> pids;
134 InterruptDisabler disabler;
135 pids.ensure_capacity((int)g_processes->size_slow());
136 for (auto& process : *g_processes)
137 pids.append(process.pid());
138 return pids;
139}
140
141Vector<Process*> Process::all_processes()
142{
143 Vector<Process*> processes;
144 InterruptDisabler disabler;
145 processes.ensure_capacity((int)g_processes->size_slow());
146 for (auto& process : *g_processes)
147 processes.append(&process);
148 return processes;
149}
150
151bool Process::in_group(gid_t gid) const
152{
153 return m_gid == gid || m_extra_gids.contains(gid);
154}
155
156Range Process::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment)
157{
158 vaddr.mask(PAGE_MASK);
159 size = PAGE_ROUND_UP(size);
160 if (vaddr.is_null())
161 return page_directory().range_allocator().allocate_anywhere(size, alignment);
162 return page_directory().range_allocator().allocate_specific(vaddr, size);
163}
164
165static unsigned prot_to_region_access_flags(int prot)
166{
167 unsigned access = 0;
168 if (prot & PROT_READ)
169 access |= Region::Access::Read;
170 if (prot & PROT_WRITE)
171 access |= Region::Access::Write;
172 if (prot & PROT_EXEC)
173 access |= Region::Access::Execute;
174 return access;
175}
176
177Region& Process::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject)
178{
179 auto& region = add_region(Region::create_user_accessible(range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access()));
180 region.set_mmap(source_region.is_mmap());
181 region.set_stack(source_region.is_stack());
182 size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
183 for (size_t i = 0; i < region.page_count(); ++i) {
184 if (source_region.should_cow(page_offset_in_source_region + i))
185 region.set_should_cow(i, true);
186 }
187 return region;
188}
189
190Region* Process::allocate_region(const Range& range, const String& name, int prot, bool commit)
191{
192 ASSERT(range.is_valid());
193 auto& region = add_region(Region::create_user_accessible(range, name, prot_to_region_access_flags(prot)));
194 region.map(page_directory());
195 if (commit)
196 region.commit();
197 return ®ion;
198}
199
200Region* Process::allocate_region(VirtualAddress vaddr, size_t size, const String& name, int prot, bool commit)
201{
202 auto range = allocate_range(vaddr, size);
203 if (!range.is_valid())
204 return nullptr;
205 return allocate_region(range, name, prot, commit);
206}
207
208Region* Process::allocate_file_backed_region(VirtualAddress vaddr, size_t size, NonnullRefPtr<Inode> inode, const String& name, int prot)
209{
210 auto range = allocate_range(vaddr, size);
211 if (!range.is_valid())
212 return nullptr;
213 auto& region = add_region(Region::create_user_accessible(range, inode, name, prot_to_region_access_flags(prot)));
214 region.map(page_directory());
215 return ®ion;
216}
217
218Region* Process::allocate_region_with_vmobject(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool user_accessible)
219{
220 ASSERT(range.is_valid());
221 size_t end_in_vmobject = offset_in_vmobject + range.size();
222 if (end_in_vmobject <= offset_in_vmobject) {
223 dbgprintf("allocate_region_with_vmobject: Overflow (offset + size)\n");
224 return nullptr;
225 }
226 if (offset_in_vmobject >= vmobject->size()) {
227 dbgprintf("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.\n");
228 return nullptr;
229 }
230 if (end_in_vmobject > vmobject->size()) {
231 dbgprintf("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.\n");
232 return nullptr;
233 }
234 offset_in_vmobject &= PAGE_MASK;
235 Region* region;
236 if (user_accessible)
237 region = &add_region(Region::create_user_accessible(range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot)));
238 else
239 region = &add_region(Region::create_kernel_only(range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot)));
240 region->map(page_directory());
241 return region;
242}
243
244
245Region* Process::allocate_region_with_vmobject(VirtualAddress vaddr, size_t size, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool user_accessible)
246{
247 auto range = allocate_range(vaddr, size);
248 if (!range.is_valid())
249 return nullptr;
250 return allocate_region_with_vmobject(range, move(vmobject), offset_in_vmobject, name, prot, user_accessible);
251}
252
253bool Process::deallocate_region(Region& region)
254{
255 InterruptDisabler disabler;
256 if (m_region_lookup_cache.region == ®ion)
257 m_region_lookup_cache.region = nullptr;
258 for (size_t i = 0; i < m_regions.size(); ++i) {
259 if (&m_regions[i] == ®ion) {
260 m_regions.unstable_remove(i);
261 return true;
262 }
263 }
264 return false;
265}
266
267Region* Process::region_from_range(const Range& range)
268{
269 if (m_region_lookup_cache.range == range && m_region_lookup_cache.region)
270 return m_region_lookup_cache.region;
271
272 size_t size = PAGE_ROUND_UP(range.size());
273 for (auto& region : m_regions) {
274 if (region.vaddr() == range.base() && region.size() == size) {
275 m_region_lookup_cache.range = range;
276 m_region_lookup_cache.region = region.make_weak_ptr();
277 return ®ion;
278 }
279 }
280 return nullptr;
281}
282
283Region* Process::region_containing(const Range& range)
284{
285 for (auto& region : m_regions) {
286 if (region.contains(range))
287 return ®ion;
288 }
289 return nullptr;
290}
291
292int Process::sys$set_mmap_name(const Syscall::SC_set_mmap_name_params* user_params)
293{
294 REQUIRE_PROMISE(stdio);
295
296 Syscall::SC_set_mmap_name_params params;
297 if (!validate_read_and_copy_typed(¶ms, user_params))
298 return -EFAULT;
299
300 if (params.name.length > PATH_MAX)
301 return -ENAMETOOLONG;
302
303 auto name = validate_and_copy_string_from_user(params.name);
304 if (name.is_null())
305 return -EFAULT;
306
307 auto* region = region_from_range({ VirtualAddress(params.addr), params.size });
308 if (!region)
309 return -EINVAL;
310 if (!region->is_mmap())
311 return -EPERM;
312 region->set_name(name);
313 return 0;
314}
315
316static bool validate_mmap_prot(int prot, bool map_stack)
317{
318 bool readable = prot & PROT_READ;
319 bool writable = prot & PROT_WRITE;
320 bool executable = prot & PROT_EXEC;
321
322 if (writable && executable)
323 return false;
324
325 if (map_stack) {
326 if (executable)
327 return false;
328 if (!readable || !writable)
329 return false;
330 }
331
332 return true;
333}
334
335static bool validate_inode_mmap_prot(const Process& process, int prot, const Inode& inode)
336{
337 auto metadata = inode.metadata();
338 if ((prot & PROT_WRITE) && !metadata.may_write(process))
339 return false;
340 if ((prot & PROT_READ) && !metadata.may_read(process))
341 return false;
342 InterruptDisabler disabler;
343 if (inode.vmobject()) {
344 if ((prot & PROT_EXEC) && inode.vmobject()->writable_mappings())
345 return false;
346 if ((prot & PROT_WRITE) && inode.vmobject()->executable_mappings())
347 return false;
348 }
349 return true;
350}
351
352// Carve out a virtual address range from a region and return the two regions on either side
353Vector<Region*, 2> Process::split_region_around_range(const Region& source_region, const Range& desired_range)
354{
355 Range old_region_range = source_region.range();
356 auto remaining_ranges_after_unmap = old_region_range.carve(desired_range);
357
358 ASSERT(!remaining_ranges_after_unmap.is_empty());
359 auto make_replacement_region = [&](const Range& new_range) -> Region& {
360 ASSERT(new_range.base() >= old_region_range.base());
361 ASSERT(new_range.end() <= old_region_range.end());
362 size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get());
363 return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject);
364 };
365 Vector<Region*, 2> new_regions;
366 for (auto& new_range : remaining_ranges_after_unmap) {
367 new_regions.unchecked_append(&make_replacement_region(new_range));
368 }
369 return new_regions;
370}
371
372void* Process::sys$mmap(const Syscall::SC_mmap_params* user_params)
373{
374 REQUIRE_PROMISE(stdio);
375
376 Syscall::SC_mmap_params params;
377 if (!validate_read_and_copy_typed(¶ms, user_params))
378 return (void*)-EFAULT;
379
380 void* addr = (void*)params.addr;
381 size_t size = params.size;
382 size_t alignment = params.alignment;
383 int prot = params.prot;
384 int flags = params.flags;
385 int fd = params.fd;
386 int offset = params.offset;
387
388 if (alignment & ~PAGE_MASK)
389 return (void*)-EINVAL;
390
391 if (!is_user_range(VirtualAddress(addr), size))
392 return (void*)-EFAULT;
393
394 String name;
395 if (params.name.characters) {
396 if (params.name.length > PATH_MAX)
397 return (void*)-ENAMETOOLONG;
398 name = validate_and_copy_string_from_user(params.name);
399 if (name.is_null())
400 return (void*)-EFAULT;
401 }
402
403 if (size == 0)
404 return (void*)-EINVAL;
405 if ((uintptr_t)addr & ~PAGE_MASK)
406 return (void*)-EINVAL;
407
408 bool map_shared = flags & MAP_SHARED;
409 bool map_anonymous = flags & MAP_ANONYMOUS;
410 bool map_purgeable = flags & MAP_PURGEABLE;
411 bool map_private = flags & MAP_PRIVATE;
412 bool map_stack = flags & MAP_STACK;
413 bool map_fixed = flags & MAP_FIXED;
414
415 if (map_shared && map_private)
416 return (void*)-EINVAL;
417
418 if (!map_shared && !map_private)
419 return (void*)-EINVAL;
420
421 if (!validate_mmap_prot(prot, map_stack))
422 return (void*)-EINVAL;
423
424 if (map_stack && (!map_private || !map_anonymous))
425 return (void*)-EINVAL;
426
427 Region* region = nullptr;
428
429 auto range = allocate_range(VirtualAddress(addr), size, alignment);
430 if (!range.is_valid())
431 return (void*)-ENOMEM;
432
433 if (map_purgeable) {
434 auto vmobject = PurgeableVMObject::create_with_size(size);
435 region = allocate_region_with_vmobject(range, vmobject, 0, !name.is_null() ? name : "mmap (purgeable)", prot);
436 if (!region && (!map_fixed && addr != 0))
437 region = allocate_region_with_vmobject({}, size, vmobject, 0, !name.is_null() ? name : "mmap (purgeable)", prot);
438 } else if (map_anonymous) {
439 region = allocate_region(range, !name.is_null() ? name : "mmap", prot, false);
440 if (!region && (!map_fixed && addr != 0))
441 region = allocate_region(allocate_range({}, size), !name.is_null() ? name : "mmap", prot, false);
442 } else {
443 if (offset < 0)
444 return (void*)-EINVAL;
445 if (static_cast<size_t>(offset) & ~PAGE_MASK)
446 return (void*)-EINVAL;
447 // FIXME: Implement MAP_PRIVATE for FileDescription-backed mmap
448 if (map_private)
449 return (void*)-ENOTSUP;
450 auto description = file_description(fd);
451 if (!description)
452 return (void*)-EBADF;
453 if (description->is_directory())
454 return (void*)-ENODEV;
455 if ((prot & PROT_READ) && !description->is_readable())
456 return (void*)-EACCES;
457 if ((prot & PROT_WRITE) && !description->is_writable())
458 return (void*)-EACCES;
459 if (description->inode()) {
460 if (!validate_inode_mmap_prot(*this, prot, *description->inode()))
461 return (void*)-EACCES;
462 }
463 auto region_or_error = description->mmap(*this, VirtualAddress(addr), static_cast<size_t>(offset), size, prot);
464 if (region_or_error.is_error()) {
465 // Fail if MAP_FIXED or address is 0, retry otherwise
466 if (map_fixed || addr == 0)
467 return (void*)(int)region_or_error.error();
468 region_or_error = description->mmap(*this, {}, static_cast<size_t>(offset), size, prot);
469 }
470 if (region_or_error.is_error())
471 return (void*)(int)region_or_error.error();
472 region = region_or_error.value();
473 }
474
475 if (!region)
476 return (void*)-ENOMEM;
477 region->set_mmap(true);
478 if (map_shared)
479 region->set_shared(true);
480 if (map_stack)
481 region->set_stack(true);
482 if (!name.is_null())
483 region->set_name(name);
484 return region->vaddr().as_ptr();
485}
486
487int Process::sys$munmap(void* addr, size_t size)
488{
489 REQUIRE_PROMISE(stdio);
490
491 if (!size)
492 return -EINVAL;
493
494 if (!is_user_range(VirtualAddress(addr), size))
495 return -EFAULT;
496
497 Range range_to_unmap { VirtualAddress(addr), size };
498 if (auto* whole_region = region_from_range(range_to_unmap)) {
499 if (!whole_region->is_mmap())
500 return -EPERM;
501 bool success = deallocate_region(*whole_region);
502 ASSERT(success);
503 return 0;
504 }
505
506 if (auto* old_region = region_containing(range_to_unmap)) {
507 if (!old_region->is_mmap())
508 return -EPERM;
509
510 auto new_regions = split_region_around_range(*old_region, range_to_unmap);
511
512 // We manually unmap the old region here, specifying that we *don't* want the VM deallocated.
513 old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No);
514 deallocate_region(*old_region);
515
516 // Instead we give back the unwanted VM manually.
517 page_directory().range_allocator().deallocate(range_to_unmap);
518
519 // And finally we map the new region(s) using our page directory (they were just allocated and don't have one).
520 for (auto* new_region : new_regions) {
521 new_region->map(page_directory());
522 }
523 return 0;
524 }
525
526 // FIXME: We should also support munmap() across multiple regions. (#175)
527
528 return -EINVAL;
529}
530
531int Process::sys$mprotect(void* addr, size_t size, int prot)
532{
533 REQUIRE_PROMISE(stdio);
534
535 if (!size)
536 return -EINVAL;
537
538 if (!is_user_range(VirtualAddress(addr), size))
539 return -EFAULT;
540
541 Range range_to_mprotect = { VirtualAddress(addr), size };
542
543 if (auto* whole_region = region_from_range(range_to_mprotect)) {
544 if (!whole_region->is_mmap())
545 return -EPERM;
546 if (!validate_mmap_prot(prot, whole_region->is_stack()))
547 return -EINVAL;
548 if (whole_region->access() == prot_to_region_access_flags(prot))
549 return 0;
550 if (whole_region->vmobject().is_inode()
551 && !validate_inode_mmap_prot(*this, prot, static_cast<const InodeVMObject&>(whole_region->vmobject()).inode())) {
552 return -EACCES;
553 }
554 whole_region->set_readable(prot & PROT_READ);
555 whole_region->set_writable(prot & PROT_WRITE);
556 whole_region->set_executable(prot & PROT_EXEC);
557 whole_region->remap();
558 return 0;
559 }
560
561 // Check if we can carve out the desired range from an existing region
562 if (auto* old_region = region_containing(range_to_mprotect)) {
563 if (!old_region->is_mmap())
564 return -EPERM;
565 if (!validate_mmap_prot(prot, old_region->is_stack()))
566 return -EINVAL;
567 if (old_region->access() == prot_to_region_access_flags(prot))
568 return 0;
569 if (old_region->vmobject().is_inode()
570 && !validate_inode_mmap_prot(*this, prot, static_cast<const InodeVMObject&>(old_region->vmobject()).inode())) {
571 return -EACCES;
572 }
573
574 // This vector is the region(s) adjacent to our range.
575 // We need to allocate a new region for the range we wanted to change permission bits on.
576 auto adjacent_regions = split_region_around_range(*old_region, range_to_mprotect);
577
578 size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (range_to_mprotect.base().get() - old_region->range().base().get());
579 auto& new_region = allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject);
580 new_region.set_readable(prot & PROT_READ);
581 new_region.set_writable(prot & PROT_WRITE);
582 new_region.set_executable(prot & PROT_EXEC);
583
584 // Unmap the old region here, specifying that we *don't* want the VM deallocated.
585 old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No);
586 deallocate_region(*old_region);
587
588 // Map the new regions using our page directory (they were just allocated and don't have one).
589 for (auto* adjacent_region : adjacent_regions) {
590 adjacent_region->map(page_directory());
591 }
592 new_region.map(page_directory());
593 return 0;
594 }
595
596 // FIXME: We should also support mprotect() across multiple regions. (#175) (#964)
597
598 return -EINVAL;
599}
600
601int Process::sys$madvise(void* address, size_t size, int advice)
602{
603 REQUIRE_PROMISE(stdio);
604
605 if (!size)
606 return -EINVAL;
607
608 if (!is_user_range(VirtualAddress(address), size))
609 return -EFAULT;
610
611 auto* region = region_from_range({ VirtualAddress(address), size });
612 if (!region)
613 return -EINVAL;
614 if (!region->is_mmap())
615 return -EPERM;
616 if ((advice & MADV_SET_VOLATILE) && (advice & MADV_SET_NONVOLATILE))
617 return -EINVAL;
618 if (advice & MADV_SET_VOLATILE) {
619 if (!region->vmobject().is_purgeable())
620 return -EPERM;
621 auto& vmobject = static_cast<PurgeableVMObject&>(region->vmobject());
622 vmobject.set_volatile(true);
623 return 0;
624 }
625 if (advice & MADV_SET_NONVOLATILE) {
626 if (!region->vmobject().is_purgeable())
627 return -EPERM;
628 auto& vmobject = static_cast<PurgeableVMObject&>(region->vmobject());
629 if (!vmobject.is_volatile())
630 return 0;
631 vmobject.set_volatile(false);
632 bool was_purged = vmobject.was_purged();
633 vmobject.set_was_purged(false);
634 return was_purged ? 1 : 0;
635 }
636 if (advice & MADV_GET_VOLATILE) {
637 if (!region->vmobject().is_purgeable())
638 return -EPERM;
639 auto& vmobject = static_cast<PurgeableVMObject&>(region->vmobject());
640 return vmobject.is_volatile() ? 0 : 1;
641 }
642 return -EINVAL;
643}
644
645int Process::sys$purge(int mode)
646{
647 REQUIRE_NO_PROMISES;
648 if (!is_superuser())
649 return -EPERM;
650 int purged_page_count = 0;
651 if (mode & PURGE_ALL_VOLATILE) {
652 NonnullRefPtrVector<PurgeableVMObject> vmobjects;
653 {
654 InterruptDisabler disabler;
655 MM.for_each_vmobject([&](auto& vmobject) {
656 if (vmobject.is_purgeable())
657 vmobjects.append(static_cast<PurgeableVMObject&>(vmobject));
658 return IterationDecision::Continue;
659 });
660 }
661 for (auto& vmobject : vmobjects) {
662 purged_page_count += vmobject.purge();
663 }
664 }
665 if (mode & PURGE_ALL_CLEAN_INODE) {
666 NonnullRefPtrVector<InodeVMObject> vmobjects;
667 {
668 InterruptDisabler disabler;
669 MM.for_each_vmobject([&](auto& vmobject) {
670 if (vmobject.is_inode())
671 vmobjects.append(static_cast<InodeVMObject&>(vmobject));
672 return IterationDecision::Continue;
673 });
674 }
675 for (auto& vmobject : vmobjects) {
676 purged_page_count += vmobject.release_all_clean_pages();
677 }
678 }
679 return purged_page_count;
680}
681
682int Process::sys$gethostname(char* buffer, ssize_t size)
683{
684 REQUIRE_PROMISE(stdio);
685 if (size < 0)
686 return -EINVAL;
687 if (!validate_write(buffer, size))
688 return -EFAULT;
689 LOCKER(*s_hostname_lock);
690 if ((size_t)size < (s_hostname->length() + 1))
691 return -ENAMETOOLONG;
692 copy_to_user(buffer, s_hostname->characters(), s_hostname->length() + 1);
693 return 0;
694}
695
696pid_t Process::sys$fork(RegisterState& regs)
697{
698 REQUIRE_PROMISE(proc);
699 Thread* child_first_thread = nullptr;
700 auto* child = new Process(child_first_thread, m_name, m_uid, m_gid, m_pid, m_ring, m_cwd, m_executable, m_tty, this);
701 child->m_root_directory = m_root_directory;
702 child->m_root_directory_relative_to_global_root = m_root_directory_relative_to_global_root;
703 child->m_promises = m_promises;
704 child->m_execpromises = m_execpromises;
705 child->m_veil_state = m_veil_state;
706 child->m_unveiled_paths = m_unveiled_paths;
707 child->m_fds = m_fds;
708 child->m_sid = m_sid;
709 child->m_pgid = m_pgid;
710 child->m_umask = m_umask;
711
712#ifdef FORK_DEBUG
713 dbgprintf("fork: child=%p\n", child);
714#endif
715
716 for (auto& region : m_regions) {
717#ifdef FORK_DEBUG
718 dbg() << "fork: cloning Region{" << ®ion << "} '" << region.name() << "' @ " << region.vaddr();
719#endif
720 auto& child_region = child->add_region(region.clone());
721 child_region.map(child->page_directory());
722
723 if (®ion == m_master_tls_region)
724 child->m_master_tls_region = &child_region;
725 }
726
727 child->m_extra_gids = m_extra_gids;
728
729 auto& child_tss = child_first_thread->m_tss;
730 child_tss.eax = 0; // fork() returns 0 in the child :^)
731 child_tss.ebx = regs.ebx;
732 child_tss.ecx = regs.ecx;
733 child_tss.edx = regs.edx;
734 child_tss.ebp = regs.ebp;
735 child_tss.esp = regs.userspace_esp;
736 child_tss.esi = regs.esi;
737 child_tss.edi = regs.edi;
738 child_tss.eflags = regs.eflags;
739 child_tss.eip = regs.eip;
740 child_tss.cs = regs.cs;
741 child_tss.ds = regs.ds;
742 child_tss.es = regs.es;
743 child_tss.fs = regs.fs;
744 child_tss.gs = regs.gs;
745 child_tss.ss = regs.userspace_ss;
746
747#ifdef FORK_DEBUG
748 dbgprintf("fork: child will begin executing at %w:%x with stack %w:%x, kstack %w:%x\n", child_tss.cs, child_tss.eip, child_tss.ss, child_tss.esp, child_tss.ss0, child_tss.esp0);
749#endif
750
751 {
752 InterruptDisabler disabler;
753 g_processes->prepend(child);
754 }
755#ifdef TASK_DEBUG
756 kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child_tss.eip);
757#endif
758
759 child_first_thread->set_state(Thread::State::Skip1SchedulerPass);
760 return child->pid();
761}
762
763int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Vector<String> arguments, Vector<String> environment, RefPtr<FileDescription> interpreter_description)
764{
765 ASSERT(is_ring3());
766 auto path = main_program_description->absolute_path();
767 dbg() << "do_exec(" << path << ")";
768 // FIXME(Thread): Kill any threads the moment we commit to the exec().
769 if (thread_count() != 1) {
770 dbgprintf("Gonna die because I have many threads! These are the threads:\n");
771 for_each_thread([](Thread& thread) {
772 dbgprintf("Thread{%p}: TID=%d, PID=%d\n", &thread, thread.tid(), thread.pid());
773 return IterationDecision::Continue;
774 });
775 ASSERT(thread_count() == 1);
776 ASSERT_NOT_REACHED();
777 }
778
779 size_t total_blob_size = 0;
780 for (auto& a : arguments)
781 total_blob_size += a.length() + 1;
782 for (auto& e : environment)
783 total_blob_size += e.length() + 1;
784
785 size_t total_meta_size = sizeof(char*) * (arguments.size() + 1) + sizeof(char*) * (environment.size() + 1);
786
787 // FIXME: How much stack space does process startup need?
788 if ((total_blob_size + total_meta_size) >= Thread::default_userspace_stack_size)
789 return -E2BIG;
790
791 auto parts = path.split('/');
792 if (parts.is_empty())
793 return -ENOENT;
794
795 RefPtr<InodeVMObject> vmobject;
796 if (interpreter_description) {
797 vmobject = InodeVMObject::create_with_inode(*interpreter_description->inode());
798 } else {
799 vmobject = InodeVMObject::create_with_inode(*main_program_description->inode());
800 }
801
802 if (static_cast<const InodeVMObject&>(*vmobject).writable_mappings()) {
803 dbg() << "Refusing to execute a write-mapped program";
804 return -ETXTBSY;
805 }
806
807 // Disable profiling temporarily in case it's running on this process.
808 bool was_profiling = is_profiling();
809 TemporaryChange profiling_disabler(m_profiling, false);
810
811 auto old_page_directory = move(m_page_directory);
812 auto old_regions = move(m_regions);
813 m_page_directory = PageDirectory::create_for_userspace(*this);
814#ifdef MM_DEBUG
815 dbgprintf("Process %u exec: PD=%x created\n", pid(), m_page_directory.ptr());
816#endif
817
818 MM.enter_process_paging_scope(*this);
819
820 Region* region { nullptr };
821
822 InodeMetadata loader_metadata;
823
824 // FIXME: Hoooo boy this is a hack if I ever saw one.
825 // This is the 'random' offset we're giving to our ET_DYN exectuables to start as.
826 // It also happens to be the static Virtual Addresss offset every static exectuable gets :)
827 // Without this, some assumptions by the ELF loading hooks below are severely broken.
828 // 0x08000000 is a verified random number chosen by random dice roll https://xkcd.com/221/
829 u32 totally_random_offset = interpreter_description ? 0x08000000 : 0;
830
831 // FIXME: We should be able to load both the PT_INTERP interpreter and the main program... once the RTLD is smart enough
832 if (interpreter_description) {
833 loader_metadata = interpreter_description->metadata();
834 region = allocate_region_with_vmobject(VirtualAddress(), loader_metadata.size, *vmobject, 0, interpreter_description->absolute_path(), PROT_READ, false);
835 // we don't need the interpreter file desciption after we've loaded (or not) it into memory
836 interpreter_description = nullptr;
837 } else {
838 loader_metadata = main_program_description->metadata();
839 region = allocate_region_with_vmobject(VirtualAddress(), loader_metadata.size, *vmobject, 0, main_program_description->absolute_path(), PROT_READ, false);
840 }
841
842 ASSERT(region);
843
844 Region* master_tls_region { nullptr };
845 size_t master_tls_size = 0;
846 size_t master_tls_alignment = 0;
847 u32 entry_eip = 0;
848
849 OwnPtr<ELFLoader> loader;
850 {
851 ArmedScopeGuard rollback_regions_guard([&]() {
852 ASSERT(Process::current == this);
853 m_page_directory = move(old_page_directory);
854 m_regions = move(old_regions);
855 MM.enter_process_paging_scope(*this);
856 });
857
858 loader = make<ELFLoader>(region->vaddr().as_ptr(), loader_metadata.size);
859
860 // Load the correct executable -- either interp or main program.
861 // FIXME: Once we actually load both interp and main, we'll need to be more clever about this.
862 // In that case, both will be ET_DYN objects, so they'll both be completely relocatable.
863 // That means, we can put them literally anywhere in User VM space (ASLR anyone?).
864 // ALSO FIXME: Reminder to really really fix that 'totally random offset' business.
865 loader->map_section_hook = [&](VirtualAddress vaddr, size_t size, size_t alignment, size_t offset_in_image, bool is_readable, bool is_writable, bool is_executable, const String& name) -> u8* {
866 ASSERT(size);
867 ASSERT(alignment == PAGE_SIZE);
868 int prot = 0;
869 if (is_readable)
870 prot |= PROT_READ;
871 if (is_writable)
872 prot |= PROT_WRITE;
873 if (is_executable)
874 prot |= PROT_EXEC;
875 if (auto* region = allocate_region_with_vmobject(vaddr.offset(totally_random_offset), size, *vmobject, offset_in_image, String(name), prot))
876 return region->vaddr().as_ptr();
877 return nullptr;
878 };
879 loader->alloc_section_hook = [&](VirtualAddress vaddr, size_t size, size_t alignment, bool is_readable, bool is_writable, const String& name) -> u8* {
880 ASSERT(size);
881 ASSERT(alignment == PAGE_SIZE);
882 int prot = 0;
883 if (is_readable)
884 prot |= PROT_READ;
885 if (is_writable)
886 prot |= PROT_WRITE;
887 if (auto* region = allocate_region(vaddr.offset(totally_random_offset), size, String(name), prot))
888 return region->vaddr().as_ptr();
889 return nullptr;
890 };
891
892 // FIXME: Move TLS region allocation to userspace: LibC and the dynamic loader.
893 // LibC if we end up with a statically linked executable, and the
894 // dynamic loader so that it can create new TLS blocks for each shared libarary
895 // that gets loaded as part of DT_NEEDED processing, and via dlopen()
896 // If that doesn't happen quickly, at least pass the location of the TLS region
897 // some ELF Auxilliary Vector so the loader can use it/create new ones as necessary.
898 loader->tls_section_hook = [&](size_t size, size_t alignment) {
899 ASSERT(size);
900 master_tls_region = allocate_region({}, size, String(), PROT_READ | PROT_WRITE);
901 master_tls_size = size;
902 master_tls_alignment = alignment;
903 return master_tls_region->vaddr().as_ptr();
904 };
905 bool success = loader->load();
906 if (!success) {
907 kprintf("do_exec: Failure loading %s\n", path.characters());
908 return -ENOEXEC;
909 }
        // FIXME: Validate that this virtual address is within an executable region,
        //     instead of just non-null. You could totally have a DSO with an entry point at
        //     the beginning of the text segment.
913 if (!loader->entry().offset(totally_random_offset).get()) {
914 kprintf("do_exec: Failure loading %s, entry pointer is invalid! (%p)\n", path.characters(), loader->entry().offset(totally_random_offset).get());
915 return -ENOEXEC;
916 }
917
918 rollback_regions_guard.disarm();
919
920 // NOTE: At this point, we've committed to the new executable.
921 entry_eip = loader->entry().offset(totally_random_offset).get();
922
923#ifdef EXEC_DEBUG
924 kprintf("Memory layout after ELF load:");
925 dump_regions();
926#endif
927 }
928
929 m_elf_loader = move(loader);
930 m_executable = main_program_description->custody();
931
932 m_promises = m_execpromises;
933
934 m_veil_state = VeilState::None;
935 m_unveiled_paths.clear();
936
937 // Copy of the master TLS region that we will clone for new threads
938 m_master_tls_region = master_tls_region;
939
940 auto main_program_metadata = main_program_description->metadata();
941
942 if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
943 if (main_program_metadata.is_setuid())
944 m_euid = main_program_metadata.uid;
945 if (main_program_metadata.is_setgid())
946 m_egid = main_program_metadata.gid;
947 }
948
949 Thread::current->set_default_signal_dispositions();
950 Thread::current->m_signal_mask = 0;
951 Thread::current->m_pending_signals = 0;
952
953 m_futex_queues.clear();
954
955 m_region_lookup_cache = {};
956
957 disown_all_shared_buffers();
958
959 for (size_t i = 0; i < m_fds.size(); ++i) {
960 auto& daf = m_fds[i];
961 if (daf.description && daf.flags & FD_CLOEXEC) {
962 daf.description->close();
963 daf = {};
964 }
965 }
966
967 Thread* new_main_thread = nullptr;
968 if (Process::current == this) {
969 new_main_thread = Thread::current;
970 } else {
971 for_each_thread([&](auto& thread) {
972 new_main_thread = &thread;
973 return IterationDecision::Break;
974 });
975 }
976 ASSERT(new_main_thread);
977
978 // NOTE: We create the new stack before disabling interrupts since it will zero-fault
979 // and we don't want to deal with faults after this point.
980 u32 new_userspace_esp = new_main_thread->make_userspace_stack_for_main_thread(move(arguments), move(environment));
981
982 // We cli() manually here because we don't want to get interrupted between do_exec() and Schedule::yield().
983 // The reason is that the task redirection we've set up above will be clobbered by the timer IRQ.
984 // If we used an InterruptDisabler that sti()'d on exit, we might timer tick'd too soon in exec().
985 if (Process::current == this)
986 cli();
987
988 // NOTE: Be careful to not trigger any page faults below!
989
990 Scheduler::prepare_to_modify_tss(*new_main_thread);
991
992 m_name = parts.take_last();
993 new_main_thread->set_name(m_name);
994
995 auto& tss = new_main_thread->m_tss;
996
997 u32 old_esp0 = tss.esp0;
998
999 m_master_tls_size = master_tls_size;
1000 m_master_tls_alignment = master_tls_alignment;
1001
1002 new_main_thread->make_thread_specific_region({});
1003 new_main_thread->reset_fpu_state();
1004
1005 memset(&tss, 0, sizeof(TSS32));
1006 tss.iomapbase = sizeof(TSS32);
1007
1008 tss.eflags = 0x0202;
1009 tss.eip = entry_eip;
1010 tss.cs = 0x1b;
1011 tss.ds = 0x23;
1012 tss.es = 0x23;
1013 tss.fs = 0x23;
1014 tss.gs = thread_specific_selector() | 3;
1015 tss.ss = 0x23;
1016 tss.cr3 = page_directory().cr3();
1017 tss.esp = new_userspace_esp;
1018 tss.ss0 = 0x10;
1019 tss.esp0 = old_esp0;
1020 tss.ss2 = m_pid;
1021
1022#ifdef TASK_DEBUG
1023 kprintf("Process %u (%s) exec'd %s @ %p\n", pid(), name().characters(), path.characters(), tss.eip);
1024#endif
1025
1026 if (was_profiling)
1027 Profiling::did_exec(path);
1028
1029 new_main_thread->set_state(Thread::State::Skip1SchedulerPass);
1030 big_lock().force_unlock_if_locked();
1031 return 0;
1032}
1033
1034static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread)
1035{
1036 int word_start = 2;
1037 int word_length = 0;
1038 if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
1039 Vector<String> interpreter_words;
1040
1041 for (int i = 2; i < nread; ++i) {
1042 if (first_page[i] == '\n') {
1043 break;
1044 }
1045
1046 if (first_page[i] != ' ') {
1047 ++word_length;
1048 }
1049
1050 if (first_page[i] == ' ') {
1051 if (word_length > 0) {
1052 interpreter_words.append(String(&first_page[word_start], word_length));
1053 }
1054 word_length = 0;
1055 word_start = i + 1;
1056 }
1057 }
1058
1059 if (word_length > 0)
1060 interpreter_words.append(String(&first_page[word_start], word_length));
1061
1062 if (!interpreter_words.is_empty())
1063 return interpreter_words;
1064 }
1065
1066 return KResult(-ENOEXEC);
1067}
1068
1069KResultOr<NonnullRefPtr<FileDescription>> Process::find_elf_interpreter_for_executable(const String& path, char (&first_page)[PAGE_SIZE], int nread, size_t file_size)
1070{
1071 if (nread < (int)sizeof(Elf32_Ehdr))
1072 return KResult(-ENOEXEC);
1073
1074 auto elf_header = (Elf32_Ehdr*)first_page;
1075 if (!ELFImage::validate_elf_header(*elf_header, file_size)) {
1076 dbgprintf("%s(%d) exec(%s): File has invalid ELF header\n", m_name.characters(), m_pid, path.characters());
1077 return KResult(-ENOEXEC);
1078 }
1079
1080 // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
1081 String interpreter_path;
1082 if (!ELFImage::validate_program_headers(*elf_header, file_size, (u8*)first_page, nread, interpreter_path)) {
1083 dbgprintf("%s(%d) exec(%s): File has invalid ELF Program headers\n", m_name.characters(), m_pid, path.characters());
1084 return KResult(-ENOEXEC);
1085 }
1086
1087 if (!interpreter_path.is_empty()) {
1088 // Programs with an interpreter better be relocatable executables or we don't know what to do...
1089 if (elf_header->e_type != ET_DYN)
1090 return KResult(-ENOEXEC);
1091
1092 dbgprintf("%s(%d) exec(%s): Using program interpreter %s\n", m_name.characters(), m_pid, path.characters(), interpreter_path.characters());
1093 auto interp_result = VFS::the().open(interpreter_path, O_EXEC, 0, current_directory());
1094 if (interp_result.is_error()) {
1095 dbgprintf("%s(%d) exec(%s): Unable to open program interpreter %s\n", m_name.characters(), m_pid, path.characters(), interpreter_path.characters());
1096 return interp_result.error();
1097 }
1098 auto interpreter_description = interp_result.value();
1099 auto interp_metadata = interpreter_description->metadata();
1100
1101 ASSERT(interpreter_description->inode());
1102
1103 // Validate the program interpreter as a valid elf binary.
1104 // If your program interpreter is a #! file or something, it's time to stop playing games :)
1105 if (interp_metadata.size < (int)sizeof(Elf32_Ehdr))
1106 return KResult(-ENOEXEC);
1107
1108 memset(first_page, 0, sizeof(first_page));
1109 nread = interpreter_description->read((u8*)&first_page, sizeof(first_page));
1110
1111 if (nread < (int)sizeof(Elf32_Ehdr))
1112 return KResult(-ENOEXEC);
1113
1114 elf_header = (Elf32_Ehdr*)first_page;
1115 if (!ELFImage::validate_elf_header(*elf_header, interp_metadata.size)) {
1116 dbgprintf("%s(%d) exec(%s): Interpreter (%s) has invalid ELF header\n", m_name.characters(), m_pid, path.characters(), interpreter_description->absolute_path().characters());
1117 return KResult(-ENOEXEC);
1118 }
1119
1120 // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
1121 String interpreter_interpreter_path;
1122 if (!ELFImage::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, interpreter_interpreter_path)) {
1123 dbgprintf("%s(%d) exec(%s): Interpreter (%s) has invalid ELF Program headers\n", m_name.characters(), m_pid, path.characters(), interpreter_description->absolute_path().characters());
1124 return KResult(-ENOEXEC);
1125 }
1126
1127 if (!interpreter_interpreter_path.is_empty()) {
1128 dbgprintf("%s(%d) exec(%s): Interpreter (%s) has its own interpreter (%s)! No thank you!\n",
1129 m_name.characters(), m_pid, path.characters(), interpreter_description->absolute_path().characters(), interpreter_interpreter_path.characters());
1130 return KResult(-ELOOP);
1131 }
1132
1133 return interpreter_description;
1134 }
1135
1136 if (elf_header->e_type != ET_EXEC) {
1137 // We can't exec an ET_REL, that's just an object file from the compiler
1138 // If it's ET_DYN with no PT_INTERP, then we can't load it properly either
1139 return KResult(-ENOEXEC);
1140 }
1141
1142 // No interpreter, but, path refers to a valid elf image
1143 return KResult(KSuccess);
1144}
1145
1146int Process::exec(String path, Vector<String> arguments, Vector<String> environment, int recursion_depth)
1147{
1148 if (recursion_depth > 2) {
1149 dbgprintf("%s(%d) exec(%s): SHENANIGANS! recursed too far trying to find #! interpreter\n", m_name.characters(), m_pid, path.characters());
1150 return -ELOOP;
1151 }
1152
1153 // Open the file to check what kind of binary format it is
1154 // Currently supported formats:
1155 // - #! interpreted file
1156 // - ELF32
1157 // * ET_EXEC binary that just gets loaded
1158 // * ET_DYN binary that requires a program interpreter
1159 //
1160 auto result = VFS::the().open(path, O_EXEC, 0, current_directory());
1161 if (result.is_error())
1162 return result.error();
1163 auto description = result.value();
1164 auto metadata = description->metadata();
1165
1166 // Always gonna need at least 3 bytes. these are for #!X
1167 if (metadata.size < 3)
1168 return -ENOEXEC;
1169
1170 ASSERT(description->inode());
1171
1172 // Read the first page of the program into memory so we can validate the binfmt of it
1173 char first_page[PAGE_SIZE];
1174 int nread = description->read((u8*)&first_page, sizeof(first_page));
1175
1176 // 1) #! interpreted file
1177 auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread);
1178 if (!shebang_result.is_error()) {
1179 Vector<String> new_arguments(shebang_result.value());
1180
1181 new_arguments.append(path);
1182
1183 arguments.remove(0);
1184 new_arguments.append(move(arguments));
1185
1186 return exec(shebang_result.value().first(), move(new_arguments), move(environment), ++recursion_depth);
1187 }
1188
1189 // #2) ELF32 for i386
1190 auto elf_result = find_elf_interpreter_for_executable(path, first_page, nread, metadata.size);
1191 RefPtr<FileDescription> interpreter_description;
1192 // We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out)
1193 if (!elf_result.is_error())
1194 interpreter_description = elf_result.value();
1195 else if (elf_result.error().is_error())
1196 return elf_result.error();
1197
1198 // The bulk of exec() is done by do_exec(), which ensures that all locals
1199 // are cleaned up by the time we yield-teleport below.
1200 int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description));
1201 if (rc < 0)
1202 return rc;
1203
1204 if (Process::current == this) {
1205 Scheduler::yield();
1206 ASSERT_NOT_REACHED();
1207 }
1208 return 0;
1209}
1210
1211int Process::sys$execve(const Syscall::SC_execve_params* user_params)
1212{
1213 REQUIRE_PROMISE(exec);
1214 // NOTE: Be extremely careful with allocating any kernel memory in exec().
1215 // On success, the kernel stack will be lost.
1216 Syscall::SC_execve_params params;
1217 if (!validate_read_and_copy_typed(¶ms, user_params))
1218 return -EFAULT;
1219
1220 if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
1221 return -E2BIG;
1222
1223 String path;
1224 {
1225 auto path_arg = get_syscall_path_argument(params.path);
1226 if (path_arg.is_error())
1227 return path_arg.error();
1228 path = path_arg.value();
1229 }
1230
1231 auto copy_user_strings = [&](const auto& list, auto& output) {
1232 if (!list.length)
1233 return true;
1234 if (!validate_read_typed(list.strings, list.length))
1235 return false;
1236 Vector<Syscall::StringArgument, 32> strings;
1237 strings.resize(list.length);
1238 copy_from_user(strings.data(), list.strings, list.length * sizeof(Syscall::StringArgument));
1239 for (size_t i = 0; i < list.length; ++i) {
1240 auto string = validate_and_copy_string_from_user(strings[i]);
1241 if (string.is_null())
1242 return false;
1243 output.append(move(string));
1244 }
1245 return true;
1246 };
1247
1248 Vector<String> arguments;
1249 if (!copy_user_strings(params.arguments, arguments))
1250 return -EFAULT;
1251
1252 Vector<String> environment;
1253 if (!copy_user_strings(params.environment, environment))
1254 return -EFAULT;
1255
1256 int rc = exec(move(path), move(arguments), move(environment));
1257 ASSERT(rc < 0); // We should never continue after a successful exec!
1258 return rc;
1259}
1260
1261Process* Process::create_user_process(Thread*& first_thread, const String& path, uid_t uid, gid_t gid, pid_t parent_pid, int& error, Vector<String>&& arguments, Vector<String>&& environment, TTY* tty)
1262{
1263 auto parts = path.split('/');
1264 if (arguments.is_empty()) {
1265 arguments.append(parts.last());
1266 }
1267 RefPtr<Custody> cwd;
1268 RefPtr<Custody> root;
1269 {
1270 InterruptDisabler disabler;
1271 if (auto* parent = Process::from_pid(parent_pid)) {
1272 cwd = parent->m_cwd;
1273 root = parent->m_root_directory;
1274 }
1275 }
1276
1277 if (!cwd)
1278 cwd = VFS::the().root_custody();
1279
1280 if (!root)
1281 root = VFS::the().root_custody();
1282
1283 auto* process = new Process(first_thread, parts.take_last(), uid, gid, parent_pid, Ring3, move(cwd), nullptr, tty);
1284 process->m_fds.resize(m_max_open_file_descriptors);
1285 auto& device_to_use_as_tty = tty ? (CharacterDevice&)*tty : NullDevice::the();
1286 auto description = device_to_use_as_tty.open(O_RDWR).value();
1287 process->m_fds[0].set(*description);
1288 process->m_fds[1].set(*description);
1289 process->m_fds[2].set(*description);
1290
1291 error = process->exec(path, move(arguments), move(environment));
1292 if (error != 0) {
1293 delete process;
1294 return nullptr;
1295 }
1296
1297 {
1298 InterruptDisabler disabler;
1299 g_processes->prepend(process);
1300 }
1301#ifdef TASK_DEBUG
1302 kprintf("Process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), first_thread->tss().eip);
1303#endif
1304 error = 0;
1305 return process;
1306}
1307
1308Process* Process::create_kernel_process(Thread*& first_thread, String&& name, void (*e)())
1309{
1310 auto* process = new Process(first_thread, move(name), (uid_t)0, (gid_t)0, (pid_t)0, Ring0);
1311 first_thread->tss().eip = (uintptr_t)e;
1312
1313 if (process->pid() != 0) {
1314 InterruptDisabler disabler;
1315 g_processes->prepend(process);
1316#ifdef TASK_DEBUG
1317 kprintf("Kernel process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), first_thread->tss().eip);
1318#endif
1319 }
1320
1321 first_thread->set_state(Thread::State::Runnable);
1322 return process;
1323}
1324
1325Process::Process(Thread*& first_thread, const String& name, uid_t uid, gid_t gid, pid_t ppid, RingLevel ring, RefPtr<Custody> cwd, RefPtr<Custody> executable, TTY* tty, Process* fork_parent)
1326 : m_name(move(name))
1327 , m_pid(allocate_pid())
1328 , m_uid(uid)
1329 , m_gid(gid)
1330 , m_euid(uid)
1331 , m_egid(gid)
1332 , m_ring(ring)
1333 , m_executable(move(executable))
1334 , m_cwd(move(cwd))
1335 , m_tty(tty)
1336 , m_ppid(ppid)
1337{
1338#ifdef PROCESS_DEBUG
1339 dbg() << "Created new process " << m_name << "(" << m_pid << ")";
1340#endif
1341
1342 m_page_directory = PageDirectory::create_for_userspace(*this, fork_parent ? &fork_parent->page_directory().range_allocator() : nullptr);
1343#ifdef MM_DEBUG
1344 dbgprintf("Process %u ctor: PD=%x created\n", pid(), m_page_directory.ptr());
1345#endif
1346
1347 if (fork_parent) {
1348 // NOTE: fork() doesn't clone all threads; the thread that called fork() becomes the only thread in the new process.
1349 first_thread = Thread::current->clone(*this);
1350 } else {
1351 // NOTE: This non-forked code path is only taken when the kernel creates a process "manually" (at boot.)
1352 first_thread = new Thread(*this);
1353 }
1354}
1355
1356Process::~Process()
1357{
1358 ASSERT(thread_count() == 0);
1359}
1360
1361void Process::dump_regions()
1362{
1363 kprintf("Process %s(%u) regions:\n", name().characters(), pid());
1364 kprintf("BEGIN END SIZE ACCESS NAME\n");
1365 for (auto& region : m_regions) {
1366 kprintf("%08x -- %08x %08x %c%c%c%c%c%c %s\n",
1367 region.vaddr().get(),
1368 region.vaddr().offset(region.size() - 1).get(),
1369 region.size(),
1370 region.is_readable() ? 'R' : ' ',
1371 region.is_writable() ? 'W' : ' ',
1372 region.is_executable() ? 'X' : ' ',
1373 region.is_shared() ? 'S' : ' ',
1374 region.is_stack() ? 'T' : ' ',
1375 region.vmobject().is_purgeable() ? 'P' : ' ',
1376 region.name().characters());
1377 }
1378
1379 MM.dump_kernel_regions();
1380}
1381
1382void Process::sys$exit(int status)
1383{
1384 cli();
1385#ifdef TASK_DEBUG
1386 kprintf("sys$exit: %s(%u) exit with status %d\n", name().characters(), pid(), status);
1387#endif
1388
1389 if (status != 0)
1390 dump_backtrace();
1391
1392 m_termination_status = status;
1393 m_termination_signal = 0;
1394 die();
1395 Thread::current->die_if_needed();
1396 ASSERT_NOT_REACHED();
1397}
1398
// Container for the signal trampoline machine code. The code between the
// asm_signal_trampoline and asm_signal_trampoline_end labels is what
// create_signal_trampolines() copies into the user-accessible trampoline region.
void signal_trampoline_dummy(void)
{
    // The trampoline preserves the current eax, pushes the signal code and
    // then calls the signal handler. We do this because, when interrupting a
    // blocking syscall, that syscall may return some special error code in eax;
    // This error code would likely be overwritten by the signal handler, so it's
    // necessary to preserve it here.
    asm(
        ".intel_syntax noprefix\n"
        "asm_signal_trampoline:\n"
        "push ebp\n"
        "mov ebp, esp\n"
        "push eax\n"          // we have to store eax 'cause it might be the return value from a syscall
        "sub esp, 4\n"        // align the stack to 16 bytes
        "mov eax, [ebp+12]\n" // push the signal code
        "push eax\n"
        "call [ebp+8]\n" // call the signal handler
        "add esp, 8\n"   // drop the signal code and the alignment padding, leaving the saved eax on top
        "mov eax, %P0\n" // load the sigreturn syscall number
        "int 0x82\n"     // sigreturn syscall
        "asm_signal_trampoline_end:\n"
        ".att_syntax" ::"i"(Syscall::SC_sigreturn));
}
1422
1423extern "C" void asm_signal_trampoline(void);
1424extern "C" void asm_signal_trampoline_end(void);
1425
1426void create_signal_trampolines()
1427{
1428 InterruptDisabler disabler;
1429 // NOTE: We leak this region.
1430 auto* trampoline_region = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Signal trampolines", Region::Access::Read | Region::Access::Write | Region::Access::Execute, false).leak_ptr();
1431 g_return_to_ring3_from_signal_trampoline = trampoline_region->vaddr();
1432
1433 u8* trampoline = (u8*)asm_signal_trampoline;
1434 u8* trampoline_end = (u8*)asm_signal_trampoline_end;
1435 size_t trampoline_size = trampoline_end - trampoline;
1436
1437 {
1438 SmapDisabler disabler;
1439 u8* code_ptr = (u8*)trampoline_region->vaddr().as_ptr();
1440 memcpy(code_ptr, trampoline, trampoline_size);
1441 }
1442
1443 trampoline_region->set_writable(false);
1444 trampoline_region->remap();
1445}
1446
1447void create_kernel_info_page()
1448{
1449 auto* info_page_region_for_userspace = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Kernel info page", Region::Access::Read).leak_ptr();
1450 auto* info_page_region_for_kernel = MM.allocate_kernel_region_with_vmobject(info_page_region_for_userspace->vmobject(), PAGE_SIZE, "Kernel info page", Region::Access::Read | Region::Access::Write).leak_ptr();
1451 s_info_page_address_for_userspace = info_page_region_for_userspace->vaddr();
1452 s_info_page_address_for_kernel = info_page_region_for_kernel->vaddr();
1453 memset(s_info_page_address_for_kernel.as_ptr(), 0, PAGE_SIZE);
1454}
1455
1456int Process::sys$sigreturn(RegisterState& registers)
1457{
1458 REQUIRE_PROMISE(stdio);
1459 SmapDisabler disabler;
1460
1461 //Here, we restore the state pushed by dispatch signal and asm_signal_trampoline.
1462 u32* stack_ptr = (u32*)registers.userspace_esp;
1463 u32 smuggled_eax = *stack_ptr;
1464
1465 //pop the stored eax, ebp, return address, handler and signal code
1466 stack_ptr += 5;
1467
1468 Thread::current->m_signal_mask = *stack_ptr;
1469 stack_ptr++;
1470
1471 //pop edi, esi, ebp, esp, ebx, edx, ecx and eax
1472 memcpy(®isters.edi, stack_ptr, 8 * sizeof(uintptr_t));
1473 stack_ptr += 8;
1474
1475 registers.eip = *stack_ptr;
1476 stack_ptr++;
1477
1478 registers.eflags = *stack_ptr;
1479 stack_ptr++;
1480
1481 registers.userspace_esp = registers.esp;
1482 return smuggled_eax;
1483}
1484
1485void Process::crash(int signal, u32 eip)
1486{
1487 ASSERT_INTERRUPTS_DISABLED();
1488 ASSERT(!is_dead());
1489 ASSERT(Process::current == this);
1490
1491 if (eip >= 0xc0000000 && ksyms_ready) {
1492 auto* ksym = ksymbolicate(eip);
1493 dbgprintf("\033[31;1m%p %s +%d\033[0m\n", eip, ksym ? demangle(ksym->name).characters() : "(k?)", ksym ? eip - ksym->address : 0);
1494 } else if (m_elf_loader) {
1495 dbgprintf("\033[31;1m%p %s\033[0m\n", eip, m_elf_loader->symbolicate(eip).characters());
1496 } else {
1497 dbgprintf("\033[31;1m%p (?)\033[0m\n", eip);
1498 }
1499 dump_backtrace();
1500
1501 m_termination_signal = signal;
1502 dump_regions();
1503 ASSERT(is_ring3());
1504 die();
1505 // We can not return from here, as there is nowhere
1506 // to unwind to, so die right away.
1507 Thread::current->die_if_needed();
1508 ASSERT_NOT_REACHED();
1509}
1510
1511Process* Process::from_pid(pid_t pid)
1512{
1513 ASSERT_INTERRUPTS_DISABLED();
1514 for (auto& process : *g_processes) {
1515 if (process.pid() == pid)
1516 return &process;
1517 }
1518 return nullptr;
1519}
1520
1521RefPtr<FileDescription> Process::file_description(int fd) const
1522{
1523 if (fd < 0)
1524 return nullptr;
1525 if (static_cast<size_t>(fd) < m_fds.size())
1526 return m_fds[fd].description.ptr();
1527 return nullptr;
1528}
1529
1530int Process::fd_flags(int fd) const
1531{
1532 if (fd < 0)
1533 return -1;
1534 if (static_cast<size_t>(fd) < m_fds.size())
1535 return m_fds[fd].flags;
1536 return -1;
1537}
1538
1539ssize_t Process::sys$get_dir_entries(int fd, void* buffer, ssize_t size)
1540{
1541 REQUIRE_PROMISE(stdio);
1542 if (size < 0)
1543 return -EINVAL;
1544 if (!validate_write(buffer, size))
1545 return -EFAULT;
1546 auto description = file_description(fd);
1547 if (!description)
1548 return -EBADF;
1549 return description->get_dir_entries((u8*)buffer, size);
1550}
1551
1552int Process::sys$lseek(int fd, off_t offset, int whence)
1553{
1554 REQUIRE_PROMISE(stdio);
1555 auto description = file_description(fd);
1556 if (!description)
1557 return -EBADF;
1558 return description->seek(offset, whence);
1559}
1560
1561int Process::sys$ttyname_r(int fd, char* buffer, ssize_t size)
1562{
1563 REQUIRE_PROMISE(tty);
1564 if (size < 0)
1565 return -EINVAL;
1566 if (!validate_write(buffer, size))
1567 return -EFAULT;
1568 auto description = file_description(fd);
1569 if (!description)
1570 return -EBADF;
1571 if (!description->is_tty())
1572 return -ENOTTY;
1573 String tty_name = description->tty()->tty_name();
1574 if ((size_t)size < tty_name.length() + 1)
1575 return -ERANGE;
1576 copy_to_user(buffer, tty_name.characters(), tty_name.length() + 1);
1577 return 0;
1578}
1579
1580int Process::sys$ptsname_r(int fd, char* buffer, ssize_t size)
1581{
1582 REQUIRE_PROMISE(tty);
1583 if (size < 0)
1584 return -EINVAL;
1585 if (!validate_write(buffer, size))
1586 return -EFAULT;
1587 auto description = file_description(fd);
1588 if (!description)
1589 return -EBADF;
1590 auto* master_pty = description->master_pty();
1591 if (!master_pty)
1592 return -ENOTTY;
1593 auto pts_name = master_pty->pts_name();
1594 if ((size_t)size < pts_name.length() + 1)
1595 return -ERANGE;
1596 copy_to_user(buffer, pts_name.characters(), pts_name.length() + 1);
1597 return 0;
1598}
1599
1600ssize_t Process::sys$writev(int fd, const struct iovec* iov, int iov_count)
1601{
1602 REQUIRE_PROMISE(stdio);
1603 if (iov_count < 0)
1604 return -EINVAL;
1605
1606 if (!validate_read_typed(iov, iov_count))
1607 return -EFAULT;
1608
1609 u64 total_length = 0;
1610 Vector<iovec, 32> vecs;
1611 vecs.resize(iov_count);
1612 copy_from_user(vecs.data(), iov, iov_count * sizeof(iovec));
1613 for (auto& vec : vecs) {
1614 if (!validate_read(vec.iov_base, vec.iov_len))
1615 return -EFAULT;
1616 total_length += vec.iov_len;
1617 if (total_length > INT32_MAX)
1618 return -EINVAL;
1619 }
1620
1621 auto description = file_description(fd);
1622 if (!description)
1623 return -EBADF;
1624
1625 if (!description->is_writable())
1626 return -EBADF;
1627
1628 int nwritten = 0;
1629 for (auto& vec : vecs) {
1630 int rc = do_write(*description, (const u8*)vec.iov_base, vec.iov_len);
1631 if (rc < 0) {
1632 if (nwritten == 0)
1633 return rc;
1634 return nwritten;
1635 }
1636 nwritten += rc;
1637 }
1638
1639 return nwritten;
1640}
1641
1642ssize_t Process::do_write(FileDescription& description, const u8* data, int data_size)
1643{
1644 ssize_t nwritten = 0;
1645 if (!description.is_blocking()) {
1646 if (!description.can_write())
1647 return -EAGAIN;
1648 }
1649
1650 if (description.should_append()) {
1651#ifdef IO_DEBUG
1652 dbgprintf("seeking to end (O_APPEND)\n");
1653#endif
1654 description.seek(0, SEEK_END);
1655 }
1656
1657 while (nwritten < data_size) {
1658#ifdef IO_DEBUG
1659 dbgprintf("while %u < %u\n", nwritten, size);
1660#endif
1661 if (!description.can_write()) {
1662#ifdef IO_DEBUG
1663 dbgprintf("block write on %d\n", fd);
1664#endif
1665 if (Thread::current->block<Thread::WriteBlocker>(description) != Thread::BlockResult::WokeNormally) {
1666 if (nwritten == 0)
1667 return -EINTR;
1668 }
1669 }
1670 ssize_t rc = description.write(data + nwritten, data_size - nwritten);
1671#ifdef IO_DEBUG
1672 dbgprintf(" -> write returned %d\n", rc);
1673#endif
1674 if (rc < 0) {
1675 // FIXME: Support returning partial nwritten with errno.
1676 ASSERT(nwritten == 0);
1677 return rc;
1678 }
1679 if (rc == 0)
1680 break;
1681 nwritten += rc;
1682 }
1683 return nwritten;
1684}
1685
1686ssize_t Process::sys$write(int fd, const u8* data, ssize_t size)
1687{
1688 REQUIRE_PROMISE(stdio);
1689 if (size < 0)
1690 return -EINVAL;
1691 if (size == 0)
1692 return 0;
1693 if (!validate_read(data, size))
1694 return -EFAULT;
1695#ifdef DEBUG_IO
1696 dbgprintf("%s(%u): sys$write(%d, %p, %u)\n", name().characters(), pid(), fd, data, size);
1697#endif
1698 auto description = file_description(fd);
1699 if (!description)
1700 return -EBADF;
1701 if (!description->is_writable())
1702 return -EBADF;
1703
1704 return do_write(*description, data, size);
1705}
1706
1707ssize_t Process::sys$read(int fd, u8* buffer, ssize_t size)
1708{
1709 REQUIRE_PROMISE(stdio);
1710 if (size < 0)
1711 return -EINVAL;
1712 if (size == 0)
1713 return 0;
1714 if (!validate_write(buffer, size))
1715 return -EFAULT;
1716#ifdef DEBUG_IO
1717 dbgprintf("%s(%u) sys$read(%d, %p, %u)\n", name().characters(), pid(), fd, buffer, size);
1718#endif
1719 auto description = file_description(fd);
1720 if (!description)
1721 return -EBADF;
1722 if (!description->is_readable())
1723 return -EBADF;
1724 if (description->is_directory())
1725 return -EISDIR;
1726 if (description->is_blocking()) {
1727 if (!description->can_read()) {
1728 if (Thread::current->block<Thread::ReadBlocker>(*description) != Thread::BlockResult::WokeNormally)
1729 return -EINTR;
1730 if (!description->can_read())
1731 return -EAGAIN;
1732 }
1733 }
1734 return description->read(buffer, size);
1735}
1736
1737int Process::sys$close(int fd)
1738{
1739 REQUIRE_PROMISE(stdio);
1740 auto description = file_description(fd);
1741#ifdef DEBUG_IO
1742 dbgprintf("%s(%u) sys$close(%d) %p\n", name().characters(), pid(), fd, description.ptr());
1743#endif
1744 if (!description)
1745 return -EBADF;
1746 int rc = description->close();
1747 m_fds[fd] = {};
1748 return rc;
1749}
1750
1751int Process::sys$utime(const char* user_path, size_t path_length, const utimbuf* user_buf)
1752{
1753 REQUIRE_PROMISE(fattr);
1754 if (user_buf && !validate_read_typed(user_buf))
1755 return -EFAULT;
1756 auto path = get_syscall_path_argument(user_path, path_length);
1757 if (path.is_error())
1758 return path.error();
1759 utimbuf buf;
1760 if (user_buf) {
1761 copy_from_user(&buf, user_buf);
1762 } else {
1763 auto now = kgettimeofday();
1764 buf = { now.tv_sec, now.tv_sec };
1765 }
1766 return VFS::the().utime(path.value(), current_directory(), buf.actime, buf.modtime);
1767}
1768
1769int Process::sys$access(const char* user_path, size_t path_length, int mode)
1770{
1771 REQUIRE_PROMISE(rpath);
1772 auto path = get_syscall_path_argument(user_path, path_length);
1773 if (path.is_error())
1774 return path.error();
1775 return VFS::the().access(path.value(), mode, current_directory());
1776}
1777
1778int Process::sys$fcntl(int fd, int cmd, u32 arg)
1779{
1780 REQUIRE_PROMISE(stdio);
1781#ifdef DEBUG_IO
1782 dbgprintf("sys$fcntl: fd=%d, cmd=%d, arg=%u\n", fd, cmd, arg);
1783#endif
1784 auto description = file_description(fd);
1785 if (!description)
1786 return -EBADF;
1787 // NOTE: The FD flags are not shared between FileDescription objects.
1788 // This means that dup() doesn't copy the FD_CLOEXEC flag!
1789 switch (cmd) {
1790 case F_DUPFD: {
1791 int arg_fd = (int)arg;
1792 if (arg_fd < 0)
1793 return -EINVAL;
1794 int new_fd = alloc_fd(arg_fd);
1795 if (new_fd < 0)
1796 return new_fd;
1797 m_fds[new_fd].set(*description);
1798 return new_fd;
1799 }
1800 case F_GETFD:
1801 return m_fds[fd].flags;
1802 case F_SETFD:
1803 m_fds[fd].flags = arg;
1804 break;
1805 case F_GETFL:
1806 return description->file_flags();
1807 case F_SETFL:
1808 description->set_file_flags(arg);
1809 break;
1810 default:
1811 ASSERT_NOT_REACHED();
1812 }
1813 return 0;
1814}
1815
1816int Process::sys$fstat(int fd, stat* statbuf)
1817{
1818 REQUIRE_PROMISE(stdio);
1819 if (!validate_write_typed(statbuf))
1820 return -EFAULT;
1821 auto description = file_description(fd);
1822 if (!description)
1823 return -EBADF;
1824 return description->fstat(*statbuf);
1825}
1826
1827int Process::sys$stat(const Syscall::SC_stat_params* user_params)
1828{
1829 REQUIRE_PROMISE(rpath);
1830 Syscall::SC_stat_params params;
1831 if (!validate_read_and_copy_typed(¶ms, user_params))
1832 return -EFAULT;
1833 if (!validate_write_typed(params.statbuf))
1834 return -EFAULT;
1835 auto path = get_syscall_path_argument(params.path);
1836 if (path.is_error())
1837 return path.error();
1838 auto metadata_or_error = VFS::the().lookup_metadata(path.value(), current_directory(), params.follow_symlinks ? 0 : O_NOFOLLOW_NOERROR);
1839 if (metadata_or_error.is_error())
1840 return metadata_or_error.error();
1841 stat statbuf;
1842 auto result = metadata_or_error.value().stat(statbuf);
1843 if (result.is_error())
1844 return result;
1845 copy_to_user(params.statbuf, &statbuf);
1846 return 0;
1847}
1848
1849template<typename DataType, typename SizeType>
1850bool Process::validate(const Syscall::MutableBufferArgument<DataType, SizeType>& buffer)
1851{
1852 return validate_write(buffer.data, buffer.size);
1853}
1854
1855template<typename DataType, typename SizeType>
1856bool Process::validate(const Syscall::ImmutableBufferArgument<DataType, SizeType>& buffer)
1857{
1858 return validate_read(buffer.data, buffer.size);
1859}
1860
1861String Process::validate_and_copy_string_from_user(const char* user_characters, size_t user_length) const
1862{
1863 if (user_length == 0)
1864 return String::empty();
1865 if (!user_characters)
1866 return {};
1867 if (!validate_read(user_characters, user_length))
1868 return {};
1869 SmapDisabler disabler;
1870 size_t measured_length = strnlen(user_characters, user_length);
1871 return String(user_characters, measured_length);
1872}
1873
1874String Process::validate_and_copy_string_from_user(const Syscall::StringArgument& string) const
1875{
1876 return validate_and_copy_string_from_user(string.characters, string.length);
1877}
1878
1879int Process::sys$readlink(const Syscall::SC_readlink_params* user_params)
1880{
1881 REQUIRE_PROMISE(rpath);
1882
1883 Syscall::SC_readlink_params params;
1884 if (!validate_read_and_copy_typed(¶ms, user_params))
1885 return -EFAULT;
1886
1887 if (!validate(params.buffer))
1888 return -EFAULT;
1889
1890 auto path = get_syscall_path_argument(params.path);
1891 if (path.is_error())
1892 return path.error();
1893
1894 auto result = VFS::the().open(path.value(), O_RDONLY | O_NOFOLLOW_NOERROR, 0, current_directory());
1895 if (result.is_error())
1896 return result.error();
1897 auto description = result.value();
1898
1899 if (!description->metadata().is_symlink())
1900 return -EINVAL;
1901
1902 auto contents = description->read_entire_file();
1903 if (!contents)
1904 return -EIO; // FIXME: Get a more detailed error from VFS.
1905
1906 auto link_target = String::copy(contents);
1907 if (link_target.length() + 1 > params.buffer.size)
1908 return -ENAMETOOLONG;
1909 copy_to_user(params.buffer.data, link_target.characters(), link_target.length() + 1);
1910 return link_target.length() + 1;
1911}
1912
1913int Process::sys$chdir(const char* user_path, size_t path_length)
1914{
1915 REQUIRE_PROMISE(rpath);
1916 auto path = get_syscall_path_argument(user_path, path_length);
1917 if (path.is_error())
1918 return path.error();
1919 auto directory_or_error = VFS::the().open_directory(path.value(), current_directory());
1920 if (directory_or_error.is_error())
1921 return directory_or_error.error();
1922 m_cwd = *directory_or_error.value();
1923 return 0;
1924}
1925
1926int Process::sys$fchdir(int fd)
1927{
1928 REQUIRE_PROMISE(stdio);
1929 auto description = file_description(fd);
1930 if (!description)
1931 return -EBADF;
1932
1933 if (!description->is_directory())
1934 return -ENOTDIR;
1935
1936 if (!description->metadata().may_execute(*this))
1937 return -EACCES;
1938
1939 m_cwd = description->custody();
1940 return 0;
1941}
1942
1943int Process::sys$getcwd(char* buffer, ssize_t size)
1944{
1945 REQUIRE_PROMISE(rpath);
1946 if (size < 0)
1947 return -EINVAL;
1948 if (!validate_write(buffer, size))
1949 return -EFAULT;
1950 auto path = current_directory().absolute_path();
1951 if ((size_t)size < path.length() + 1)
1952 return -ERANGE;
1953 copy_to_user(buffer, path.characters(), path.length() + 1);
1954 return 0;
1955}
1956
1957int Process::number_of_open_file_descriptors() const
1958{
1959 int count = 0;
1960 for (auto& description : m_fds) {
1961 if (description)
1962 ++count;
1963 }
1964 return count;
1965}
1966
1967int Process::sys$open(const Syscall::SC_open_params* user_params)
1968{
1969 Syscall::SC_open_params params;
1970 if (!validate_read_and_copy_typed(¶ms, user_params))
1971 return -EFAULT;
1972
1973 int dirfd = params.dirfd;
1974 int options = params.options;
1975 u16 mode = params.mode;
1976
1977 if (options & O_NOFOLLOW_NOERROR)
1978 return -EINVAL;
1979
1980 if (options & O_UNLINK_INTERNAL)
1981 return -EINVAL;
1982
1983 if (options & O_WRONLY)
1984 REQUIRE_PROMISE(wpath);
1985 else if (options & O_RDONLY)
1986 REQUIRE_PROMISE(rpath);
1987
1988 if (options & O_CREAT)
1989 REQUIRE_PROMISE(cpath);
1990
1991 // Ignore everything except permission bits.
1992 mode &= 04777;
1993
1994 auto path = get_syscall_path_argument(params.path);
1995 if (path.is_error())
1996 return path.error();
1997#ifdef DEBUG_IO
1998 dbg() << "sys$open(dirfd=" << dirfd << ", path=\"" << path.value() << "\", options=" << options << ", mode=" << mode << ")";
1999#endif
2000 int fd = alloc_fd();
2001 if (fd < 0)
2002 return fd;
2003
2004 RefPtr<Custody> base;
2005 if (dirfd == AT_FDCWD) {
2006 base = current_directory();
2007 } else {
2008 auto base_description = file_description(dirfd);
2009 if (!base_description)
2010 return -EBADF;
2011 if (!base_description->is_directory())
2012 return -ENOTDIR;
2013 if (!base_description->custody())
2014 return -EINVAL;
2015 base = base_description->custody();
2016 }
2017
2018 auto result = VFS::the().open(path.value(), options, mode & ~umask(), *base);
2019 if (result.is_error())
2020 return result.error();
2021 auto description = result.value();
2022 u32 fd_flags = (options & O_CLOEXEC) ? FD_CLOEXEC : 0;
2023 m_fds[fd].set(move(description), fd_flags);
2024 return fd;
2025}
2026
2027int Process::alloc_fd(int first_candidate_fd)
2028{
2029 for (int i = first_candidate_fd; i < (int)m_max_open_file_descriptors; ++i) {
2030 if (!m_fds[i])
2031 return i;
2032 }
2033 return -EMFILE;
2034}
2035
2036int Process::sys$pipe(int pipefd[2], int flags)
2037{
2038 REQUIRE_PROMISE(stdio);
2039 if (!validate_write_typed(pipefd))
2040 return -EFAULT;
2041 if (number_of_open_file_descriptors() + 2 > max_open_file_descriptors())
2042 return -EMFILE;
2043 // Reject flags other than O_CLOEXEC.
2044 if ((flags & O_CLOEXEC) != flags)
2045 return -EINVAL;
2046
2047 u32 fd_flags = (flags & O_CLOEXEC) ? FD_CLOEXEC : 0;
2048 auto fifo = FIFO::create(m_uid);
2049
2050 int reader_fd = alloc_fd();
2051 m_fds[reader_fd].set(fifo->open_direction(FIFO::Direction::Reader), fd_flags);
2052 m_fds[reader_fd].description->set_readable(true);
2053 copy_to_user(&pipefd[0], &reader_fd);
2054
2055 int writer_fd = alloc_fd();
2056 m_fds[writer_fd].set(fifo->open_direction(FIFO::Direction::Writer), fd_flags);
2057 m_fds[writer_fd].description->set_writable(true);
2058 copy_to_user(&pipefd[1], &writer_fd);
2059
2060 return 0;
2061}
2062
2063int Process::sys$killpg(int pgrp, int signum)
2064{
2065 REQUIRE_PROMISE(proc);
2066 if (signum < 1 || signum >= 32)
2067 return -EINVAL;
2068 if (pgrp < 0)
2069 return -EINVAL;
2070
2071 InterruptDisabler disabler;
2072 return do_killpg(pgrp, signum);
2073}
2074
2075int Process::sys$setuid(uid_t uid)
2076{
2077 REQUIRE_PROMISE(id);
2078 if (uid != m_uid && !is_superuser())
2079 return -EPERM;
2080 m_uid = uid;
2081 m_euid = uid;
2082 return 0;
2083}
2084
2085int Process::sys$setgid(gid_t gid)
2086{
2087 REQUIRE_PROMISE(id);
2088 if (gid != m_gid && !is_superuser())
2089 return -EPERM;
2090 m_gid = gid;
2091 m_egid = gid;
2092 return 0;
2093}
2094
2095unsigned Process::sys$alarm(unsigned seconds)
2096{
2097 REQUIRE_PROMISE(stdio);
2098 unsigned previous_alarm_remaining = 0;
2099 if (m_alarm_deadline && m_alarm_deadline > g_uptime) {
2100 previous_alarm_remaining = (m_alarm_deadline - g_uptime) / TICKS_PER_SECOND;
2101 }
2102 if (!seconds) {
2103 m_alarm_deadline = 0;
2104 return previous_alarm_remaining;
2105 }
2106 m_alarm_deadline = g_uptime + seconds * TICKS_PER_SECOND;
2107 return previous_alarm_remaining;
2108}
2109
2110int Process::sys$uname(utsname* buf)
2111{
2112 REQUIRE_PROMISE(stdio);
2113 if (!validate_write_typed(buf))
2114 return -EFAULT;
2115 LOCKER(*s_hostname_lock);
2116 if (s_hostname->length() + 1 > sizeof(utsname::nodename))
2117 return -ENAMETOOLONG;
2118 copy_to_user(buf->sysname, "SerenityOS", 11);
2119 copy_to_user(buf->release, "1.0-dev", 8);
2120 copy_to_user(buf->version, "FIXME", 6);
2121 copy_to_user(buf->machine, "i686", 5);
2122 copy_to_user(buf->nodename, s_hostname->characters(), s_hostname->length() + 1);
2123 return 0;
2124}
2125
2126KResult Process::do_kill(Process& process, int signal)
2127{
2128 // FIXME: Allow sending SIGCONT to everyone in the process group.
2129 // FIXME: Should setuid processes have some special treatment here?
2130 if (!is_superuser() && m_euid != process.m_uid && m_uid != process.m_uid)
2131 return KResult(-EPERM);
2132 if (process.is_ring0() && signal == SIGKILL) {
2133 kprintf("%s(%u) attempted to send SIGKILL to ring 0 process %s(%u)\n", name().characters(), m_pid, process.name().characters(), process.pid());
2134 return KResult(-EPERM);
2135 }
2136 if (signal != 0)
2137 process.send_signal(signal, this);
2138 return KSuccess;
2139}
2140
2141KResult Process::do_killpg(pid_t pgrp, int signal)
2142{
2143 ASSERT(pgrp >= 0);
2144
2145 // Send the signal to all processes in the given group.
2146 if (pgrp == 0) {
2147 // Send the signal to our own pgrp.
2148 pgrp = pgid();
2149 }
2150
2151 bool group_was_empty = true;
2152 bool any_succeeded = false;
2153 KResult error = KSuccess;
2154
2155 Process::for_each_in_pgrp(pgrp, [&](auto& process) {
2156 group_was_empty = false;
2157
2158 KResult res = do_kill(process, signal);
2159 if (res.is_success())
2160 any_succeeded = true;
2161 else
2162 error = res;
2163
2164 return IterationDecision::Continue;
2165 });
2166
2167 if (group_was_empty)
2168 return KResult(-ESRCH);
2169 if (any_succeeded)
2170 return KSuccess;
2171 return error;
2172}
2173
2174int Process::sys$kill(pid_t pid, int signal)
2175{
2176 if (pid == m_pid)
2177 REQUIRE_PROMISE(stdio);
2178 else
2179 REQUIRE_PROMISE(proc);
2180
2181 if (signal < 0 || signal >= 32)
2182 return -EINVAL;
2183 if (pid <= 0)
2184 return do_killpg(-pid, signal);
2185 if (pid == -1) {
2186 // FIXME: Send to all processes.
2187 return -ENOTIMPL;
2188 }
2189 if (pid == m_pid) {
2190 if (signal == 0)
2191 return 0;
2192 if (!Thread::current->should_ignore_signal(signal)) {
2193 Thread::current->send_signal(signal, this);
2194 (void)Thread::current->block<Thread::SemiPermanentBlocker>(Thread::SemiPermanentBlocker::Reason::Signal);
2195 }
2196 return 0;
2197 }
2198 InterruptDisabler disabler;
2199 auto* peer = Process::from_pid(pid);
2200 if (!peer)
2201 return -ESRCH;
2202 return do_kill(*peer, signal);
2203}
2204
2205int Process::sys$usleep(useconds_t usec)
2206{
2207 REQUIRE_PROMISE(stdio);
2208 if (!usec)
2209 return 0;
2210 u64 wakeup_time = Thread::current->sleep(usec / 1000);
2211 if (wakeup_time > g_uptime)
2212 return -EINTR;
2213 return 0;
2214}
2215
2216int Process::sys$sleep(unsigned seconds)
2217{
2218 REQUIRE_PROMISE(stdio);
2219 if (!seconds)
2220 return 0;
2221 u64 wakeup_time = Thread::current->sleep(seconds * TICKS_PER_SECOND);
2222 if (wakeup_time > g_uptime) {
2223 u32 ticks_left_until_original_wakeup_time = wakeup_time - g_uptime;
2224 return ticks_left_until_original_wakeup_time / TICKS_PER_SECOND;
2225 }
2226 return 0;
2227}
2228
2229timeval kgettimeofday()
2230{
2231 return const_cast<const timeval&>(((KernelInfoPage*)s_info_page_address_for_kernel.as_ptr())->now);
2232}
2233
2234void kgettimeofday(timeval& tv)
2235{
2236 tv = kgettimeofday();
2237}
2238
2239int Process::sys$gettimeofday(timeval* tv)
2240{
2241 REQUIRE_PROMISE(stdio);
2242 if (!validate_write_typed(tv))
2243 return -EFAULT;
2244 *tv = kgettimeofday();
2245 return 0;
2246}
2247
2248uid_t Process::sys$getuid()
2249{
2250 REQUIRE_PROMISE(stdio);
2251 return m_uid;
2252}
2253
2254gid_t Process::sys$getgid()
2255{
2256 REQUIRE_PROMISE(stdio);
2257 return m_gid;
2258}
2259
2260uid_t Process::sys$geteuid()
2261{
2262 REQUIRE_PROMISE(stdio);
2263 return m_euid;
2264}
2265
2266gid_t Process::sys$getegid()
2267{
2268 REQUIRE_PROMISE(stdio);
2269 return m_egid;
2270}
2271
2272pid_t Process::sys$getpid()
2273{
2274 REQUIRE_PROMISE(stdio);
2275 return m_pid;
2276}
2277
2278pid_t Process::sys$getppid()
2279{
2280 REQUIRE_PROMISE(stdio);
2281 return m_ppid;
2282}
2283
2284mode_t Process::sys$umask(mode_t mask)
2285{
2286 REQUIRE_PROMISE(stdio);
2287 auto old_mask = m_umask;
2288 m_umask = mask & 0777;
2289 return old_mask;
2290}
2291
2292siginfo_t Process::reap(Process& process)
2293{
2294 siginfo_t siginfo;
2295 memset(&siginfo, 0, sizeof(siginfo));
2296 siginfo.si_signo = SIGCHLD;
2297 siginfo.si_pid = process.pid();
2298 siginfo.si_uid = process.uid();
2299
2300 if (process.m_termination_signal) {
2301 siginfo.si_status = process.m_termination_signal;
2302 siginfo.si_code = CLD_KILLED;
2303 } else {
2304 siginfo.si_status = process.m_termination_status;
2305 siginfo.si_code = CLD_EXITED;
2306 }
2307
2308 {
2309 InterruptDisabler disabler;
2310
2311 if (process.ppid()) {
2312 auto* parent = Process::from_pid(process.ppid());
2313 if (parent) {
2314 parent->m_ticks_in_user_for_dead_children += process.m_ticks_in_user + process.m_ticks_in_user_for_dead_children;
2315 parent->m_ticks_in_kernel_for_dead_children += process.m_ticks_in_kernel + process.m_ticks_in_kernel_for_dead_children;
2316 }
2317 }
2318
2319#ifdef PROCESS_DEBUG
2320 dbg() << "Reaping process " << process;
2321#endif
2322 ASSERT(process.is_dead());
2323 g_processes->remove(&process);
2324 }
2325 delete &process;
2326 return siginfo;
2327}
2328
2329KResultOr<siginfo_t> Process::do_waitid(idtype_t idtype, int id, int options)
2330{
2331 if (idtype == P_PID) {
2332 InterruptDisabler disabler;
2333 if (idtype == P_PID && !Process::from_pid(id))
2334 return KResult(-ECHILD);
2335 }
2336
2337 if (options & WNOHANG) {
2338 // FIXME: Figure out what WNOHANG should do with stopped children.
2339 if (idtype == P_ALL) {
2340 InterruptDisabler disabler;
2341 siginfo_t siginfo;
2342 memset(&siginfo, 0, sizeof(siginfo));
2343 for_each_child([&siginfo](Process& process) {
2344 if (process.is_dead())
2345 siginfo = reap(process);
2346 return IterationDecision::Continue;
2347 });
2348 return siginfo;
2349 } else if (idtype == P_PID) {
2350 InterruptDisabler disabler;
2351 auto* waitee_process = Process::from_pid(id);
2352 if (!waitee_process)
2353 return KResult(-ECHILD);
2354 if (waitee_process->is_dead())
2355 return reap(*waitee_process);
2356 } else {
2357 // FIXME: Implement other PID specs.
2358 return KResult(-EINVAL);
2359 }
2360 }
2361
2362 pid_t waitee_pid;
2363
2364 // FIXME: WaitBlocker should support idtype/id specs directly.
2365 if (idtype == P_ALL) {
2366 waitee_pid = -1;
2367 } else if (idtype == P_PID) {
2368 waitee_pid = id;
2369 } else {
2370 // FIXME: Implement other PID specs.
2371 return KResult(-EINVAL);
2372 }
2373
2374 if (Thread::current->block<Thread::WaitBlocker>(options, waitee_pid) != Thread::BlockResult::WokeNormally)
2375 return KResult(-EINTR);
2376
2377 InterruptDisabler disabler;
2378
2379 // NOTE: If waitee was -1, m_waitee_pid will have been filled in by the scheduler.
2380 Process* waitee_process = Process::from_pid(waitee_pid);
2381 if (!waitee_process)
2382 return KResult(-ECHILD);
2383
2384 ASSERT(waitee_process);
2385 if (waitee_process->is_dead()) {
2386 return reap(*waitee_process);
2387 } else {
2388 auto* waitee_thread = Thread::from_tid(waitee_pid);
2389 if (!waitee_thread)
2390 return KResult(-ECHILD);
2391 ASSERT(waitee_thread->state() == Thread::State::Stopped);
2392 siginfo_t siginfo;
2393 memset(&siginfo, 0, sizeof(siginfo));
2394 siginfo.si_signo = SIGCHLD;
2395 siginfo.si_pid = waitee_process->pid();
2396 siginfo.si_uid = waitee_process->uid();
2397 siginfo.si_status = CLD_STOPPED;
2398 siginfo.si_code = waitee_thread->m_stop_signal;
2399 return siginfo;
2400 }
2401}
2402
2403pid_t Process::sys$waitid(const Syscall::SC_waitid_params* user_params)
2404{
2405 REQUIRE_PROMISE(stdio);
2406
2407 Syscall::SC_waitid_params params;
2408 if (!validate_read_and_copy_typed(¶ms, user_params))
2409 return -EFAULT;
2410
2411 if (!validate_write_typed(params.infop))
2412 return -EFAULT;
2413
2414#ifdef PROCESS_DEBUG
2415 dbg() << "sys$waitid(" << params.idtype << ", " << params.id << ", " << params.infop << ", " << params.options << ")";
2416#endif
2417
2418 auto siginfo_or_error = do_waitid(static_cast<idtype_t>(params.idtype), params.id, params.options);
2419 if (siginfo_or_error.is_error())
2420 return siginfo_or_error.error();
2421
2422 copy_to_user(params.infop, &siginfo_or_error.value());
2423 return 0;
2424}
2425
2426bool Process::validate_read_from_kernel(VirtualAddress vaddr, size_t size) const
2427{
2428 if (vaddr.is_null())
2429 return false;
2430 return MM.validate_kernel_read(*this, vaddr, size);
2431}
2432
2433bool Process::validate_read(const void* address, size_t size) const
2434{
2435 if (!size)
2436 return false;
2437 return MM.validate_user_read(*this, VirtualAddress(address), size);
2438}
2439
2440bool Process::validate_write(void* address, size_t size) const
2441{
2442 if (!size)
2443 return false;
2444 return MM.validate_user_write(*this, VirtualAddress(address), size);
2445}
2446
2447pid_t Process::sys$getsid(pid_t pid)
2448{
2449 REQUIRE_PROMISE(stdio);
2450 if (pid == 0)
2451 return m_sid;
2452 InterruptDisabler disabler;
2453 auto* process = Process::from_pid(pid);
2454 if (!process)
2455 return -ESRCH;
2456 if (m_sid != process->m_sid)
2457 return -EPERM;
2458 return process->m_sid;
2459}
2460
2461pid_t Process::sys$setsid()
2462{
2463 REQUIRE_PROMISE(proc);
2464 InterruptDisabler disabler;
2465 bool found_process_with_same_pgid_as_my_pid = false;
2466 Process::for_each_in_pgrp(pid(), [&](auto&) {
2467 found_process_with_same_pgid_as_my_pid = true;
2468 return IterationDecision::Break;
2469 });
2470 if (found_process_with_same_pgid_as_my_pid)
2471 return -EPERM;
2472 m_sid = m_pid;
2473 m_pgid = m_pid;
2474 m_tty = nullptr;
2475 return m_sid;
2476}
2477
2478pid_t Process::sys$getpgid(pid_t pid)
2479{
2480 REQUIRE_PROMISE(stdio);
2481 if (pid == 0)
2482 return m_pgid;
2483 InterruptDisabler disabler; // FIXME: Use a ProcessHandle
2484 auto* process = Process::from_pid(pid);
2485 if (!process)
2486 return -ESRCH;
2487 return process->m_pgid;
2488}
2489
2490pid_t Process::sys$getpgrp()
2491{
2492 REQUIRE_PROMISE(stdio);
2493 return m_pgid;
2494}
2495
2496static pid_t get_sid_from_pgid(pid_t pgid)
2497{
2498 InterruptDisabler disabler;
2499 auto* group_leader = Process::from_pid(pgid);
2500 if (!group_leader)
2501 return -1;
2502 return group_leader->sid();
2503}
2504
2505int Process::sys$setpgid(pid_t specified_pid, pid_t specified_pgid)
2506{
2507 REQUIRE_PROMISE(proc);
2508 InterruptDisabler disabler; // FIXME: Use a ProcessHandle
2509 pid_t pid = specified_pid ? specified_pid : m_pid;
2510 if (specified_pgid < 0) {
2511 // The value of the pgid argument is less than 0, or is not a value supported by the implementation.
2512 return -EINVAL;
2513 }
2514 auto* process = Process::from_pid(pid);
2515 if (!process)
2516 return -ESRCH;
2517 if (process != this && process->ppid() != m_pid) {
2518 // The value of the pid argument does not match the process ID
2519 // of the calling process or of a child process of the calling process.
2520 return -ESRCH;
2521 }
2522 if (process->pid() == process->sid()) {
2523 // The process indicated by the pid argument is a session leader.
2524 return -EPERM;
2525 }
2526 if (process->ppid() == m_pid && process->sid() != sid()) {
2527 // The value of the pid argument matches the process ID of a child
2528 // process of the calling process and the child process is not in
2529 // the same session as the calling process.
2530 return -EPERM;
2531 }
2532
2533 pid_t new_pgid = specified_pgid ? specified_pgid : process->m_pid;
2534 pid_t current_sid = get_sid_from_pgid(process->m_pgid);
2535 pid_t new_sid = get_sid_from_pgid(new_pgid);
2536 if (current_sid != new_sid) {
2537 // Can't move a process between sessions.
2538 return -EPERM;
2539 }
2540 // FIXME: There are more EPERM conditions to check for here..
2541 process->m_pgid = new_pgid;
2542 return 0;
2543}
2544
2545int Process::sys$ioctl(int fd, unsigned request, unsigned arg)
2546{
2547 auto description = file_description(fd);
2548 if (!description)
2549 return -EBADF;
2550 SmapDisabler disabler;
2551 return description->file().ioctl(*description, request, arg);
2552}
2553
2554int Process::sys$getdtablesize()
2555{
2556 REQUIRE_PROMISE(stdio);
2557 return m_max_open_file_descriptors;
2558}
2559
2560int Process::sys$dup(int old_fd)
2561{
2562 REQUIRE_PROMISE(stdio);
2563 auto description = file_description(old_fd);
2564 if (!description)
2565 return -EBADF;
2566 int new_fd = alloc_fd();
2567 if (new_fd < 0)
2568 return new_fd;
2569 m_fds[new_fd].set(*description);
2570 return new_fd;
2571}
2572
2573int Process::sys$dup2(int old_fd, int new_fd)
2574{
2575 REQUIRE_PROMISE(stdio);
2576 auto description = file_description(old_fd);
2577 if (!description)
2578 return -EBADF;
2579 if (new_fd < 0 || new_fd >= m_max_open_file_descriptors)
2580 return -EINVAL;
2581 m_fds[new_fd].set(*description);
2582 return new_fd;
2583}
2584
2585int Process::sys$sigprocmask(int how, const sigset_t* set, sigset_t* old_set)
2586{
2587 REQUIRE_PROMISE(stdio);
2588 if (old_set) {
2589 if (!validate_write_typed(old_set))
2590 return -EFAULT;
2591 copy_to_user(old_set, &Thread::current->m_signal_mask);
2592 }
2593 if (set) {
2594 if (!validate_read_typed(set))
2595 return -EFAULT;
2596 sigset_t set_value;
2597 copy_from_user(&set_value, set);
2598 switch (how) {
2599 case SIG_BLOCK:
2600 Thread::current->m_signal_mask &= ~set_value;
2601 break;
2602 case SIG_UNBLOCK:
2603 Thread::current->m_signal_mask |= set_value;
2604 break;
2605 case SIG_SETMASK:
2606 Thread::current->m_signal_mask = set_value;
2607 break;
2608 default:
2609 return -EINVAL;
2610 }
2611 }
2612 return 0;
2613}
2614
2615int Process::sys$sigpending(sigset_t* set)
2616{
2617 REQUIRE_PROMISE(stdio);
2618 if (!validate_write_typed(set))
2619 return -EFAULT;
2620 copy_to_user(set, &Thread::current->m_pending_signals);
2621 return 0;
2622}
2623
2624int Process::sys$sigaction(int signum, const sigaction* act, sigaction* old_act)
2625{
2626 REQUIRE_PROMISE(stdio);
2627 if (signum < 1 || signum >= 32 || signum == SIGKILL || signum == SIGSTOP)
2628 return -EINVAL;
2629 if (!validate_read_typed(act))
2630 return -EFAULT;
2631 InterruptDisabler disabler; // FIXME: This should use a narrower lock. Maybe a way to ignore signals temporarily?
2632 auto& action = Thread::current->m_signal_action_data[signum];
2633 if (old_act) {
2634 if (!validate_write_typed(old_act))
2635 return -EFAULT;
2636 copy_to_user(&old_act->sa_flags, &action.flags);
2637 copy_to_user(&old_act->sa_sigaction, &action.handler_or_sigaction, sizeof(action.handler_or_sigaction));
2638 }
2639 copy_from_user(&action.flags, &act->sa_flags);
2640 copy_from_user(&action.handler_or_sigaction, &act->sa_sigaction, sizeof(action.handler_or_sigaction));
2641 return 0;
2642}
2643
2644int Process::sys$getgroups(ssize_t count, gid_t* user_gids)
2645{
2646 REQUIRE_PROMISE(stdio);
2647 if (count < 0)
2648 return -EINVAL;
2649 if (!count)
2650 return m_extra_gids.size();
2651 if (count != (int)m_extra_gids.size())
2652 return -EINVAL;
2653 if (!validate_write_typed(user_gids, m_extra_gids.size()))
2654 return -EFAULT;
2655
2656 Vector<gid_t> gids;
2657 for (auto gid : m_extra_gids)
2658 gids.append(gid);
2659
2660 copy_to_user(user_gids, gids.data(), sizeof(gid_t) * count);
2661 return 0;
2662}
2663
2664int Process::sys$setgroups(ssize_t count, const gid_t* user_gids)
2665{
2666 REQUIRE_PROMISE(id);
2667 if (count < 0)
2668 return -EINVAL;
2669 if (!is_superuser())
2670 return -EPERM;
2671 if (count && !validate_read(user_gids, count))
2672 return -EFAULT;
2673
2674 Vector<gid_t> gids;
2675 gids.resize(count);
2676 copy_from_user(gids.data(), user_gids, sizeof(gid_t) * count);
2677
2678 HashTable<gid_t> unique_extra_gids;
2679 for (auto& gid : gids) {
2680 if (gid != m_gid)
2681 unique_extra_gids.set(gid);
2682 }
2683
2684 m_extra_gids.resize(unique_extra_gids.size());
2685 size_t i = 0;
2686 for (auto& gid : unique_extra_gids) {
2687 if (gid == m_gid)
2688 continue;
2689 m_extra_gids[i++] = gid;
2690 }
2691 return 0;
2692}
2693
2694int Process::sys$mkdir(const char* user_path, size_t path_length, mode_t mode)
2695{
2696 REQUIRE_PROMISE(cpath);
2697 auto path = get_syscall_path_argument(user_path, path_length);
2698 if (path.is_error())
2699 return path.error();
2700 return VFS::the().mkdir(path.value(), mode & ~umask(), current_directory());
2701}
2702
2703int Process::sys$realpath(const Syscall::SC_realpath_params* user_params)
2704{
2705 REQUIRE_PROMISE(rpath);
2706
2707 Syscall::SC_realpath_params params;
2708 if (!validate_read_and_copy_typed(¶ms, user_params))
2709 return -EFAULT;
2710
2711 if (!validate_write(params.buffer.data, params.buffer.size))
2712 return -EFAULT;
2713
2714 auto path = get_syscall_path_argument(params.path);
2715 if (path.is_error())
2716 return path.error();
2717
2718 auto custody_or_error = VFS::the().resolve_path(path.value(), current_directory());
2719 if (custody_or_error.is_error())
2720 return custody_or_error.error();
2721 auto& custody = custody_or_error.value();
2722 auto absolute_path = custody->absolute_path();
2723
2724 if (absolute_path.length() + 1 > params.buffer.size)
2725 return -ENAMETOOLONG;
2726
2727 copy_to_user(params.buffer.data, absolute_path.characters(), absolute_path.length() + 1);
2728 return 0;
2729};
2730
2731clock_t Process::sys$times(tms* times)
2732{
2733 REQUIRE_PROMISE(stdio);
2734 if (!validate_write_typed(times))
2735 return -EFAULT;
2736 copy_to_user(×->tms_utime, &m_ticks_in_user);
2737 copy_to_user(×->tms_stime, &m_ticks_in_kernel);
2738 copy_to_user(×->tms_cutime, &m_ticks_in_user_for_dead_children);
2739 copy_to_user(×->tms_cstime, &m_ticks_in_kernel_for_dead_children);
2740 return g_uptime & 0x7fffffff;
2741}
2742
// Syscall: wait until one of the file descriptors named in the user's fd
// sets becomes readable/writable, or until the timeout passes.
// Returns the number of descriptors marked in the read and write sets,
// -EFAULT for bad pointers, -EINVAL for a negative nfds, -EBADF if a set
// names a descriptor that is not open, and -EINTR if the wait was
// interrupted.
int Process::sys$select(const Syscall::SC_select_params* params)
{
    REQUIRE_PROMISE(stdio);
    // FIXME: Return -EINVAL if timeout is invalid.
    if (!validate_read_typed(params))
        return -EFAULT;

    // User memory (params and the fd sets) is accessed directly from here on.
    SmapDisabler disabler;

    int nfds = params->nfds;
    fd_set* readfds = params->readfds;
    fd_set* writefds = params->writefds;
    fd_set* exceptfds = params->exceptfds;
    timeval* timeout = params->timeout;

    if (writefds && !validate_write_typed(writefds))
        return -EFAULT;
    if (readfds && !validate_write_typed(readfds))
        return -EFAULT;
    if (exceptfds && !validate_write_typed(exceptfds))
        return -EFAULT;
    if (timeout && !validate_read_typed(timeout))
        return -EFAULT;
    // NOTE(review): nfds has no upper bound here, while only sizeof(fd_set)
    // bytes of each set were validated above - confirm whether a limit is
    // needed.
    if (nfds < 0)
        return -EINVAL;

    // A non-null, non-zero timeout is turned into an absolute deadline.
    // A non-null all-zero timeout means "do not block at all" (see below).
    timeval computed_timeout;
    bool select_has_timeout = false;
    if (timeout && (timeout->tv_sec || timeout->tv_usec)) {
        timeval_add(kgettimeofday(), *timeout, computed_timeout);
        select_has_timeout = true;
    }

    Thread::SelectBlocker::FDVector rfds;
    Thread::SelectBlocker::FDVector wfds;
    Thread::SelectBlocker::FDVector efds;

    // Collects the descriptors set in a user fd_set into `vector`, failing
    // with -EBADF as soon as one of them is not open.
    auto transfer_fds = [&](auto* fds, auto& vector) -> int {
        vector.clear_with_capacity();
        if (!fds)
            return 0;
        for (int fd = 0; fd < nfds; ++fd) {
            if (FD_ISSET(fd, fds)) {
                if (!file_description(fd)) {
                    dbg() << "sys$select: Bad fd number " << fd;
                    return -EBADF;
                }
                vector.append(fd);
            }
        }
        return 0;
    };
    if (int error = transfer_fds(writefds, wfds))
        return error;
    if (int error = transfer_fds(readfds, rfds))
        return error;
    if (int error = transfer_fds(exceptfds, efds))
        return error;

#if defined(DEBUG_IO) || defined(DEBUG_POLL_SELECT)
    dbgprintf("%s<%u> selecting on (read:%u, write:%u), timeout=%p\n", name().characters(), pid(), rfds.size(), wfds.size(), timeout);
#endif

    // Block when there is no timeout at all or a non-zero one; skip blocking
    // for an explicit zero timeout.
    if (!timeout || select_has_timeout) {
        if (Thread::current->block<Thread::SelectBlocker>(computed_timeout, select_has_timeout, rfds, wfds, efds) != Thread::BlockResult::WokeNormally)
            return -EINTR;
    }

    // Rewrites a user fd_set so that it contains only the collected
    // descriptors for which `should_mark` holds, counting each one.
    int marked_fd_count = 0;
    auto mark_fds = [&](auto* fds, auto& vector, auto should_mark) {
        if (!fds)
            return;
        FD_ZERO(fds);
        for (int fd : vector) {
            if (auto description = file_description(fd); description && should_mark(*description)) {
                FD_SET(fd, fds);
                ++marked_fd_count;
            }
        }
    };
    mark_fds(readfds, rfds, [](auto& description) { return description.can_read(); });
    mark_fds(writefds, wfds, [](auto& description) { return description.can_write(); });
    // FIXME: We should also mark exceptfds as appropriate.

    return marked_fd_count;
}
2829
2830int Process::sys$poll(pollfd* fds, int nfds, int timeout)
2831{
2832 REQUIRE_PROMISE(stdio);
2833 if (!validate_read_typed(fds))
2834 return -EFAULT;
2835
2836 SmapDisabler disabler;
2837
2838 Thread::SelectBlocker::FDVector rfds;
2839 Thread::SelectBlocker::FDVector wfds;
2840
2841 for (int i = 0; i < nfds; ++i) {
2842 if (fds[i].events & POLLIN)
2843 rfds.append(fds[i].fd);
2844 if (fds[i].events & POLLOUT)
2845 wfds.append(fds[i].fd);
2846 }
2847
2848 timeval actual_timeout;
2849 bool has_timeout = false;
2850 if (timeout >= 0) {
2851 // poll is in ms, we want s/us.
2852 struct timeval tvtimeout;
2853 tvtimeout.tv_sec = 0;
2854 while (timeout >= 1000) {
2855 tvtimeout.tv_sec += 1;
2856 timeout -= 1000;
2857 }
2858 tvtimeout.tv_usec = timeout * 1000;
2859 timeval_add(kgettimeofday(), tvtimeout, actual_timeout);
2860 has_timeout = true;
2861 }
2862
2863#if defined(DEBUG_IO) || defined(DEBUG_POLL_SELECT)
2864 dbgprintf("%s<%u> polling on (read:%u, write:%u), timeout=%d\n", name().characters(), pid(), rfds.size(), wfds.size(), timeout);
2865#endif
2866
2867 if (has_timeout || timeout < 0) {
2868 if (Thread::current->block<Thread::SelectBlocker>(actual_timeout, has_timeout, rfds, wfds, Thread::SelectBlocker::FDVector()) != Thread::BlockResult::WokeNormally)
2869 return -EINTR;
2870 }
2871
2872 int fds_with_revents = 0;
2873
2874 for (int i = 0; i < nfds; ++i) {
2875 auto description = file_description(fds[i].fd);
2876 if (!description) {
2877 fds[i].revents = POLLNVAL;
2878 continue;
2879 }
2880 fds[i].revents = 0;
2881 if (fds[i].events & POLLIN && description->can_read())
2882 fds[i].revents |= POLLIN;
2883 if (fds[i].events & POLLOUT && description->can_write())
2884 fds[i].revents |= POLLOUT;
2885
2886 if (fds[i].revents)
2887 ++fds_with_revents;
2888 }
2889
2890 return fds_with_revents;
2891}
2892
2893Custody& Process::current_directory()
2894{
2895 if (!m_cwd)
2896 m_cwd = VFS::the().root_custody();
2897 return *m_cwd;
2898}
2899
2900int Process::sys$link(const Syscall::SC_link_params* user_params)
2901{
2902 REQUIRE_PROMISE(cpath);
2903 Syscall::SC_link_params params;
2904 if (!validate_read_and_copy_typed(¶ms, user_params))
2905 return -EFAULT;
2906 auto old_path = validate_and_copy_string_from_user(params.old_path);
2907 auto new_path = validate_and_copy_string_from_user(params.new_path);
2908 if (old_path.is_null() || new_path.is_null())
2909 return -EFAULT;
2910 return VFS::the().link(old_path, new_path, current_directory());
2911}
2912
2913int Process::sys$unlink(const char* user_path, size_t path_length)
2914{
2915 REQUIRE_PROMISE(cpath);
2916 if (!validate_read(user_path, path_length))
2917 return -EFAULT;
2918 auto path = get_syscall_path_argument(user_path, path_length);
2919 if (path.is_error())
2920 return path.error();
2921 return VFS::the().unlink(path.value(), current_directory());
2922}
2923
2924int Process::sys$symlink(const Syscall::SC_symlink_params* user_params)
2925{
2926 REQUIRE_PROMISE(cpath);
2927 Syscall::SC_symlink_params params;
2928 if (!validate_read_and_copy_typed(¶ms, user_params))
2929 return -EFAULT;
2930 auto target = get_syscall_path_argument(params.target);
2931 if (target.is_error())
2932 return target.error();
2933 auto linkpath = get_syscall_path_argument(params.linkpath);
2934 if (linkpath.is_error())
2935 return linkpath.error();
2936 return VFS::the().symlink(target.value(), linkpath.value(), current_directory());
2937}
2938
2939KResultOr<String> Process::get_syscall_path_argument(const char* user_path, size_t path_length) const
2940{
2941 if (path_length == 0)
2942 return KResult(-EINVAL);
2943 if (path_length > PATH_MAX)
2944 return KResult(-ENAMETOOLONG);
2945 if (!validate_read(user_path, path_length))
2946 return KResult(-EFAULT);
2947 return copy_string_from_user(user_path, path_length);
2948}
2949
2950KResultOr<String> Process::get_syscall_path_argument(const Syscall::StringArgument& path) const
2951{
2952 return get_syscall_path_argument(path.characters, path.length);
2953}
2954
2955int Process::sys$rmdir(const char* user_path, size_t path_length)
2956{
2957 REQUIRE_PROMISE(cpath);
2958 auto path = get_syscall_path_argument(user_path, path_length);
2959 if (path.is_error())
2960 return path.error();
2961 return VFS::the().rmdir(path.value(), current_directory());
2962}
2963
2964int Process::sys$chmod(const char* user_path, size_t path_length, mode_t mode)
2965{
2966 REQUIRE_PROMISE(fattr);
2967 auto path = get_syscall_path_argument(user_path, path_length);
2968 if (path.is_error())
2969 return path.error();
2970 return VFS::the().chmod(path.value(), mode, current_directory());
2971}
2972
2973int Process::sys$fchmod(int fd, mode_t mode)
2974{
2975 REQUIRE_PROMISE(fattr);
2976 auto description = file_description(fd);
2977 if (!description)
2978 return -EBADF;
2979 return description->chmod(mode);
2980}
2981
2982int Process::sys$fchown(int fd, uid_t uid, gid_t gid)
2983{
2984 REQUIRE_PROMISE(chown);
2985 auto description = file_description(fd);
2986 if (!description)
2987 return -EBADF;
2988 return description->chown(uid, gid);
2989}
2990
2991int Process::sys$chown(const Syscall::SC_chown_params* user_params)
2992{
2993 REQUIRE_PROMISE(chown);
2994 Syscall::SC_chown_params params;
2995 if (!validate_read_and_copy_typed(¶ms, user_params))
2996 return -EFAULT;
2997 auto path = get_syscall_path_argument(params.path);
2998 if (path.is_error())
2999 return path.error();
3000 return VFS::the().chown(path.value(), params.uid, params.gid, current_directory());
3001}
3002
// Tears down a process's resources. Asserts that it runs on the finalizer
// thread (g_finalizer). In order, it: optionally dumps the perf event buffer
// to a "perfcore" file, drops file descriptors and filesystem/ELF references,
// disowns shared buffers, notifies (or detaches from) the parent, clears the
// memory regions, and finally marks the process dead.
void Process::finalize()
{
    ASSERT(Thread::current == g_finalizer);
#ifdef PROCESS_DEBUG
    dbg() << "Finalizing process " << *this;
#endif

    if (m_perf_event_buffer) {
        // O_CREAT | O_EXCL: the dump is only written if "perfcore" does not
        // already exist; a failed open is silently skipped.
        auto description_or_error = VFS::the().open("perfcore", O_CREAT | O_EXCL, 0400, current_directory(), UidAndGid { m_uid, m_gid });
        if (!description_or_error.is_error()) {
            auto& description = description_or_error.value();
            auto json = m_perf_event_buffer->to_json(m_pid, m_executable ? m_executable->absolute_path() : "");
            description->write(json.data(), json.size());
        }
    }

    // Release references held by the process. This happens after the perf dump
    // above, which still needs current_directory() and m_executable.
    m_fds.clear();
    m_tty = nullptr;
    m_executable = nullptr;
    m_cwd = nullptr;
    m_root_directory = nullptr;
    m_root_directory_relative_to_global_root = nullptr;
    m_elf_loader = nullptr;

    disown_all_shared_buffers();
    {
        InterruptDisabler disabler;
        // The parent is looked up as a thread whose tid equals m_ppid.
        if (auto* parent_thread = Thread::from_tid(m_ppid)) {
            if (parent_thread->m_signal_action_data[SIGCHLD].flags & SA_NOCLDWAIT) {
                // NOTE: If the parent doesn't care about this process, let it go.
                m_ppid = 0;
            } else {
                parent_thread->send_signal(SIGCHLD, this);
            }
        }
    }

    m_regions.clear();

    m_dead = true;
}
3044
3045void Process::die()
3046{
3047 // Let go of the TTY, otherwise a slave PTY may keep the master PTY from
3048 // getting an EOF when the last process using the slave PTY dies.
3049 // If the master PTY owner relies on an EOF to know when to wait() on a
3050 // slave owner, we have to allow the PTY pair to be torn down.
3051 m_tty = nullptr;
3052
3053 if (m_tracer)
3054 m_tracer->set_dead();
3055
3056 {
3057 // Tell the threads to unwind and die.
3058 InterruptDisabler disabler;
3059 for_each_thread([](Thread& thread) {
3060 thread.set_should_die();
3061 return IterationDecision::Continue;
3062 });
3063 }
3064}
3065
3066size_t Process::amount_dirty_private() const
3067{
3068 // FIXME: This gets a bit more complicated for Regions sharing the same underlying VMObject.
3069 // The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping.
3070 // That's probably a situation that needs to be looked at in general.
3071 size_t amount = 0;
3072 for (auto& region : m_regions) {
3073 if (!region.is_shared())
3074 amount += region.amount_dirty();
3075 }
3076 return amount;
3077}
3078
3079size_t Process::amount_clean_inode() const
3080{
3081 HashTable<const InodeVMObject*> vmobjects;
3082 for (auto& region : m_regions) {
3083 if (region.vmobject().is_inode())
3084 vmobjects.set(&static_cast<const InodeVMObject&>(region.vmobject()));
3085 }
3086 size_t amount = 0;
3087 for (auto& vmobject : vmobjects)
3088 amount += vmobject->amount_clean();
3089 return amount;
3090}
3091
3092size_t Process::amount_virtual() const
3093{
3094 size_t amount = 0;
3095 for (auto& region : m_regions) {
3096 amount += region.size();
3097 }
3098 return amount;
3099}
3100
3101size_t Process::amount_resident() const
3102{
3103 // FIXME: This will double count if multiple regions use the same physical page.
3104 size_t amount = 0;
3105 for (auto& region : m_regions) {
3106 amount += region.amount_resident();
3107 }
3108 return amount;
3109}
3110
3111size_t Process::amount_shared() const
3112{
3113 // FIXME: This will double count if multiple regions use the same physical page.
3114 // FIXME: It doesn't work at the moment, since it relies on PhysicalPage ref counts,
3115 // and each PhysicalPage is only reffed by its VMObject. This needs to be refactored
3116 // so that every Region contributes +1 ref to each of its PhysicalPages.
3117 size_t amount = 0;
3118 for (auto& region : m_regions) {
3119 amount += region.amount_shared();
3120 }
3121 return amount;
3122}
3123
3124size_t Process::amount_purgeable_volatile() const
3125{
3126 size_t amount = 0;
3127 for (auto& region : m_regions) {
3128 if (region.vmobject().is_purgeable() && static_cast<const PurgeableVMObject&>(region.vmobject()).is_volatile())
3129 amount += region.amount_resident();
3130 }
3131 return amount;
3132}
3133
3134size_t Process::amount_purgeable_nonvolatile() const
3135{
3136 size_t amount = 0;
3137 for (auto& region : m_regions) {
3138 if (region.vmobject().is_purgeable() && !static_cast<const PurgeableVMObject&>(region.vmobject()).is_volatile())
3139 amount += region.amount_resident();
3140 }
3141 return amount;
3142}
3143
// Applies the promise check that matches a socket domain:
// AF_INET requires the "inet" promise, AF_LOCAL requires the "unix" promise.
// Any other domain value passes through without a check.
// Wrapped in do { } while (0) so it can be used as a single statement.
#define REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(domain) \
    do {                                          \
        if (domain == AF_INET)                    \
            REQUIRE_PROMISE(inet);                \
        else if (domain == AF_LOCAL)              \
            REQUIRE_PROMISE(unix);                \
    } while (0)
3151
3152int Process::sys$socket(int domain, int type, int protocol)
3153{
3154 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(domain);
3155
3156 if ((type & SOCK_TYPE_MASK) == SOCK_RAW && !is_superuser())
3157 return -EACCES;
3158 int fd = alloc_fd();
3159 if (fd < 0)
3160 return fd;
3161 auto result = Socket::create(domain, type, protocol);
3162 if (result.is_error())
3163 return result.error();
3164 auto description = FileDescription::create(*result.value());
3165 description->set_readable(true);
3166 description->set_writable(true);
3167 unsigned flags = 0;
3168 if (type & SOCK_CLOEXEC)
3169 flags |= FD_CLOEXEC;
3170 if (type & SOCK_NONBLOCK)
3171 description->set_blocking(false);
3172 m_fds[fd].set(move(description), flags);
3173 return fd;
3174}
3175
3176int Process::sys$bind(int sockfd, const sockaddr* address, socklen_t address_length)
3177{
3178 if (!validate_read(address, address_length))
3179 return -EFAULT;
3180 auto description = file_description(sockfd);
3181 if (!description)
3182 return -EBADF;
3183 if (!description->is_socket())
3184 return -ENOTSOCK;
3185 auto& socket = *description->socket();
3186 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain());
3187 return socket.bind(address, address_length);
3188}
3189
3190int Process::sys$listen(int sockfd, int backlog)
3191{
3192 if (backlog < 0)
3193 return -EINVAL;
3194 auto description = file_description(sockfd);
3195 if (!description)
3196 return -EBADF;
3197 if (!description->is_socket())
3198 return -ENOTSOCK;
3199 auto& socket = *description->socket();
3200 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain());
3201 if (socket.is_connected())
3202 return -EINVAL;
3203 return socket.listen(backlog);
3204}
3205
// Accepts a pending connection on a listening socket and returns a new file
// descriptor for it. Gated by REQUIRE_PROMISE(accept).
// user_address / user_address_size receive the peer address and its length;
// user_address_size is read first to learn how large the caller's buffer is.
// Returns -EFAULT on bad user pointers, -EBADF / -ENOTSOCK for a bad
// descriptor, -EAGAIN if nothing is pending on a non-blocking socket, and
// -EINTR if a blocking wait did not wake normally.
int Process::sys$accept(int accepting_socket_fd, sockaddr* user_address, socklen_t* user_address_size)
{
    REQUIRE_PROMISE(accept);
    if (!validate_write_typed(user_address_size))
        return -EFAULT;
    socklen_t address_size = 0;
    copy_from_user(&address_size, user_address_size);
    if (!validate_write(user_address, address_size))
        return -EFAULT;
    // NOTE(review): the fd is chosen here, before the potentially blocking wait
    // below, and only installed into m_fds afterwards — confirm that alloc_fd()
    // cannot hand the same slot to another thread in between.
    int accepted_socket_fd = alloc_fd();
    if (accepted_socket_fd < 0)
        return accepted_socket_fd;
    auto accepting_socket_description = file_description(accepting_socket_fd);
    if (!accepting_socket_description)
        return -EBADF;
    if (!accepting_socket_description->is_socket())
        return -ENOTSOCK;
    auto& socket = *accepting_socket_description->socket();
    if (!socket.can_accept()) {
        if (accepting_socket_description->is_blocking()) {
            // Block until the AcceptBlocker wakes us; any abnormal wake is reported as -EINTR.
            if (Thread::current->block<Thread::AcceptBlocker>(*accepting_socket_description) != Thread::BlockResult::WokeNormally)
                return -EINTR;
        } else {
            return -EAGAIN;
        }
    }
    auto accepted_socket = socket.accept();
    ASSERT(accepted_socket);

    // The peer address is staged in a kernel buffer sized for sockaddr_un and
    // the length is clamped to that size before copying back to userspace.
    u8 address_buffer[sizeof(sockaddr_un)];
    address_size = min(sizeof(sockaddr_un), static_cast<size_t>(address_size));
    accepted_socket->get_peer_address((sockaddr*)address_buffer, &address_size);
    copy_to_user(user_address, address_buffer, address_size);
    copy_to_user(user_address_size, &address_size);

    auto accepted_socket_description = FileDescription::create(*accepted_socket);
    accepted_socket_description->set_readable(true);
    accepted_socket_description->set_writable(true);
    // NOTE: The accepted socket inherits fd flags from the accepting socket.
    //       I'm not sure if this matches other systems but it makes sense to me.
    accepted_socket_description->set_blocking(accepting_socket_description->is_blocking());
    m_fds[accepted_socket_fd].set(move(accepted_socket_description), m_fds[accepting_socket_fd].flags);

    // NOTE: Moving this state to Completed is what causes connect() to unblock on the client side.
    accepted_socket->set_setup_state(Socket::SetupState::Completed);
    return accepted_socket_fd;
}
3253
3254int Process::sys$connect(int sockfd, const sockaddr* address, socklen_t address_size)
3255{
3256 if (!validate_read(address, address_size))
3257 return -EFAULT;
3258 int fd = alloc_fd();
3259 if (fd < 0)
3260 return fd;
3261 auto description = file_description(sockfd);
3262 if (!description)
3263 return -EBADF;
3264 if (!description->is_socket())
3265 return -ENOTSOCK;
3266
3267 auto& socket = *description->socket();
3268 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain());
3269 SmapDisabler disabler;
3270 return socket.connect(*description, address, address_size, description->is_blocking() ? ShouldBlock::Yes : ShouldBlock::No);
3271}
3272
3273int Process::sys$shutdown(int sockfd, int how)
3274{
3275 REQUIRE_PROMISE(stdio);
3276 if (how & ~SHUT_RDWR)
3277 return -EINVAL;
3278 auto description = file_description(sockfd);
3279 if (!description)
3280 return -EBADF;
3281 if (!description->is_socket())
3282 return -ENOTSOCK;
3283
3284 auto& socket = *description->socket();
3285 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain());
3286 return socket.shutdown(how);
3287}
3288
3289ssize_t Process::sys$sendto(const Syscall::SC_sendto_params* user_params)
3290{
3291 REQUIRE_PROMISE(stdio);
3292 Syscall::SC_sendto_params params;
3293 if (!validate_read_and_copy_typed(¶ms, user_params))
3294 return -EFAULT;
3295
3296 int flags = params.flags;
3297 const sockaddr* addr = params.addr;
3298 socklen_t addr_length = params.addr_length;
3299
3300 if (!validate(params.data))
3301 return -EFAULT;
3302 if (addr && !validate_read(addr, addr_length))
3303 return -EFAULT;
3304 auto description = file_description(params.sockfd);
3305 if (!description)
3306 return -EBADF;
3307 if (!description->is_socket())
3308 return -ENOTSOCK;
3309 auto& socket = *description->socket();
3310 if (socket.is_shut_down_for_writing())
3311 return -EPIPE;
3312 SmapDisabler disabler;
3313 return socket.sendto(*description, params.data.data, params.data.size, flags, addr, addr_length);
3314}
3315
3316ssize_t Process::sys$recvfrom(const Syscall::SC_recvfrom_params* user_params)
3317{
3318 REQUIRE_PROMISE(stdio);
3319
3320 Syscall::SC_recvfrom_params params;
3321 if (!validate_read_and_copy_typed(¶ms, user_params))
3322 return -EFAULT;
3323
3324 int flags = params.flags;
3325 sockaddr* addr = params.addr;
3326 socklen_t* addr_length = params.addr_length;
3327
3328 SmapDisabler disabler;
3329 if (!validate(params.buffer))
3330 return -EFAULT;
3331 if (addr_length) {
3332 if (!validate_write_typed(addr_length))
3333 return -EFAULT;
3334 if (!validate_write(addr, *addr_length))
3335 return -EFAULT;
3336 } else if (addr) {
3337 return -EINVAL;
3338 }
3339 auto description = file_description(params.sockfd);
3340 if (!description)
3341 return -EBADF;
3342 if (!description->is_socket())
3343 return -ENOTSOCK;
3344 auto& socket = *description->socket();
3345
3346 if (socket.is_shut_down_for_reading())
3347 return 0;
3348
3349 bool original_blocking = description->is_blocking();
3350 if (flags & MSG_DONTWAIT)
3351 description->set_blocking(false);
3352
3353 auto nrecv = socket.recvfrom(*description, params.buffer.data, params.buffer.size, flags, addr, addr_length);
3354 if (flags & MSG_DONTWAIT)
3355 description->set_blocking(original_blocking);
3356
3357 return nrecv;
3358}
3359
3360template<bool sockname, typename Params>
3361int Process::get_sock_or_peer_name(const Params& params)
3362{
3363 socklen_t addrlen_value;
3364 if (!validate_read_and_copy_typed(&addrlen_value, params.addrlen))
3365 return -EFAULT;
3366
3367 if (addrlen_value <= 0)
3368 return -EINVAL;
3369
3370 if (!validate_write(params.addr, addrlen_value))
3371 return -EFAULT;
3372
3373 if (!validate_write_typed(params.addrlen))
3374 return -EFAULT;
3375
3376 auto description = file_description(params.sockfd);
3377 if (!description)
3378 return -EBADF;
3379
3380 if (!description->is_socket())
3381 return -ENOTSOCK;
3382
3383 auto& socket = *description->socket();
3384 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain());
3385
3386 u8 address_buffer[sizeof(sockaddr_un)];
3387 addrlen_value = min(sizeof(sockaddr_un), static_cast<size_t>(addrlen_value));
3388 if constexpr (sockname)
3389 socket.get_local_address((sockaddr*)address_buffer, &addrlen_value);
3390 else
3391 socket.get_peer_address((sockaddr*)address_buffer, &addrlen_value);
3392 copy_to_user(params.addr, address_buffer, addrlen_value);
3393 copy_to_user(params.addrlen, &addrlen_value);
3394 return 0;
3395}
3396
3397int Process::sys$getsockname(const Syscall::SC_getsockname_params* user_params)
3398{
3399 Syscall::SC_getsockname_params params;
3400 if (!validate_read_and_copy_typed(¶ms, user_params))
3401 return -EFAULT;
3402 return get_sock_or_peer_name<true>(params);
3403}
3404
3405int Process::sys$getpeername(const Syscall::SC_getpeername_params* user_params)
3406{
3407 Syscall::SC_getpeername_params params;
3408 if (!validate_read_and_copy_typed(¶ms, user_params))
3409 return -EFAULT;
3410 return get_sock_or_peer_name<false>(params);
3411}
3412
3413int Process::sys$sched_setparam(int tid, const struct sched_param* param)
3414{
3415 REQUIRE_PROMISE(proc);
3416 if (!validate_read_typed(param))
3417 return -EFAULT;
3418
3419 int desired_priority;
3420 copy_from_user(&desired_priority, ¶m->sched_priority);
3421
3422 InterruptDisabler disabler;
3423 auto* peer = Thread::current;
3424 if (tid != 0)
3425 peer = Thread::from_tid(tid);
3426
3427 if (!peer)
3428 return -ESRCH;
3429
3430 if (!is_superuser() && m_euid != peer->process().m_uid && m_uid != peer->process().m_uid)
3431 return -EPERM;
3432
3433 if (desired_priority < THREAD_PRIORITY_MIN || desired_priority > THREAD_PRIORITY_MAX)
3434 return -EINVAL;
3435
3436 peer->set_priority((u32)desired_priority);
3437 return 0;
3438}
3439
3440int Process::sys$sched_getparam(pid_t pid, struct sched_param* param)
3441{
3442 REQUIRE_PROMISE(proc);
3443 if (!validate_write_typed(param))
3444 return -EFAULT;
3445
3446 InterruptDisabler disabler;
3447 auto* peer = Thread::current;
3448 if (pid != 0)
3449 peer = Thread::from_tid(pid);
3450
3451 if (!peer)
3452 return -ESRCH;
3453
3454 if (!is_superuser() && m_euid != peer->process().m_uid && m_uid != peer->process().m_uid)
3455 return -EPERM;
3456
3457 int priority = peer->priority();
3458 copy_to_user(¶m->sched_priority, &priority);
3459 return 0;
3460}
3461
3462int Process::sys$getsockopt(const Syscall::SC_getsockopt_params* params)
3463{
3464 if (!validate_read_typed(params))
3465 return -EFAULT;
3466
3467 SmapDisabler disabler;
3468
3469 int sockfd = params->sockfd;
3470 int level = params->level;
3471 int option = params->option;
3472 void* value = params->value;
3473 socklen_t* value_size = params->value_size;
3474
3475 if (!validate_write_typed(value_size))
3476 return -EFAULT;
3477 if (!validate_write(value, *value_size))
3478 return -EFAULT;
3479 auto description = file_description(sockfd);
3480 if (!description)
3481 return -EBADF;
3482 if (!description->is_socket())
3483 return -ENOTSOCK;
3484 auto& socket = *description->socket();
3485
3486 if (has_promised(Pledge::accept) && socket.is_local() && level == SOL_SOCKET && option == SO_PEERCRED) {
3487 // We make an exception for SOL_SOCKET::SO_PEERCRED on local sockets if you've pledged "accept"
3488 } else {
3489 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain());
3490 }
3491 return socket.getsockopt(*description, level, option, value, value_size);
3492}
3493
3494int Process::sys$setsockopt(const Syscall::SC_setsockopt_params* params)
3495{
3496 if (!validate_read_typed(params))
3497 return -EFAULT;
3498
3499 SmapDisabler disabler;
3500
3501 int sockfd = params->sockfd;
3502 int level = params->level;
3503 int option = params->option;
3504 const void* value = params->value;
3505 socklen_t value_size = params->value_size;
3506
3507 if (!validate_read(value, value_size))
3508 return -EFAULT;
3509 auto description = file_description(sockfd);
3510 if (!description)
3511 return -EBADF;
3512 if (!description->is_socket())
3513 return -ENOTSOCK;
3514 auto& socket = *description->socket();
3515 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain());
3516 return socket.setsockopt(level, option, value, value_size);
3517}
3518
3519void Process::disown_all_shared_buffers()
3520{
3521 LOCKER(shared_buffers().lock());
3522 Vector<SharedBuffer*, 32> buffers_to_disown;
3523 for (auto& it : shared_buffers().resource())
3524 buffers_to_disown.append(it.value.ptr());
3525 for (auto* shared_buffer : buffers_to_disown)
3526 shared_buffer->disown(m_pid);
3527}
3528
3529int Process::sys$create_shared_buffer(int size, void** buffer)
3530{
3531 REQUIRE_PROMISE(shared_buffer);
3532 if (!size || size < 0)
3533 return -EINVAL;
3534 size = PAGE_ROUND_UP(size);
3535 if (!validate_write_typed(buffer))
3536 return -EFAULT;
3537
3538 LOCKER(shared_buffers().lock());
3539 static int s_next_shared_buffer_id;
3540 int shared_buffer_id = ++s_next_shared_buffer_id;
3541 auto shared_buffer = make<SharedBuffer>(shared_buffer_id, size);
3542 shared_buffer->share_with(m_pid);
3543
3544 void* address = shared_buffer->ref_for_process_and_get_address(*this);
3545 copy_to_user(buffer, &address);
3546 ASSERT((int)shared_buffer->size() >= size);
3547#ifdef SHARED_BUFFER_DEBUG
3548 kprintf("%s(%u): Created shared buffer %d @ %p (%u bytes, vmobject is %u)\n", name().characters(), pid(), shared_buffer_id, *buffer, size, shared_buffer->size());
3549#endif
3550 shared_buffers().resource().set(shared_buffer_id, move(shared_buffer));
3551
3552 return shared_buffer_id;
3553}
3554
3555int Process::sys$share_buffer_with(int shared_buffer_id, pid_t peer_pid)
3556{
3557 REQUIRE_PROMISE(shared_buffer);
3558 if (!peer_pid || peer_pid < 0 || peer_pid == m_pid)
3559 return -EINVAL;
3560 LOCKER(shared_buffers().lock());
3561 auto it = shared_buffers().resource().find(shared_buffer_id);
3562 if (it == shared_buffers().resource().end())
3563 return -EINVAL;
3564 auto& shared_buffer = *(*it).value;
3565 if (!shared_buffer.is_shared_with(m_pid))
3566 return -EPERM;
3567 {
3568 InterruptDisabler disabler;
3569 auto* peer = Process::from_pid(peer_pid);
3570 if (!peer)
3571 return -ESRCH;
3572 }
3573 shared_buffer.share_with(peer_pid);
3574 return 0;
3575}
3576
3577int Process::sys$share_buffer_globally(int shared_buffer_id)
3578{
3579 REQUIRE_PROMISE(shared_buffer);
3580 LOCKER(shared_buffers().lock());
3581 auto it = shared_buffers().resource().find(shared_buffer_id);
3582 if (it == shared_buffers().resource().end())
3583 return -EINVAL;
3584 auto& shared_buffer = *(*it).value;
3585 if (!shared_buffer.is_shared_with(m_pid))
3586 return -EPERM;
3587 shared_buffer.share_globally();
3588 return 0;
3589}
3590
3591int Process::sys$release_shared_buffer(int shared_buffer_id)
3592{
3593 REQUIRE_PROMISE(shared_buffer);
3594 LOCKER(shared_buffers().lock());
3595 auto it = shared_buffers().resource().find(shared_buffer_id);
3596 if (it == shared_buffers().resource().end())
3597 return -EINVAL;
3598 auto& shared_buffer = *(*it).value;
3599 if (!shared_buffer.is_shared_with(m_pid))
3600 return -EPERM;
3601#ifdef SHARED_BUFFER_DEBUG
3602 kprintf("%s(%u): Releasing shared buffer %d, buffer count: %u\n", name().characters(), pid(), shared_buffer_id, shared_buffers().resource().size());
3603#endif
3604 shared_buffer.deref_for_process(*this);
3605 return 0;
3606}
3607
3608void* Process::sys$get_shared_buffer(int shared_buffer_id)
3609{
3610 REQUIRE_PROMISE(shared_buffer);
3611 LOCKER(shared_buffers().lock());
3612 auto it = shared_buffers().resource().find(shared_buffer_id);
3613 if (it == shared_buffers().resource().end())
3614 return (void*)-EINVAL;
3615 auto& shared_buffer = *(*it).value;
3616 if (!shared_buffer.is_shared_with(m_pid))
3617 return (void*)-EPERM;
3618#ifdef SHARED_BUFFER_DEBUG
3619 kprintf("%s(%u): Retaining shared buffer %d, buffer count: %u\n", name().characters(), pid(), shared_buffer_id, shared_buffers().resource().size());
3620#endif
3621 return shared_buffer.ref_for_process_and_get_address(*this);
3622}
3623
3624int Process::sys$seal_shared_buffer(int shared_buffer_id)
3625{
3626 REQUIRE_PROMISE(shared_buffer);
3627 LOCKER(shared_buffers().lock());
3628 auto it = shared_buffers().resource().find(shared_buffer_id);
3629 if (it == shared_buffers().resource().end())
3630 return -EINVAL;
3631 auto& shared_buffer = *(*it).value;
3632 if (!shared_buffer.is_shared_with(m_pid))
3633 return -EPERM;
3634#ifdef SHARED_BUFFER_DEBUG
3635 kprintf("%s(%u): Sealing shared buffer %d\n", name().characters(), pid(), shared_buffer_id);
3636#endif
3637 shared_buffer.seal();
3638 return 0;
3639}
3640
3641int Process::sys$get_shared_buffer_size(int shared_buffer_id)
3642{
3643 REQUIRE_PROMISE(shared_buffer);
3644 LOCKER(shared_buffers().lock());
3645 auto it = shared_buffers().resource().find(shared_buffer_id);
3646 if (it == shared_buffers().resource().end())
3647 return -EINVAL;
3648 auto& shared_buffer = *(*it).value;
3649 if (!shared_buffer.is_shared_with(m_pid))
3650 return -EPERM;
3651#ifdef SHARED_BUFFER_DEBUG
3652 kprintf("%s(%u): Get shared buffer %d size: %u\n", name().characters(), pid(), shared_buffer_id, shared_buffers().resource().size());
3653#endif
3654 return shared_buffer.size();
3655}
3656
3657int Process::sys$set_shared_buffer_volatile(int shared_buffer_id, bool state)
3658{
3659 REQUIRE_PROMISE(shared_buffer);
3660 LOCKER(shared_buffers().lock());
3661 auto it = shared_buffers().resource().find(shared_buffer_id);
3662 if (it == shared_buffers().resource().end())
3663 return -EINVAL;
3664 auto& shared_buffer = *(*it).value;
3665 if (!shared_buffer.is_shared_with(m_pid))
3666 return -EPERM;
3667#ifdef SHARED_BUFFER_DEBUG
3668 kprintf("%s(%u): Set shared buffer %d volatile: %u\n", name().characters(), pid(), shared_buffer_id, state);
3669#endif
3670 if (!state) {
3671 bool was_purged = shared_buffer.vmobject().was_purged();
3672 shared_buffer.vmobject().set_volatile(state);
3673 shared_buffer.vmobject().set_was_purged(false);
3674 return was_purged ? 1 : 0;
3675 }
3676 shared_buffer.vmobject().set_volatile(true);
3677 return 0;
3678}
3679
3680void Process::terminate_due_to_signal(u8 signal)
3681{
3682 ASSERT_INTERRUPTS_DISABLED();
3683 ASSERT(signal < 32);
3684 dbgprintf("terminate_due_to_signal %s(%u) <- %u\n", name().characters(), pid(), signal);
3685 m_termination_status = 0;
3686 m_termination_signal = signal;
3687 die();
3688}
3689
3690void Process::send_signal(u8 signal, Process* sender)
3691{
3692 InterruptDisabler disabler;
3693 auto* thread = Thread::from_tid(m_pid);
3694 if (!thread)
3695 thread = &any_thread();
3696 thread->send_signal(signal, sender);
3697}
3698
3699int Process::sys$create_thread(void* (*entry)(void*), void* argument, const Syscall::SC_create_thread_params* user_params)
3700{
3701 REQUIRE_PROMISE(thread);
3702 if (!validate_read((const void*)entry, sizeof(void*)))
3703 return -EFAULT;
3704
3705 Syscall::SC_create_thread_params params;
3706 if (!validate_read_and_copy_typed(¶ms, user_params))
3707 return -EFAULT;
3708
3709 unsigned detach_state = params.m_detach_state;
3710 int schedule_priority = params.m_schedule_priority;
3711 void* stack_location = params.m_stack_location;
3712 unsigned stack_size = params.m_stack_size;
3713
3714 if (!validate_write(stack_location, stack_size))
3715 return -EFAULT;
3716
3717 u32 user_stack_address = reinterpret_cast<u32>(stack_location) + stack_size;
3718
3719 if (!MM.validate_user_stack(*this, VirtualAddress(user_stack_address - 4)))
3720 return -EFAULT;
3721
3722 // FIXME: return EAGAIN if Thread::all_threads().size() is greater than PTHREAD_THREADS_MAX
3723
3724 int requested_thread_priority = schedule_priority;
3725 if (requested_thread_priority < THREAD_PRIORITY_MIN || requested_thread_priority > THREAD_PRIORITY_MAX)
3726 return -EINVAL;
3727
3728 bool is_thread_joinable = (0 == detach_state);
3729
3730 // FIXME: Do something with guard pages?
3731
3732 auto* thread = new Thread(*this);
3733
3734 // We know this thread is not the main_thread,
3735 // So give it a unique name until the user calls $set_thread_name on it
3736 // length + 4 to give space for our extra junk at the end
3737 StringBuilder builder(m_name.length() + 4);
3738 builder.append(m_name);
3739 builder.appendf("[%d]", thread->tid());
3740 thread->set_name(builder.to_string());
3741
3742 thread->set_priority(requested_thread_priority);
3743 thread->set_joinable(is_thread_joinable);
3744
3745 auto& tss = thread->tss();
3746 tss.eip = (uintptr_t)entry;
3747 tss.eflags = 0x0202;
3748 tss.cr3 = page_directory().cr3();
3749 tss.esp = user_stack_address;
3750
3751 // NOTE: The stack needs to be 16-byte aligned.
3752 thread->push_value_on_stack((uintptr_t)argument);
3753 thread->push_value_on_stack(0);
3754
3755 thread->make_thread_specific_region({});
3756 thread->set_state(Thread::State::Runnable);
3757 return thread->tid();
3758}
3759
// Terminates the calling thread, recording `exit_value` on the thread.
// Gated by REQUIRE_PROMISE(thread). Not expected to return: the function ends
// in ASSERT_NOT_REACHED() after die_if_needed().
void Process::sys$exit_thread(void* exit_value)
{
    REQUIRE_PROMISE(thread);
    cli();
    Thread::current->m_exit_value = exit_value;
    Thread::current->set_should_die();
    // Release the process big lock (if held) before the thread dies.
    big_lock().force_unlock_if_locked();
    Thread::current->die_if_needed();
    ASSERT_NOT_REACHED();
}
3770
3771int Process::sys$detach_thread(int tid)
3772{
3773 REQUIRE_PROMISE(thread);
3774 InterruptDisabler disabler;
3775 auto* thread = Thread::from_tid(tid);
3776 if (!thread || thread->pid() != pid())
3777 return -ESRCH;
3778
3779 if (!thread->is_joinable())
3780 return -EINVAL;
3781
3782 thread->set_joinable(false);
3783 return 0;
3784}
3785
3786int Process::sys$join_thread(int tid, void** exit_value)
3787{
3788 REQUIRE_PROMISE(thread);
3789 if (exit_value && !validate_write_typed(exit_value))
3790 return -EFAULT;
3791
3792 InterruptDisabler disabler;
3793 auto* thread = Thread::from_tid(tid);
3794 if (!thread || thread->pid() != pid())
3795 return -ESRCH;
3796
3797 if (thread == Thread::current)
3798 return -EDEADLK;
3799
3800 if (thread->m_joinee == Thread::current)
3801 return -EDEADLK;
3802
3803 ASSERT(thread->m_joiner != Thread::current);
3804 if (thread->m_joiner)
3805 return -EINVAL;
3806
3807 if (!thread->is_joinable())
3808 return -EINVAL;
3809
3810 void* joinee_exit_value = nullptr;
3811
3812 // NOTE: pthread_join() cannot be interrupted by signals. Only by death.
3813 for (;;) {
3814 auto result = Thread::current->block<Thread::JoinBlocker>(*thread, joinee_exit_value);
3815 if (result == Thread::BlockResult::InterruptedByDeath) {
3816 // NOTE: This cleans things up so that Thread::finalize() won't
3817 // get confused about a missing joiner when finalizing the joinee.
3818 InterruptDisabler disabler;
3819 Thread::current->m_joinee->m_joiner = nullptr;
3820 Thread::current->m_joinee = nullptr;
3821 return 0;
3822 }
3823 }
3824
3825 // NOTE: 'thread' is very possibly deleted at this point. Clear it just to be safe.
3826 thread = nullptr;
3827
3828 if (exit_value)
3829 copy_to_user(exit_value, &joinee_exit_value);
3830 return 0;
3831}
3832
3833int Process::sys$set_thread_name(int tid, const char* user_name, size_t user_name_length)
3834{
3835 REQUIRE_PROMISE(thread);
3836 auto name = validate_and_copy_string_from_user(user_name, user_name_length);
3837 if (name.is_null())
3838 return -EFAULT;
3839
3840 const size_t max_thread_name_size = 64;
3841 if (name.length() > max_thread_name_size)
3842 return -EINVAL;
3843
3844 InterruptDisabler disabler;
3845 auto* thread = Thread::from_tid(tid);
3846 if (!thread || thread->pid() != pid())
3847 return -ESRCH;
3848
3849 thread->set_name(name);
3850 return 0;
3851}
3852int Process::sys$get_thread_name(int tid, char* buffer, size_t buffer_size)
3853{
3854 REQUIRE_PROMISE(thread);
3855 if (buffer_size == 0)
3856 return -EINVAL;
3857
3858 if (!validate_write(buffer, buffer_size))
3859 return -EFAULT;
3860
3861 InterruptDisabler disabler;
3862 auto* thread = Thread::from_tid(tid);
3863 if (!thread || thread->pid() != pid())
3864 return -ESRCH;
3865
3866 if (thread->name().length() + 1 > (size_t)buffer_size)
3867 return -ENAMETOOLONG;
3868
3869 copy_to_user(buffer, thread->name().characters(), thread->name().length() + 1);
3870 return 0;
3871}
3872
3873int Process::sys$gettid()
3874{
3875 REQUIRE_PROMISE(stdio);
3876 return Thread::current->tid();
3877}
3878
3879int Process::sys$donate(int tid)
3880{
3881 REQUIRE_PROMISE(stdio);
3882 if (tid < 0)
3883 return -EINVAL;
3884 InterruptDisabler disabler;
3885 auto* thread = Thread::from_tid(tid);
3886 if (!thread || thread->pid() != pid())
3887 return -ESRCH;
3888 Scheduler::donate_to(thread, "sys$donate");
3889 return 0;
3890}
3891
3892int Process::sys$rename(const Syscall::SC_rename_params* user_params)
3893{
3894 REQUIRE_PROMISE(cpath);
3895 Syscall::SC_rename_params params;
3896 if (!validate_read_and_copy_typed(¶ms, user_params))
3897 return -EFAULT;
3898 auto old_path = get_syscall_path_argument(params.old_path);
3899 if (old_path.is_error())
3900 return old_path.error();
3901 auto new_path = get_syscall_path_argument(params.new_path);
3902 if (new_path.is_error())
3903 return new_path.error();
3904 return VFS::the().rename(old_path.value(), new_path.value(), current_directory());
3905}
3906
3907int Process::sys$ftruncate(int fd, off_t length)
3908{
3909 REQUIRE_PROMISE(stdio);
3910 if (length < 0)
3911 return -EINVAL;
3912 auto description = file_description(fd);
3913 if (!description)
3914 return -EBADF;
3915 if (!description->is_writable())
3916 return -EBADF;
3917 return description->truncate(static_cast<u64>(length));
3918}
3919
3920int Process::sys$watch_file(const char* user_path, size_t path_length)
3921{
3922 REQUIRE_PROMISE(rpath);
3923 auto path = get_syscall_path_argument(user_path, path_length);
3924 if (path.is_error())
3925 return path.error();
3926
3927 auto custody_or_error = VFS::the().resolve_path(path.value(), current_directory());
3928 if (custody_or_error.is_error())
3929 return custody_or_error.error();
3930
3931 auto& custody = custody_or_error.value();
3932 auto& inode = custody->inode();
3933
3934 if (!inode.fs().supports_watchers())
3935 return -ENOTSUP;
3936
3937 int fd = alloc_fd();
3938 if (fd < 0)
3939 return fd;
3940
3941 m_fds[fd].set(FileDescription::create(*InodeWatcher::create(inode)));
3942 m_fds[fd].description->set_readable(true);
3943 return fd;
3944}
3945
3946int Process::sys$systrace(pid_t pid)
3947{
3948 REQUIRE_PROMISE(proc);
3949 InterruptDisabler disabler;
3950 auto* peer = Process::from_pid(pid);
3951 if (!peer)
3952 return -ESRCH;
3953 if (peer->uid() != m_euid)
3954 return -EACCES;
3955 int fd = alloc_fd();
3956 if (fd < 0)
3957 return fd;
3958 auto description = FileDescription::create(peer->ensure_tracer());
3959 description->set_readable(true);
3960 m_fds[fd].set(move(description), 0);
3961 return fd;
3962}
3963
3964int Process::sys$halt()
3965{
3966 if (!is_superuser())
3967 return -EPERM;
3968
3969 REQUIRE_NO_PROMISES;
3970
3971 dbgprintf("acquiring FS locks...\n");
3972 FS::lock_all();
3973 dbgprintf("syncing mounted filesystems...\n");
3974 FS::sync();
3975 dbgprintf("attempting system shutdown...\n");
3976 IO::out16(0x604, 0x2000);
3977
3978 return ESUCCESS;
3979}
3980
3981int Process::sys$reboot()
3982{
3983 if (!is_superuser())
3984 return -EPERM;
3985
3986 REQUIRE_NO_PROMISES;
3987
3988 dbgprintf("acquiring FS locks...\n");
3989 FS::lock_all();
3990 dbgprintf("syncing mounted filesystems...\n");
3991 FS::sync();
3992 dbgprintf("attempting reboot via KB Controller...\n");
3993 IO::out8(0x64, 0xFE);
3994
3995 return ESUCCESS;
3996}
3997
3998int Process::sys$mount(const Syscall::SC_mount_params* user_params)
3999{
4000 if (!is_superuser())
4001 return -EPERM;
4002
4003 REQUIRE_NO_PROMISES;
4004
4005 Syscall::SC_mount_params params;
4006 if (!validate_read_and_copy_typed(¶ms, user_params))
4007 return -EFAULT;
4008
4009 auto source = validate_and_copy_string_from_user(params.source);
4010 auto target = validate_and_copy_string_from_user(params.target);
4011 auto fs_type = validate_and_copy_string_from_user(params.fs_type);
4012
4013 if (source.is_null() || target.is_null() || fs_type.is_null())
4014 return -EFAULT;
4015
4016 dbg() << "mount " << fs_type << ": source " << source << " @ " << target;
4017
4018 auto custody_or_error = VFS::the().resolve_path(target, current_directory());
4019 if (custody_or_error.is_error())
4020 return custody_or_error.error();
4021
4022 auto& target_custody = custody_or_error.value();
4023
4024 RefPtr<FS> fs;
4025
4026 if (params.flags & MS_BIND) {
4027 // We're doing a bind mount.
4028 auto source_or_error = VFS::the().resolve_path(source, current_directory());
4029 if (source_or_error.is_error())
4030 return source_or_error.error();
4031 auto& source_custody = source_or_error.value();
4032 return VFS::the().bind_mount(source_custody, target_custody, params.flags);
4033 }
4034
4035 if (fs_type == "ext2" || fs_type == "Ext2FS") {
4036 auto source_or_error = VFS::the().open(source, O_RDWR, 0, current_directory());
4037 if (source_or_error.is_error())
4038 return source_or_error.error();
4039
4040 auto* device = source_or_error.value()->device();
4041 if (!device || !device->is_block_device()) {
4042 dbg() << "mount: this is not a BlockDevice";
4043 return -ENODEV;
4044 }
4045 auto& block_device = static_cast<BlockDevice&>(*device);
4046
4047 dbg() << "mount: attempting to mount " << block_device.absolute_path() << " on " << target;
4048
4049 fs = Ext2FS::create(block_device);
4050 } else if (fs_type == "proc" || fs_type == "ProcFS") {
4051 fs = ProcFS::create();
4052 } else if (fs_type == "devpts" || fs_type == "DevPtsFS") {
4053 fs = DevPtsFS::create();
4054 } else if (fs_type == "tmp" || fs_type == "TmpFS") {
4055 fs = TmpFS::create();
4056 } else {
4057 return -ENODEV;
4058 }
4059
4060 if (!fs->initialize()) {
4061 dbg() << "mount: failed to initialize " << fs_type << " filesystem on " << source;
4062 return -ENODEV;
4063 }
4064
4065 auto result = VFS::the().mount(fs.release_nonnull(), target_custody, params.flags);
4066 dbg() << "mount: successfully mounted " << source << " on " << target;
4067 return result;
4068}
4069
4070int Process::sys$umount(const char* user_mountpoint, size_t mountpoint_length)
4071{
4072 if (!is_superuser())
4073 return -EPERM;
4074
4075 REQUIRE_NO_PROMISES;
4076
4077 if (!validate_read(user_mountpoint, mountpoint_length))
4078 return -EFAULT;
4079
4080 auto mountpoint = get_syscall_path_argument(user_mountpoint, mountpoint_length);
4081 if (mountpoint.is_error())
4082 return mountpoint.error();
4083
4084 auto metadata_or_error = VFS::the().lookup_metadata(mountpoint.value(), current_directory());
4085 if (metadata_or_error.is_error())
4086 return metadata_or_error.error();
4087
4088 auto guest_inode_id = metadata_or_error.value().inode;
4089 return VFS::the().unmount(guest_inode_id);
4090}
4091
4092ProcessTracer& Process::ensure_tracer()
4093{
4094 if (!m_tracer)
4095 m_tracer = ProcessTracer::create(m_pid);
4096 return *m_tracer;
4097}
4098
4099void Process::FileDescriptionAndFlags::clear()
4100{
4101 description = nullptr;
4102 flags = 0;
4103}
4104
4105void Process::FileDescriptionAndFlags::set(NonnullRefPtr<FileDescription>&& d, u32 f)
4106{
4107 description = move(d);
4108 flags = f;
4109}
4110
4111int Process::sys$mknod(const Syscall::SC_mknod_params* user_params)
4112{
4113 REQUIRE_PROMISE(dpath);
4114 Syscall::SC_mknod_params params;
4115 if (!validate_read_and_copy_typed(¶ms, user_params))
4116 return -EFAULT;
4117 if (!is_superuser() && !is_regular_file(params.mode) && !is_fifo(params.mode) && !is_socket(params.mode))
4118 return -EPERM;
4119 auto path = get_syscall_path_argument(params.path);
4120 if (path.is_error())
4121 return path.error();
4122 return VFS::the().mknod(path.value(), params.mode & ~umask(), params.dev, current_directory());
4123}
4124
4125int Process::sys$dump_backtrace()
4126{
4127 dump_backtrace();
4128 return 0;
4129}
4130
4131int Process::sys$dbgputch(u8 ch)
4132{
4133 IO::out8(0xe9, ch);
4134 return 0;
4135}
4136
4137int Process::sys$dbgputstr(const u8* characters, int length)
4138{
4139 if (!length)
4140 return 0;
4141 if (!validate_read(characters, length))
4142 return -EFAULT;
4143 SmapDisabler disabler;
4144 for (int i = 0; i < length; ++i)
4145 IO::out8(0xe9, characters[i]);
4146 return 0;
4147}
4148
4149KBuffer Process::backtrace(ProcessInspectionHandle& handle) const
4150{
4151 KBufferBuilder builder;
4152 for_each_thread([&](Thread& thread) {
4153 builder.appendf("Thread %d (%s):\n", thread.tid(), thread.name().characters());
4154 builder.append(thread.backtrace(handle));
4155 return IterationDecision::Continue;
4156 });
4157 return builder.build();
4158}
4159
4160int Process::sys$set_process_icon(int icon_id)
4161{
4162 REQUIRE_PROMISE(shared_buffer);
4163 LOCKER(shared_buffers().lock());
4164 auto it = shared_buffers().resource().find(icon_id);
4165 if (it == shared_buffers().resource().end())
4166 return -EINVAL;
4167 auto& shared_buffer = *(*it).value;
4168 if (!shared_buffer.is_shared_with(m_pid))
4169 return -EPERM;
4170 m_icon_id = icon_id;
4171 return 0;
4172}
4173
4174int Process::sys$get_process_name(char* buffer, int buffer_size)
4175{
4176 REQUIRE_PROMISE(stdio);
4177 if (buffer_size <= 0)
4178 return -EINVAL;
4179
4180 if (!validate_write(buffer, buffer_size))
4181 return -EFAULT;
4182
4183 if (m_name.length() + 1 > (size_t)buffer_size)
4184 return -ENAMETOOLONG;
4185
4186 copy_to_user(buffer, m_name.characters(), m_name.length() + 1);
4187 return 0;
4188}
4189
// We don't use the flags yet, but we could use them to distinguish between
// random sources like Linux does, unlike the OpenBSD equivalent. However, if
// we do, we should be aware of the caveats that Linux has dealt with.
4193int Process::sys$getrandom(void* buffer, size_t buffer_size, unsigned int flags __attribute__((unused)))
4194{
4195 REQUIRE_PROMISE(stdio);
4196 if (buffer_size <= 0)
4197 return -EINVAL;
4198
4199 if (!validate_write(buffer, buffer_size))
4200 return -EFAULT;
4201
4202 SmapDisabler disabler;
4203 get_good_random_bytes((u8*)buffer, buffer_size);
4204 return 0;
4205}
4206
4207int Process::sys$setkeymap(const Syscall::SC_setkeymap_params* user_params)
4208{
4209 if (!is_superuser())
4210 return -EPERM;
4211
4212 REQUIRE_NO_PROMISES;
4213 Syscall::SC_setkeymap_params params;
4214 if (!validate_read_and_copy_typed(¶ms, user_params))
4215 return -EFAULT;
4216
4217 const char* map = params.map;
4218 const char* shift_map = params.shift_map;
4219 const char* alt_map = params.alt_map;
4220 const char* altgr_map = params.altgr_map;
4221
4222 if (!validate_read(map, 0x80))
4223 return -EFAULT;
4224 if (!validate_read(shift_map, 0x80))
4225 return -EFAULT;
4226 if (!validate_read(alt_map, 0x80))
4227 return -EFAULT;
4228 if (!validate_read(altgr_map, 0x80))
4229 return -EFAULT;
4230
4231 SmapDisabler disabler;
4232 KeyboardDevice::the().set_maps(map, shift_map, alt_map, altgr_map);
4233 return 0;
4234}
4235
4236int Process::sys$clock_gettime(clockid_t clock_id, timespec* user_ts)
4237{
4238 REQUIRE_PROMISE(stdio);
4239 if (!validate_write_typed(user_ts))
4240 return -EFAULT;
4241
4242 timespec ts;
4243 memset(&ts, 0, sizeof(ts));
4244
4245 switch (clock_id) {
4246 case CLOCK_MONOTONIC:
4247 ts.tv_sec = g_uptime / TICKS_PER_SECOND;
4248 ts.tv_nsec = (g_uptime % TICKS_PER_SECOND) * 1000000;
4249 break;
4250 default:
4251 return -EINVAL;
4252 }
4253
4254 copy_to_user(user_ts, &ts);
4255 return 0;
4256}
4257
4258int Process::sys$clock_nanosleep(const Syscall::SC_clock_nanosleep_params* user_params)
4259{
4260 REQUIRE_PROMISE(stdio);
4261
4262 Syscall::SC_clock_nanosleep_params params;
4263 if (!validate_read_and_copy_typed(¶ms, user_params))
4264 return -EFAULT;
4265
4266 if (params.requested_sleep && !validate_read_typed(params.requested_sleep))
4267 return -EFAULT;
4268
4269 timespec requested_sleep;
4270 copy_from_user(&requested_sleep, params.requested_sleep);
4271
4272 if (params.remaining_sleep && !validate_write_typed(params.remaining_sleep))
4273 return -EFAULT;
4274
4275 bool is_absolute = params.flags & TIMER_ABSTIME;
4276
4277 switch (params.clock_id) {
4278 case CLOCK_MONOTONIC: {
4279 u64 wakeup_time;
4280 if (is_absolute) {
4281 u64 time_to_wake = (requested_sleep.tv_sec * 1000 + requested_sleep.tv_nsec / 1000000);
4282 wakeup_time = Thread::current->sleep_until(time_to_wake);
4283 } else {
4284 u32 ticks_to_sleep = (requested_sleep.tv_sec * 1000 + requested_sleep.tv_nsec / 1000000);
4285 if (!ticks_to_sleep)
4286 return 0;
4287 wakeup_time = Thread::current->sleep(ticks_to_sleep);
4288 }
4289 if (wakeup_time > g_uptime) {
4290 u32 ticks_left = wakeup_time - g_uptime;
4291 if (!is_absolute && params.remaining_sleep) {
4292 timespec remaining_sleep;
4293 memset(&remaining_sleep, 0, sizeof(timespec));
4294 remaining_sleep.tv_sec = ticks_left / TICKS_PER_SECOND;
4295 ticks_left -= remaining_sleep.tv_sec * TICKS_PER_SECOND;
4296 remaining_sleep.tv_nsec = ticks_left * 1000000;
4297 copy_to_user(params.remaining_sleep, &remaining_sleep);
4298 }
4299 return -EINTR;
4300 }
4301 return 0;
4302 }
4303 default:
4304 return -EINVAL;
4305 }
4306}
4307
4308int Process::sys$sync()
4309{
4310 REQUIRE_PROMISE(stdio);
4311 VFS::the().sync();
4312 return 0;
4313}
4314
4315int Process::sys$yield()
4316{
4317 REQUIRE_PROMISE(stdio);
4318 Thread::current->yield_without_holding_big_lock();
4319 return 0;
4320}
4321
4322int Process::sys$beep()
4323{
4324 PCSpeaker::tone_on(440);
4325 u64 wakeup_time = Thread::current->sleep(100);
4326 PCSpeaker::tone_off();
4327 if (wakeup_time > g_uptime)
4328 return -EINTR;
4329 return 0;
4330}
4331
4332int Process::sys$module_load(const char* user_path, size_t path_length)
4333{
4334 if (!is_superuser())
4335 return -EPERM;
4336
4337 REQUIRE_NO_PROMISES;
4338
4339 auto path = get_syscall_path_argument(user_path, path_length);
4340 if (path.is_error())
4341 return path.error();
4342 auto description_or_error = VFS::the().open(path.value(), O_RDONLY, 0, current_directory());
4343 if (description_or_error.is_error())
4344 return description_or_error.error();
4345 auto& description = description_or_error.value();
4346 auto payload = description->read_entire_file();
4347 auto storage = KBuffer::create_with_size(payload.size());
4348 memcpy(storage.data(), payload.data(), payload.size());
4349 payload.clear();
4350
4351 auto elf_image = make<ELFImage>(storage.data(), storage.size());
4352 if (!elf_image->parse())
4353 return -ENOEXEC;
4354
4355 HashMap<String, u8*> section_storage_by_name;
4356
4357 auto module = make<Module>();
4358
4359 elf_image->for_each_section_of_type(SHT_PROGBITS, [&](const ELFImage::Section& section) {
4360 auto section_storage = KBuffer::copy(section.raw_data(), section.size(), Region::Access::Read | Region::Access::Write | Region::Access::Execute);
4361 section_storage_by_name.set(section.name(), section_storage.data());
4362 module->sections.append(move(section_storage));
4363 return IterationDecision::Continue;
4364 });
4365
4366 bool missing_symbols = false;
4367
4368 elf_image->for_each_section_of_type(SHT_PROGBITS, [&](const ELFImage::Section& section) {
4369 auto* section_storage = section_storage_by_name.get(section.name()).value_or(nullptr);
4370 ASSERT(section_storage);
4371 section.relocations().for_each_relocation([&](const ELFImage::Relocation& relocation) {
4372 auto& patch_ptr = *reinterpret_cast<ptrdiff_t*>(section_storage + relocation.offset());
4373 switch (relocation.type()) {
4374 case R_386_PC32: {
4375 // PC-relative relocation
4376 dbg() << "PC-relative relocation: " << relocation.symbol().name();
4377 u32 symbol_address = address_for_kernel_symbol(relocation.symbol().name());
4378 if (symbol_address == 0)
4379 missing_symbols = true;
4380 dbg() << " Symbol address: " << (void*)symbol_address;
4381 ptrdiff_t relative_offset = (char*)symbol_address - ((char*)&patch_ptr + 4);
4382 patch_ptr = relative_offset;
4383 break;
4384 }
4385 case R_386_32: // Absolute relocation
4386 dbg() << "Absolute relocation: '" << relocation.symbol().name() << "' value:" << relocation.symbol().value() << ", index:" << relocation.symbol_index();
4387
4388 if (relocation.symbol().bind() == STB_LOCAL) {
4389 auto* section_storage_containing_symbol = section_storage_by_name.get(relocation.symbol().section().name()).value_or(nullptr);
4390 ASSERT(section_storage_containing_symbol);
4391 u32 symbol_address = (ptrdiff_t)(section_storage_containing_symbol + relocation.symbol().value());
4392 if (symbol_address == 0)
4393 missing_symbols = true;
4394 dbg() << " Symbol address: " << (void*)symbol_address;
4395 patch_ptr += symbol_address;
4396 } else if (relocation.symbol().bind() == STB_GLOBAL) {
4397 u32 symbol_address = address_for_kernel_symbol(relocation.symbol().name());
4398 if (symbol_address == 0)
4399 missing_symbols = true;
4400 dbg() << " Symbol address: " << (void*)symbol_address;
4401 patch_ptr += symbol_address;
4402 } else {
4403 ASSERT_NOT_REACHED();
4404 }
4405 break;
4406 }
4407 return IterationDecision::Continue;
4408 });
4409
4410 return IterationDecision::Continue;
4411 });
4412
4413 if (missing_symbols)
4414 return -EINVAL;
4415
4416 auto* text_base = section_storage_by_name.get(".text").value_or(nullptr);
4417 if (!text_base) {
4418 dbg() << "No .text section found in module!";
4419 return -EINVAL;
4420 }
4421
4422 elf_image->for_each_symbol([&](const ELFImage::Symbol& symbol) {
4423 dbg() << " - " << symbol.type() << " '" << symbol.name() << "' @ " << (void*)symbol.value() << ", size=" << symbol.size();
4424 if (symbol.name() == "module_init") {
4425 module->module_init = (ModuleInitPtr)(text_base + symbol.value());
4426 } else if (symbol.name() == "module_fini") {
4427 module->module_fini = (ModuleFiniPtr)(text_base + symbol.value());
4428 } else if (symbol.name() == "module_name") {
4429 const u8* storage = section_storage_by_name.get(symbol.section().name()).value_or(nullptr);
4430 if (storage)
4431 module->name = String((const char*)(storage + symbol.value()));
4432 }
4433 return IterationDecision::Continue;
4434 });
4435
4436 if (!module->module_init)
4437 return -EINVAL;
4438
4439 if (g_modules->contains(module->name)) {
4440 dbg() << "a module with the name " << module->name << " is already loaded; please unload it first";
4441 return -EEXIST;
4442 }
4443
4444 module->module_init();
4445
4446 auto name = module->name;
4447 g_modules->set(name, move(module));
4448
4449 return 0;
4450}
4451
4452int Process::sys$module_unload(const char* user_name, size_t name_length)
4453{
4454 if (!is_superuser())
4455 return -EPERM;
4456
4457 REQUIRE_NO_PROMISES;
4458
4459 auto module_name = validate_and_copy_string_from_user(user_name, name_length);
4460 if (module_name.is_null())
4461 return -EFAULT;
4462
4463 auto it = g_modules->find(module_name);
4464 if (it == g_modules->end())
4465 return -ENOENT;
4466
4467 if (it->value->module_fini)
4468 it->value->module_fini();
4469
4470 g_modules->remove(it);
4471 return 0;
4472}
4473
4474int Process::sys$profiling_enable(pid_t pid)
4475{
4476 REQUIRE_NO_PROMISES;
4477 InterruptDisabler disabler;
4478 auto* process = Process::from_pid(pid);
4479 if (!process)
4480 return -ESRCH;
4481 if (!is_superuser() && process->uid() != m_uid)
4482 return -EPERM;
4483 Profiling::start(*process);
4484 process->set_profiling(true);
4485 return 0;
4486}
4487
4488int Process::sys$profiling_disable(pid_t pid)
4489{
4490 InterruptDisabler disabler;
4491 auto* process = Process::from_pid(pid);
4492 if (!process)
4493 return -ESRCH;
4494 if (!is_superuser() && process->uid() != m_uid)
4495 return -EPERM;
4496 process->set_profiling(false);
4497 Profiling::stop();
4498 return 0;
4499}
4500
4501void* Process::sys$get_kernel_info_page()
4502{
4503 REQUIRE_PROMISE(stdio);
4504 return s_info_page_address_for_userspace.as_ptr();
4505}
4506
4507Thread& Process::any_thread()
4508{
4509 Thread* found_thread = nullptr;
4510 for_each_thread([&](auto& thread) {
4511 found_thread = &thread;
4512 return IterationDecision::Break;
4513 });
4514 ASSERT(found_thread);
4515 return *found_thread;
4516}
4517
4518WaitQueue& Process::futex_queue(i32* userspace_address)
4519{
4520 auto& queue = m_futex_queues.ensure((uintptr_t)userspace_address);
4521 if (!queue)
4522 queue = make<WaitQueue>();
4523 return *queue;
4524}
4525
4526int Process::sys$futex(const Syscall::SC_futex_params* user_params)
4527{
4528 REQUIRE_PROMISE(thread);
4529
4530 Syscall::SC_futex_params params;
4531 if (!validate_read_and_copy_typed(¶ms, user_params))
4532 return -EFAULT;
4533
4534 i32* userspace_address = params.userspace_address;
4535 int futex_op = params.futex_op;
4536 i32 value = params.val;
4537 const timespec* user_timeout = params.timeout;
4538
4539 if (!validate_read_typed(userspace_address))
4540 return -EFAULT;
4541
4542 if (user_timeout && !validate_read_typed(user_timeout))
4543 return -EFAULT;
4544
4545 timespec timeout { 0, 0 };
4546 if (user_timeout)
4547 copy_from_user(&timeout, user_timeout);
4548
4549 i32 user_value;
4550
4551 switch (futex_op) {
4552 case FUTEX_WAIT:
4553 copy_from_user(&user_value, userspace_address);
4554 if (user_value != value)
4555 return -EAGAIN;
4556 // FIXME: This is supposed to be interruptible by a signal, but right now WaitQueue cannot be interrupted.
4557 // FIXME: Support timeout!
4558 Thread::current->wait_on(futex_queue(userspace_address));
4559 break;
4560 case FUTEX_WAKE:
4561 if (value == 0)
4562 return 0;
4563 if (value == 1) {
4564 futex_queue(userspace_address).wake_one();
4565 } else {
4566 // FIXME: Wake exactly (value) waiters.
4567 futex_queue(userspace_address).wake_all();
4568 }
4569 break;
4570 }
4571
4572 return 0;
4573}
4574
4575int Process::sys$set_thread_boost(int tid, int amount)
4576{
4577 REQUIRE_PROMISE(proc);
4578 if (amount < 0 || amount > 20)
4579 return -EINVAL;
4580 InterruptDisabler disabler;
4581 auto* thread = Thread::from_tid(tid);
4582 if (!thread)
4583 return -ESRCH;
4584 if (thread->state() == Thread::State::Dead || thread->state() == Thread::State::Dying)
4585 return -ESRCH;
4586 if (!is_superuser() && thread->process().uid() != euid())
4587 return -EPERM;
4588 thread->set_priority_boost(amount);
4589 return 0;
4590}
4591
4592int Process::sys$set_process_boost(pid_t pid, int amount)
4593{
4594 REQUIRE_PROMISE(proc);
4595 if (amount < 0 || amount > 20)
4596 return -EINVAL;
4597 InterruptDisabler disabler;
4598 auto* process = Process::from_pid(pid);
4599 if (!process || process->is_dead())
4600 return -ESRCH;
4601 if (!is_superuser() && process->uid() != euid())
4602 return -EPERM;
4603 process->m_priority_boost = amount;
4604 return 0;
4605}
4606
4607int Process::sys$chroot(const char* user_path, size_t path_length, int mount_flags)
4608{
4609 if (!is_superuser())
4610 return -EPERM;
4611 REQUIRE_PROMISE(chroot);
4612 auto path = get_syscall_path_argument(user_path, path_length);
4613 if (path.is_error())
4614 return path.error();
4615 auto directory_or_error = VFS::the().open_directory(path.value(), current_directory());
4616 if (directory_or_error.is_error())
4617 return directory_or_error.error();
4618 auto directory = directory_or_error.value();
4619 m_root_directory_relative_to_global_root = directory;
4620 int chroot_mount_flags = mount_flags == -1 ? directory->mount_flags() : mount_flags;
4621 set_root_directory(Custody::create(nullptr, "", directory->inode(), chroot_mount_flags));
4622 return 0;
4623}
4624
4625Custody& Process::root_directory()
4626{
4627 if (!m_root_directory)
4628 m_root_directory = VFS::the().root_custody();
4629 return *m_root_directory;
4630}
4631
4632Custody& Process::root_directory_relative_to_global_root()
4633{
4634 if (!m_root_directory_relative_to_global_root)
4635 m_root_directory_relative_to_global_root = root_directory();
4636 return *m_root_directory_relative_to_global_root;
4637}
4638
4639void Process::set_root_directory(const Custody& root)
4640{
4641 m_root_directory = root;
4642}
4643
4644int Process::sys$pledge(const Syscall::SC_pledge_params* user_params)
4645{
4646 Syscall::SC_pledge_params params;
4647 if (!validate_read_and_copy_typed(¶ms, user_params))
4648 return -EFAULT;
4649
4650 if (params.promises.length > 1024 || params.execpromises.length > 1024)
4651 return -E2BIG;
4652
4653 String promises;
4654 if (params.promises.characters) {
4655 promises = validate_and_copy_string_from_user(params.promises);
4656 if (promises.is_null())
4657 return -EFAULT;
4658 }
4659
4660 String execpromises;
4661 if (params.execpromises.characters) {
4662 execpromises = validate_and_copy_string_from_user(params.execpromises);
4663 if (execpromises.is_null())
4664 return -EFAULT;
4665 }
4666
4667 auto parse_pledge = [&](auto& pledge_spec, u32& mask) {
4668 auto parts = pledge_spec.split_view(' ');
4669 for (auto& part : parts) {
4670#define __ENUMERATE_PLEDGE_PROMISE(x) \
4671 if (part == #x) { \
4672 mask |= (1u << (u32)Pledge::x); \
4673 continue; \
4674 }
4675 ENUMERATE_PLEDGE_PROMISES
4676#undef __ENUMERATE_PLEDGE_PROMISE
4677 if (part == "dns") {
4678 // "dns" is an alias for "unix" since DNS queries go via LookupServer
4679 mask |= (1u << (u32)Pledge::unix);
4680 continue;
4681 }
4682 return false;
4683 }
4684 return true;
4685 };
4686
4687 if (!promises.is_null()) {
4688 u32 new_promises = 0;
4689 if (!parse_pledge(promises, new_promises))
4690 return -EINVAL;
4691 if (m_promises && (!new_promises || new_promises & ~m_promises))
4692 return -EPERM;
4693 m_promises = new_promises;
4694 }
4695
4696 if (!execpromises.is_null()) {
4697 u32 new_execpromises = 0;
4698 if (!parse_pledge(execpromises, new_execpromises))
4699 return -EINVAL;
4700 if (m_execpromises && (!new_execpromises || new_execpromises & ~m_execpromises))
4701 return -EPERM;
4702 m_execpromises = new_execpromises;
4703 }
4704
4705 return 0;
4706}
4707
4708Region& Process::add_region(NonnullOwnPtr<Region> region)
4709{
4710 auto* ptr = region.ptr();
4711 m_regions.append(move(region));
4712 return *ptr;
4713}
4714
4715int Process::sys$unveil(const Syscall::SC_unveil_params* user_params)
4716{
4717 Syscall::SC_unveil_params params;
4718 if (!validate_read_and_copy_typed(¶ms, user_params))
4719 return -EFAULT;
4720
4721 if (!params.path.characters && !params.permissions.characters) {
4722 m_veil_state = VeilState::Locked;
4723 return 0;
4724 }
4725
4726 if (m_veil_state == VeilState::Locked)
4727 return -EPERM;
4728
4729 if (!params.path.characters || !params.permissions.characters)
4730 return -EINVAL;
4731
4732 if (params.permissions.length > 4)
4733 return -EINVAL;
4734
4735 auto path = get_syscall_path_argument(params.path);
4736 if (path.is_error())
4737 return path.error();
4738
4739 if (path.value().is_empty() || path.value().characters()[0] != '/')
4740 return -EINVAL;
4741
4742 auto permissions = validate_and_copy_string_from_user(params.permissions);
4743 if (permissions.is_null())
4744 return -EFAULT;
4745
4746 unsigned new_permissions = 0;
4747 for (size_t i = 0; i < permissions.length(); ++i) {
4748 switch (permissions[i]) {
4749 case 'r':
4750 new_permissions |= UnveiledPath::Access::Read;
4751 break;
4752 case 'w':
4753 new_permissions |= UnveiledPath::Access::Write;
4754 break;
4755 case 'x':
4756 new_permissions |= UnveiledPath::Access::Execute;
4757 break;
4758 case 'c':
4759 new_permissions |= UnveiledPath::Access::CreateOrRemove;
4760 break;
4761 default:
4762 return -EINVAL;
4763 }
4764 }
4765
4766 for (size_t i = 0; i < m_unveiled_paths.size(); ++i) {
4767 auto& unveiled_path = m_unveiled_paths[i];
4768 if (unveiled_path.path == path.value()) {
4769 if (new_permissions & ~unveiled_path.permissions)
4770 return -EPERM;
4771 unveiled_path.permissions = new_permissions;
4772 return 0;
4773 }
4774 }
4775
4776 m_unveiled_paths.append({ path.value(), new_permissions });
4777 ASSERT(m_veil_state != VeilState::Locked);
4778 m_veil_state = VeilState::Dropped;
4779 return 0;
4780}
4781
4782int Process::sys$perf_event(int type, uintptr_t arg1, uintptr_t arg2)
4783{
4784 if (!m_perf_event_buffer)
4785 m_perf_event_buffer = make<PerformanceEventBuffer>();
4786 return m_perf_event_buffer->append(type, arg1, arg2);
4787}
4788
4789void Process::set_tty(TTY* tty)
4790{
4791 m_tty = tty;
4792}
4793
4794}