Serenity Operating System
at portability 4794 lines 154 kB view raw
1/* 2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, this 9 * list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 */ 26 27#include <AK/Demangle.h> 28#include <AK/FileSystemPath.h> 29#include <AK/ScopeGuard.h> 30#include <AK/StdLibExtras.h> 31#include <AK/StringBuilder.h> 32#include <AK/Time.h> 33#include <AK/Types.h> 34#include <Kernel/Arch/i386/CPU.h> 35#include <Kernel/Devices/BlockDevice.h> 36#include <Kernel/Devices/KeyboardDevice.h> 37#include <Kernel/Devices/NullDevice.h> 38#include <Kernel/Devices/PCSpeaker.h> 39#include <Kernel/Devices/PIT.h> 40#include <Kernel/Devices/RandomDevice.h> 41#include <Kernel/FileSystem/Custody.h> 42#include <Kernel/FileSystem/DevPtsFS.h> 43#include <Kernel/FileSystem/Ext2FileSystem.h> 44#include <Kernel/FileSystem/FIFO.h> 45#include <Kernel/FileSystem/FileDescription.h> 46#include <Kernel/FileSystem/InodeWatcher.h> 47#include <Kernel/FileSystem/ProcFS.h> 48#include <Kernel/FileSystem/TmpFS.h> 49#include <Kernel/FileSystem/VirtualFileSystem.h> 50#include <Kernel/Heap/kmalloc.h> 51#include <Kernel/KBufferBuilder.h> 52#include <Kernel/KSyms.h> 53#include <Kernel/KernelInfoPage.h> 54#include <Kernel/Module.h> 55#include <Kernel/Multiboot.h> 56#include <Kernel/Net/Socket.h> 57#include <Kernel/PerformanceEventBuffer.h> 58#include <Kernel/Process.h> 59#include <Kernel/ProcessTracer.h> 60#include <Kernel/Profiling.h> 61#include <Kernel/RTC.h> 62#include <Kernel/Random.h> 63#include <Kernel/Scheduler.h> 64#include <Kernel/SharedBuffer.h> 65#include <Kernel/Syscall.h> 66#include <Kernel/TTY/MasterPTY.h> 67#include <Kernel/TTY/TTY.h> 68#include <Kernel/Thread.h> 69#include <Kernel/VM/InodeVMObject.h> 70#include <Kernel/VM/PageDirectory.h> 71#include <Kernel/VM/PurgeableVMObject.h> 72#include <LibBareMetal/IO.h> 73#include <LibBareMetal/Output/Console.h> 74#include <LibBareMetal/StdLib.h> 75#include <LibC/errno_numbers.h> 76#include <LibC/limits.h> 77#include <LibC/signal_numbers.h> 78#include <LibELF/ELFLoader.h> 79 80//#define PROCESS_DEBUG 81//#define DEBUG_POLL_SELECT 82//#define DEBUG_IO 83//#define TASK_DEBUG 84//#define FORK_DEBUG 85//#define 
EXEC_DEBUG 86//#define SIGNAL_DEBUG 87//#define SHARED_BUFFER_DEBUG 88 89namespace Kernel { 90 91static void create_signal_trampolines(); 92static void create_kernel_info_page(); 93 94Process* Process::current; 95 96static pid_t next_pid; 97InlineLinkedList<Process>* g_processes; 98static String* s_hostname; 99static Lock* s_hostname_lock; 100static VirtualAddress s_info_page_address_for_userspace; 101static VirtualAddress s_info_page_address_for_kernel; 102VirtualAddress g_return_to_ring3_from_signal_trampoline; 103HashMap<String, OwnPtr<Module>>* g_modules; 104 105pid_t Process::allocate_pid() 106{ 107 InterruptDisabler disabler; 108 return next_pid++; 109} 110 111void Process::initialize() 112{ 113 g_modules = new HashMap<String, OwnPtr<Module>>; 114 115 next_pid = 0; 116 g_processes = new InlineLinkedList<Process>; 117 s_hostname = new String("courage"); 118 s_hostname_lock = new Lock; 119 120 create_signal_trampolines(); 121 create_kernel_info_page(); 122} 123 124void Process::update_info_page_timestamp(const timeval& tv) 125{ 126 auto* info_page = (KernelInfoPage*)s_info_page_address_for_kernel.as_ptr(); 127 info_page->serial++; 128 const_cast<timeval&>(info_page->now) = tv; 129} 130 131Vector<pid_t> Process::all_pids() 132{ 133 Vector<pid_t> pids; 134 InterruptDisabler disabler; 135 pids.ensure_capacity((int)g_processes->size_slow()); 136 for (auto& process : *g_processes) 137 pids.append(process.pid()); 138 return pids; 139} 140 141Vector<Process*> Process::all_processes() 142{ 143 Vector<Process*> processes; 144 InterruptDisabler disabler; 145 processes.ensure_capacity((int)g_processes->size_slow()); 146 for (auto& process : *g_processes) 147 processes.append(&process); 148 return processes; 149} 150 151bool Process::in_group(gid_t gid) const 152{ 153 return m_gid == gid || m_extra_gids.contains(gid); 154} 155 156Range Process::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment) 157{ 158 vaddr.mask(PAGE_MASK); 159 size = 
PAGE_ROUND_UP(size); 160 if (vaddr.is_null()) 161 return page_directory().range_allocator().allocate_anywhere(size, alignment); 162 return page_directory().range_allocator().allocate_specific(vaddr, size); 163} 164 165static unsigned prot_to_region_access_flags(int prot) 166{ 167 unsigned access = 0; 168 if (prot & PROT_READ) 169 access |= Region::Access::Read; 170 if (prot & PROT_WRITE) 171 access |= Region::Access::Write; 172 if (prot & PROT_EXEC) 173 access |= Region::Access::Execute; 174 return access; 175} 176 177Region& Process::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject) 178{ 179 auto& region = add_region(Region::create_user_accessible(range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access())); 180 region.set_mmap(source_region.is_mmap()); 181 region.set_stack(source_region.is_stack()); 182 size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE; 183 for (size_t i = 0; i < region.page_count(); ++i) { 184 if (source_region.should_cow(page_offset_in_source_region + i)) 185 region.set_should_cow(i, true); 186 } 187 return region; 188} 189 190Region* Process::allocate_region(const Range& range, const String& name, int prot, bool commit) 191{ 192 ASSERT(range.is_valid()); 193 auto& region = add_region(Region::create_user_accessible(range, name, prot_to_region_access_flags(prot))); 194 region.map(page_directory()); 195 if (commit) 196 region.commit(); 197 return &region; 198} 199 200Region* Process::allocate_region(VirtualAddress vaddr, size_t size, const String& name, int prot, bool commit) 201{ 202 auto range = allocate_range(vaddr, size); 203 if (!range.is_valid()) 204 return nullptr; 205 return allocate_region(range, name, prot, commit); 206} 207 208Region* Process::allocate_file_backed_region(VirtualAddress vaddr, size_t size, NonnullRefPtr<Inode> inode, const String& name, int prot) 209{ 210 auto range = 
allocate_range(vaddr, size); 211 if (!range.is_valid()) 212 return nullptr; 213 auto& region = add_region(Region::create_user_accessible(range, inode, name, prot_to_region_access_flags(prot))); 214 region.map(page_directory()); 215 return &region; 216} 217 218Region* Process::allocate_region_with_vmobject(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool user_accessible) 219{ 220 ASSERT(range.is_valid()); 221 size_t end_in_vmobject = offset_in_vmobject + range.size(); 222 if (end_in_vmobject <= offset_in_vmobject) { 223 dbgprintf("allocate_region_with_vmobject: Overflow (offset + size)\n"); 224 return nullptr; 225 } 226 if (offset_in_vmobject >= vmobject->size()) { 227 dbgprintf("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.\n"); 228 return nullptr; 229 } 230 if (end_in_vmobject > vmobject->size()) { 231 dbgprintf("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.\n"); 232 return nullptr; 233 } 234 offset_in_vmobject &= PAGE_MASK; 235 Region* region; 236 if (user_accessible) 237 region = &add_region(Region::create_user_accessible(range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot))); 238 else 239 region = &add_region(Region::create_kernel_only(range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot))); 240 region->map(page_directory()); 241 return region; 242} 243 244 245Region* Process::allocate_region_with_vmobject(VirtualAddress vaddr, size_t size, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool user_accessible) 246{ 247 auto range = allocate_range(vaddr, size); 248 if (!range.is_valid()) 249 return nullptr; 250 return allocate_region_with_vmobject(range, move(vmobject), offset_in_vmobject, name, prot, user_accessible); 251} 252 253bool Process::deallocate_region(Region& region) 254{ 
255 InterruptDisabler disabler; 256 if (m_region_lookup_cache.region == &region) 257 m_region_lookup_cache.region = nullptr; 258 for (size_t i = 0; i < m_regions.size(); ++i) { 259 if (&m_regions[i] == &region) { 260 m_regions.unstable_remove(i); 261 return true; 262 } 263 } 264 return false; 265} 266 267Region* Process::region_from_range(const Range& range) 268{ 269 if (m_region_lookup_cache.range == range && m_region_lookup_cache.region) 270 return m_region_lookup_cache.region; 271 272 size_t size = PAGE_ROUND_UP(range.size()); 273 for (auto& region : m_regions) { 274 if (region.vaddr() == range.base() && region.size() == size) { 275 m_region_lookup_cache.range = range; 276 m_region_lookup_cache.region = region.make_weak_ptr(); 277 return &region; 278 } 279 } 280 return nullptr; 281} 282 283Region* Process::region_containing(const Range& range) 284{ 285 for (auto& region : m_regions) { 286 if (region.contains(range)) 287 return &region; 288 } 289 return nullptr; 290} 291 292int Process::sys$set_mmap_name(const Syscall::SC_set_mmap_name_params* user_params) 293{ 294 REQUIRE_PROMISE(stdio); 295 296 Syscall::SC_set_mmap_name_params params; 297 if (!validate_read_and_copy_typed(&params, user_params)) 298 return -EFAULT; 299 300 if (params.name.length > PATH_MAX) 301 return -ENAMETOOLONG; 302 303 auto name = validate_and_copy_string_from_user(params.name); 304 if (name.is_null()) 305 return -EFAULT; 306 307 auto* region = region_from_range({ VirtualAddress(params.addr), params.size }); 308 if (!region) 309 return -EINVAL; 310 if (!region->is_mmap()) 311 return -EPERM; 312 region->set_name(name); 313 return 0; 314} 315 316static bool validate_mmap_prot(int prot, bool map_stack) 317{ 318 bool readable = prot & PROT_READ; 319 bool writable = prot & PROT_WRITE; 320 bool executable = prot & PROT_EXEC; 321 322 if (writable && executable) 323 return false; 324 325 if (map_stack) { 326 if (executable) 327 return false; 328 if (!readable || !writable) 329 return false; 330 } 
331 332 return true; 333} 334 335static bool validate_inode_mmap_prot(const Process& process, int prot, const Inode& inode) 336{ 337 auto metadata = inode.metadata(); 338 if ((prot & PROT_WRITE) && !metadata.may_write(process)) 339 return false; 340 if ((prot & PROT_READ) && !metadata.may_read(process)) 341 return false; 342 InterruptDisabler disabler; 343 if (inode.vmobject()) { 344 if ((prot & PROT_EXEC) && inode.vmobject()->writable_mappings()) 345 return false; 346 if ((prot & PROT_WRITE) && inode.vmobject()->executable_mappings()) 347 return false; 348 } 349 return true; 350} 351 352// Carve out a virtual address range from a region and return the two regions on either side 353Vector<Region*, 2> Process::split_region_around_range(const Region& source_region, const Range& desired_range) 354{ 355 Range old_region_range = source_region.range(); 356 auto remaining_ranges_after_unmap = old_region_range.carve(desired_range); 357 358 ASSERT(!remaining_ranges_after_unmap.is_empty()); 359 auto make_replacement_region = [&](const Range& new_range) -> Region& { 360 ASSERT(new_range.base() >= old_region_range.base()); 361 ASSERT(new_range.end() <= old_region_range.end()); 362 size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get()); 363 return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject); 364 }; 365 Vector<Region*, 2> new_regions; 366 for (auto& new_range : remaining_ranges_after_unmap) { 367 new_regions.unchecked_append(&make_replacement_region(new_range)); 368 } 369 return new_regions; 370} 371 372void* Process::sys$mmap(const Syscall::SC_mmap_params* user_params) 373{ 374 REQUIRE_PROMISE(stdio); 375 376 Syscall::SC_mmap_params params; 377 if (!validate_read_and_copy_typed(&params, user_params)) 378 return (void*)-EFAULT; 379 380 void* addr = (void*)params.addr; 381 size_t size = params.size; 382 size_t alignment = params.alignment; 383 int prot = params.prot; 384 
int flags = params.flags; 385 int fd = params.fd; 386 int offset = params.offset; 387 388 if (alignment & ~PAGE_MASK) 389 return (void*)-EINVAL; 390 391 if (!is_user_range(VirtualAddress(addr), size)) 392 return (void*)-EFAULT; 393 394 String name; 395 if (params.name.characters) { 396 if (params.name.length > PATH_MAX) 397 return (void*)-ENAMETOOLONG; 398 name = validate_and_copy_string_from_user(params.name); 399 if (name.is_null()) 400 return (void*)-EFAULT; 401 } 402 403 if (size == 0) 404 return (void*)-EINVAL; 405 if ((uintptr_t)addr & ~PAGE_MASK) 406 return (void*)-EINVAL; 407 408 bool map_shared = flags & MAP_SHARED; 409 bool map_anonymous = flags & MAP_ANONYMOUS; 410 bool map_purgeable = flags & MAP_PURGEABLE; 411 bool map_private = flags & MAP_PRIVATE; 412 bool map_stack = flags & MAP_STACK; 413 bool map_fixed = flags & MAP_FIXED; 414 415 if (map_shared && map_private) 416 return (void*)-EINVAL; 417 418 if (!map_shared && !map_private) 419 return (void*)-EINVAL; 420 421 if (!validate_mmap_prot(prot, map_stack)) 422 return (void*)-EINVAL; 423 424 if (map_stack && (!map_private || !map_anonymous)) 425 return (void*)-EINVAL; 426 427 Region* region = nullptr; 428 429 auto range = allocate_range(VirtualAddress(addr), size, alignment); 430 if (!range.is_valid()) 431 return (void*)-ENOMEM; 432 433 if (map_purgeable) { 434 auto vmobject = PurgeableVMObject::create_with_size(size); 435 region = allocate_region_with_vmobject(range, vmobject, 0, !name.is_null() ? name : "mmap (purgeable)", prot); 436 if (!region && (!map_fixed && addr != 0)) 437 region = allocate_region_with_vmobject({}, size, vmobject, 0, !name.is_null() ? name : "mmap (purgeable)", prot); 438 } else if (map_anonymous) { 439 region = allocate_region(range, !name.is_null() ? name : "mmap", prot, false); 440 if (!region && (!map_fixed && addr != 0)) 441 region = allocate_region(allocate_range({}, size), !name.is_null() ? 
name : "mmap", prot, false); 442 } else { 443 if (offset < 0) 444 return (void*)-EINVAL; 445 if (static_cast<size_t>(offset) & ~PAGE_MASK) 446 return (void*)-EINVAL; 447 // FIXME: Implement MAP_PRIVATE for FileDescription-backed mmap 448 if (map_private) 449 return (void*)-ENOTSUP; 450 auto description = file_description(fd); 451 if (!description) 452 return (void*)-EBADF; 453 if (description->is_directory()) 454 return (void*)-ENODEV; 455 if ((prot & PROT_READ) && !description->is_readable()) 456 return (void*)-EACCES; 457 if ((prot & PROT_WRITE) && !description->is_writable()) 458 return (void*)-EACCES; 459 if (description->inode()) { 460 if (!validate_inode_mmap_prot(*this, prot, *description->inode())) 461 return (void*)-EACCES; 462 } 463 auto region_or_error = description->mmap(*this, VirtualAddress(addr), static_cast<size_t>(offset), size, prot); 464 if (region_or_error.is_error()) { 465 // Fail if MAP_FIXED or address is 0, retry otherwise 466 if (map_fixed || addr == 0) 467 return (void*)(int)region_or_error.error(); 468 region_or_error = description->mmap(*this, {}, static_cast<size_t>(offset), size, prot); 469 } 470 if (region_or_error.is_error()) 471 return (void*)(int)region_or_error.error(); 472 region = region_or_error.value(); 473 } 474 475 if (!region) 476 return (void*)-ENOMEM; 477 region->set_mmap(true); 478 if (map_shared) 479 region->set_shared(true); 480 if (map_stack) 481 region->set_stack(true); 482 if (!name.is_null()) 483 region->set_name(name); 484 return region->vaddr().as_ptr(); 485} 486 487int Process::sys$munmap(void* addr, size_t size) 488{ 489 REQUIRE_PROMISE(stdio); 490 491 if (!size) 492 return -EINVAL; 493 494 if (!is_user_range(VirtualAddress(addr), size)) 495 return -EFAULT; 496 497 Range range_to_unmap { VirtualAddress(addr), size }; 498 if (auto* whole_region = region_from_range(range_to_unmap)) { 499 if (!whole_region->is_mmap()) 500 return -EPERM; 501 bool success = deallocate_region(*whole_region); 502 ASSERT(success); 503 
return 0; 504 } 505 506 if (auto* old_region = region_containing(range_to_unmap)) { 507 if (!old_region->is_mmap()) 508 return -EPERM; 509 510 auto new_regions = split_region_around_range(*old_region, range_to_unmap); 511 512 // We manually unmap the old region here, specifying that we *don't* want the VM deallocated. 513 old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); 514 deallocate_region(*old_region); 515 516 // Instead we give back the unwanted VM manually. 517 page_directory().range_allocator().deallocate(range_to_unmap); 518 519 // And finally we map the new region(s) using our page directory (they were just allocated and don't have one). 520 for (auto* new_region : new_regions) { 521 new_region->map(page_directory()); 522 } 523 return 0; 524 } 525 526 // FIXME: We should also support munmap() across multiple regions. (#175) 527 528 return -EINVAL; 529} 530 531int Process::sys$mprotect(void* addr, size_t size, int prot) 532{ 533 REQUIRE_PROMISE(stdio); 534 535 if (!size) 536 return -EINVAL; 537 538 if (!is_user_range(VirtualAddress(addr), size)) 539 return -EFAULT; 540 541 Range range_to_mprotect = { VirtualAddress(addr), size }; 542 543 if (auto* whole_region = region_from_range(range_to_mprotect)) { 544 if (!whole_region->is_mmap()) 545 return -EPERM; 546 if (!validate_mmap_prot(prot, whole_region->is_stack())) 547 return -EINVAL; 548 if (whole_region->access() == prot_to_region_access_flags(prot)) 549 return 0; 550 if (whole_region->vmobject().is_inode() 551 && !validate_inode_mmap_prot(*this, prot, static_cast<const InodeVMObject&>(whole_region->vmobject()).inode())) { 552 return -EACCES; 553 } 554 whole_region->set_readable(prot & PROT_READ); 555 whole_region->set_writable(prot & PROT_WRITE); 556 whole_region->set_executable(prot & PROT_EXEC); 557 whole_region->remap(); 558 return 0; 559 } 560 561 // Check if we can carve out the desired range from an existing region 562 if (auto* old_region = region_containing(range_to_mprotect)) { 
563 if (!old_region->is_mmap()) 564 return -EPERM; 565 if (!validate_mmap_prot(prot, old_region->is_stack())) 566 return -EINVAL; 567 if (old_region->access() == prot_to_region_access_flags(prot)) 568 return 0; 569 if (old_region->vmobject().is_inode() 570 && !validate_inode_mmap_prot(*this, prot, static_cast<const InodeVMObject&>(old_region->vmobject()).inode())) { 571 return -EACCES; 572 } 573 574 // This vector is the region(s) adjacent to our range. 575 // We need to allocate a new region for the range we wanted to change permission bits on. 576 auto adjacent_regions = split_region_around_range(*old_region, range_to_mprotect); 577 578 size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (range_to_mprotect.base().get() - old_region->range().base().get()); 579 auto& new_region = allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject); 580 new_region.set_readable(prot & PROT_READ); 581 new_region.set_writable(prot & PROT_WRITE); 582 new_region.set_executable(prot & PROT_EXEC); 583 584 // Unmap the old region here, specifying that we *don't* want the VM deallocated. 585 old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); 586 deallocate_region(*old_region); 587 588 // Map the new regions using our page directory (they were just allocated and don't have one). 589 for (auto* adjacent_region : adjacent_regions) { 590 adjacent_region->map(page_directory()); 591 } 592 new_region.map(page_directory()); 593 return 0; 594 } 595 596 // FIXME: We should also support mprotect() across multiple regions. 
(#175) (#964) 597 598 return -EINVAL; 599} 600 601int Process::sys$madvise(void* address, size_t size, int advice) 602{ 603 REQUIRE_PROMISE(stdio); 604 605 if (!size) 606 return -EINVAL; 607 608 if (!is_user_range(VirtualAddress(address), size)) 609 return -EFAULT; 610 611 auto* region = region_from_range({ VirtualAddress(address), size }); 612 if (!region) 613 return -EINVAL; 614 if (!region->is_mmap()) 615 return -EPERM; 616 if ((advice & MADV_SET_VOLATILE) && (advice & MADV_SET_NONVOLATILE)) 617 return -EINVAL; 618 if (advice & MADV_SET_VOLATILE) { 619 if (!region->vmobject().is_purgeable()) 620 return -EPERM; 621 auto& vmobject = static_cast<PurgeableVMObject&>(region->vmobject()); 622 vmobject.set_volatile(true); 623 return 0; 624 } 625 if (advice & MADV_SET_NONVOLATILE) { 626 if (!region->vmobject().is_purgeable()) 627 return -EPERM; 628 auto& vmobject = static_cast<PurgeableVMObject&>(region->vmobject()); 629 if (!vmobject.is_volatile()) 630 return 0; 631 vmobject.set_volatile(false); 632 bool was_purged = vmobject.was_purged(); 633 vmobject.set_was_purged(false); 634 return was_purged ? 1 : 0; 635 } 636 if (advice & MADV_GET_VOLATILE) { 637 if (!region->vmobject().is_purgeable()) 638 return -EPERM; 639 auto& vmobject = static_cast<PurgeableVMObject&>(region->vmobject()); 640 return vmobject.is_volatile() ? 
0 : 1; 641 } 642 return -EINVAL; 643} 644 645int Process::sys$purge(int mode) 646{ 647 REQUIRE_NO_PROMISES; 648 if (!is_superuser()) 649 return -EPERM; 650 int purged_page_count = 0; 651 if (mode & PURGE_ALL_VOLATILE) { 652 NonnullRefPtrVector<PurgeableVMObject> vmobjects; 653 { 654 InterruptDisabler disabler; 655 MM.for_each_vmobject([&](auto& vmobject) { 656 if (vmobject.is_purgeable()) 657 vmobjects.append(static_cast<PurgeableVMObject&>(vmobject)); 658 return IterationDecision::Continue; 659 }); 660 } 661 for (auto& vmobject : vmobjects) { 662 purged_page_count += vmobject.purge(); 663 } 664 } 665 if (mode & PURGE_ALL_CLEAN_INODE) { 666 NonnullRefPtrVector<InodeVMObject> vmobjects; 667 { 668 InterruptDisabler disabler; 669 MM.for_each_vmobject([&](auto& vmobject) { 670 if (vmobject.is_inode()) 671 vmobjects.append(static_cast<InodeVMObject&>(vmobject)); 672 return IterationDecision::Continue; 673 }); 674 } 675 for (auto& vmobject : vmobjects) { 676 purged_page_count += vmobject.release_all_clean_pages(); 677 } 678 } 679 return purged_page_count; 680} 681 682int Process::sys$gethostname(char* buffer, ssize_t size) 683{ 684 REQUIRE_PROMISE(stdio); 685 if (size < 0) 686 return -EINVAL; 687 if (!validate_write(buffer, size)) 688 return -EFAULT; 689 LOCKER(*s_hostname_lock); 690 if ((size_t)size < (s_hostname->length() + 1)) 691 return -ENAMETOOLONG; 692 copy_to_user(buffer, s_hostname->characters(), s_hostname->length() + 1); 693 return 0; 694} 695 696pid_t Process::sys$fork(RegisterState& regs) 697{ 698 REQUIRE_PROMISE(proc); 699 Thread* child_first_thread = nullptr; 700 auto* child = new Process(child_first_thread, m_name, m_uid, m_gid, m_pid, m_ring, m_cwd, m_executable, m_tty, this); 701 child->m_root_directory = m_root_directory; 702 child->m_root_directory_relative_to_global_root = m_root_directory_relative_to_global_root; 703 child->m_promises = m_promises; 704 child->m_execpromises = m_execpromises; 705 child->m_veil_state = m_veil_state; 706 
child->m_unveiled_paths = m_unveiled_paths; 707 child->m_fds = m_fds; 708 child->m_sid = m_sid; 709 child->m_pgid = m_pgid; 710 child->m_umask = m_umask; 711 712#ifdef FORK_DEBUG 713 dbgprintf("fork: child=%p\n", child); 714#endif 715 716 for (auto& region : m_regions) { 717#ifdef FORK_DEBUG 718 dbg() << "fork: cloning Region{" << &region << "} '" << region.name() << "' @ " << region.vaddr(); 719#endif 720 auto& child_region = child->add_region(region.clone()); 721 child_region.map(child->page_directory()); 722 723 if (&region == m_master_tls_region) 724 child->m_master_tls_region = &child_region; 725 } 726 727 child->m_extra_gids = m_extra_gids; 728 729 auto& child_tss = child_first_thread->m_tss; 730 child_tss.eax = 0; // fork() returns 0 in the child :^) 731 child_tss.ebx = regs.ebx; 732 child_tss.ecx = regs.ecx; 733 child_tss.edx = regs.edx; 734 child_tss.ebp = regs.ebp; 735 child_tss.esp = regs.userspace_esp; 736 child_tss.esi = regs.esi; 737 child_tss.edi = regs.edi; 738 child_tss.eflags = regs.eflags; 739 child_tss.eip = regs.eip; 740 child_tss.cs = regs.cs; 741 child_tss.ds = regs.ds; 742 child_tss.es = regs.es; 743 child_tss.fs = regs.fs; 744 child_tss.gs = regs.gs; 745 child_tss.ss = regs.userspace_ss; 746 747#ifdef FORK_DEBUG 748 dbgprintf("fork: child will begin executing at %w:%x with stack %w:%x, kstack %w:%x\n", child_tss.cs, child_tss.eip, child_tss.ss, child_tss.esp, child_tss.ss0, child_tss.esp0); 749#endif 750 751 { 752 InterruptDisabler disabler; 753 g_processes->prepend(child); 754 } 755#ifdef TASK_DEBUG 756 kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child_tss.eip); 757#endif 758 759 child_first_thread->set_state(Thread::State::Skip1SchedulerPass); 760 return child->pid(); 761} 762 763int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Vector<String> arguments, Vector<String> environment, RefPtr<FileDescription> interpreter_description) 764{ 765 ASSERT(is_ring3()); 
766 auto path = main_program_description->absolute_path(); 767 dbg() << "do_exec(" << path << ")"; 768 // FIXME(Thread): Kill any threads the moment we commit to the exec(). 769 if (thread_count() != 1) { 770 dbgprintf("Gonna die because I have many threads! These are the threads:\n"); 771 for_each_thread([](Thread& thread) { 772 dbgprintf("Thread{%p}: TID=%d, PID=%d\n", &thread, thread.tid(), thread.pid()); 773 return IterationDecision::Continue; 774 }); 775 ASSERT(thread_count() == 1); 776 ASSERT_NOT_REACHED(); 777 } 778 779 size_t total_blob_size = 0; 780 for (auto& a : arguments) 781 total_blob_size += a.length() + 1; 782 for (auto& e : environment) 783 total_blob_size += e.length() + 1; 784 785 size_t total_meta_size = sizeof(char*) * (arguments.size() + 1) + sizeof(char*) * (environment.size() + 1); 786 787 // FIXME: How much stack space does process startup need? 788 if ((total_blob_size + total_meta_size) >= Thread::default_userspace_stack_size) 789 return -E2BIG; 790 791 auto parts = path.split('/'); 792 if (parts.is_empty()) 793 return -ENOENT; 794 795 RefPtr<InodeVMObject> vmobject; 796 if (interpreter_description) { 797 vmobject = InodeVMObject::create_with_inode(*interpreter_description->inode()); 798 } else { 799 vmobject = InodeVMObject::create_with_inode(*main_program_description->inode()); 800 } 801 802 if (static_cast<const InodeVMObject&>(*vmobject).writable_mappings()) { 803 dbg() << "Refusing to execute a write-mapped program"; 804 return -ETXTBSY; 805 } 806 807 // Disable profiling temporarily in case it's running on this process. 
808 bool was_profiling = is_profiling(); 809 TemporaryChange profiling_disabler(m_profiling, false); 810 811 auto old_page_directory = move(m_page_directory); 812 auto old_regions = move(m_regions); 813 m_page_directory = PageDirectory::create_for_userspace(*this); 814#ifdef MM_DEBUG 815 dbgprintf("Process %u exec: PD=%x created\n", pid(), m_page_directory.ptr()); 816#endif 817 818 MM.enter_process_paging_scope(*this); 819 820 Region* region { nullptr }; 821 822 InodeMetadata loader_metadata; 823 824 // FIXME: Hoooo boy this is a hack if I ever saw one. 825 // This is the 'random' offset we're giving to our ET_DYN exectuables to start as. 826 // It also happens to be the static Virtual Addresss offset every static exectuable gets :) 827 // Without this, some assumptions by the ELF loading hooks below are severely broken. 828 // 0x08000000 is a verified random number chosen by random dice roll https://xkcd.com/221/ 829 u32 totally_random_offset = interpreter_description ? 0x08000000 : 0; 830 831 // FIXME: We should be able to load both the PT_INTERP interpreter and the main program... 
once the RTLD is smart enough 832 if (interpreter_description) { 833 loader_metadata = interpreter_description->metadata(); 834 region = allocate_region_with_vmobject(VirtualAddress(), loader_metadata.size, *vmobject, 0, interpreter_description->absolute_path(), PROT_READ, false); 835 // we don't need the interpreter file desciption after we've loaded (or not) it into memory 836 interpreter_description = nullptr; 837 } else { 838 loader_metadata = main_program_description->metadata(); 839 region = allocate_region_with_vmobject(VirtualAddress(), loader_metadata.size, *vmobject, 0, main_program_description->absolute_path(), PROT_READ, false); 840 } 841 842 ASSERT(region); 843 844 Region* master_tls_region { nullptr }; 845 size_t master_tls_size = 0; 846 size_t master_tls_alignment = 0; 847 u32 entry_eip = 0; 848 849 OwnPtr<ELFLoader> loader; 850 { 851 ArmedScopeGuard rollback_regions_guard([&]() { 852 ASSERT(Process::current == this); 853 m_page_directory = move(old_page_directory); 854 m_regions = move(old_regions); 855 MM.enter_process_paging_scope(*this); 856 }); 857 858 loader = make<ELFLoader>(region->vaddr().as_ptr(), loader_metadata.size); 859 860 // Load the correct executable -- either interp or main program. 861 // FIXME: Once we actually load both interp and main, we'll need to be more clever about this. 862 // In that case, both will be ET_DYN objects, so they'll both be completely relocatable. 863 // That means, we can put them literally anywhere in User VM space (ASLR anyone?). 864 // ALSO FIXME: Reminder to really really fix that 'totally random offset' business. 
865 loader->map_section_hook = [&](VirtualAddress vaddr, size_t size, size_t alignment, size_t offset_in_image, bool is_readable, bool is_writable, bool is_executable, const String& name) -> u8* { 866 ASSERT(size); 867 ASSERT(alignment == PAGE_SIZE); 868 int prot = 0; 869 if (is_readable) 870 prot |= PROT_READ; 871 if (is_writable) 872 prot |= PROT_WRITE; 873 if (is_executable) 874 prot |= PROT_EXEC; 875 if (auto* region = allocate_region_with_vmobject(vaddr.offset(totally_random_offset), size, *vmobject, offset_in_image, String(name), prot)) 876 return region->vaddr().as_ptr(); 877 return nullptr; 878 }; 879 loader->alloc_section_hook = [&](VirtualAddress vaddr, size_t size, size_t alignment, bool is_readable, bool is_writable, const String& name) -> u8* { 880 ASSERT(size); 881 ASSERT(alignment == PAGE_SIZE); 882 int prot = 0; 883 if (is_readable) 884 prot |= PROT_READ; 885 if (is_writable) 886 prot |= PROT_WRITE; 887 if (auto* region = allocate_region(vaddr.offset(totally_random_offset), size, String(name), prot)) 888 return region->vaddr().as_ptr(); 889 return nullptr; 890 }; 891 892 // FIXME: Move TLS region allocation to userspace: LibC and the dynamic loader. 893 // LibC if we end up with a statically linked executable, and the 894 // dynamic loader so that it can create new TLS blocks for each shared libarary 895 // that gets loaded as part of DT_NEEDED processing, and via dlopen() 896 // If that doesn't happen quickly, at least pass the location of the TLS region 897 // some ELF Auxilliary Vector so the loader can use it/create new ones as necessary. 
898 loader->tls_section_hook = [&](size_t size, size_t alignment) { 899 ASSERT(size); 900 master_tls_region = allocate_region({}, size, String(), PROT_READ | PROT_WRITE); 901 master_tls_size = size; 902 master_tls_alignment = alignment; 903 return master_tls_region->vaddr().as_ptr(); 904 }; 905 bool success = loader->load(); 906 if (!success) { 907 kprintf("do_exec: Failure loading %s\n", path.characters()); 908 return -ENOEXEC; 909 } 910 // FIXME: Validate that this virtual address is within executable region, 911 // instead of just non-null. You could totally have a DSO with entry point of 912 // the beginning of the text segement. 913 if (!loader->entry().offset(totally_random_offset).get()) { 914 kprintf("do_exec: Failure loading %s, entry pointer is invalid! (%p)\n", path.characters(), loader->entry().offset(totally_random_offset).get()); 915 return -ENOEXEC; 916 } 917 918 rollback_regions_guard.disarm(); 919 920 // NOTE: At this point, we've committed to the new executable. 921 entry_eip = loader->entry().offset(totally_random_offset).get(); 922 923#ifdef EXEC_DEBUG 924 kprintf("Memory layout after ELF load:"); 925 dump_regions(); 926#endif 927 } 928 929 m_elf_loader = move(loader); 930 m_executable = main_program_description->custody(); 931 932 m_promises = m_execpromises; 933 934 m_veil_state = VeilState::None; 935 m_unveiled_paths.clear(); 936 937 // Copy of the master TLS region that we will clone for new threads 938 m_master_tls_region = master_tls_region; 939 940 auto main_program_metadata = main_program_description->metadata(); 941 942 if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) { 943 if (main_program_metadata.is_setuid()) 944 m_euid = main_program_metadata.uid; 945 if (main_program_metadata.is_setgid()) 946 m_egid = main_program_metadata.gid; 947 } 948 949 Thread::current->set_default_signal_dispositions(); 950 Thread::current->m_signal_mask = 0; 951 Thread::current->m_pending_signals = 0; 952 953 m_futex_queues.clear(); 954 
955 m_region_lookup_cache = {}; 956 957 disown_all_shared_buffers(); 958 959 for (size_t i = 0; i < m_fds.size(); ++i) { 960 auto& daf = m_fds[i]; 961 if (daf.description && daf.flags & FD_CLOEXEC) { 962 daf.description->close(); 963 daf = {}; 964 } 965 } 966 967 Thread* new_main_thread = nullptr; 968 if (Process::current == this) { 969 new_main_thread = Thread::current; 970 } else { 971 for_each_thread([&](auto& thread) { 972 new_main_thread = &thread; 973 return IterationDecision::Break; 974 }); 975 } 976 ASSERT(new_main_thread); 977 978 // NOTE: We create the new stack before disabling interrupts since it will zero-fault 979 // and we don't want to deal with faults after this point. 980 u32 new_userspace_esp = new_main_thread->make_userspace_stack_for_main_thread(move(arguments), move(environment)); 981 982 // We cli() manually here because we don't want to get interrupted between do_exec() and Schedule::yield(). 983 // The reason is that the task redirection we've set up above will be clobbered by the timer IRQ. 984 // If we used an InterruptDisabler that sti()'d on exit, we might timer tick'd too soon in exec(). 985 if (Process::current == this) 986 cli(); 987 988 // NOTE: Be careful to not trigger any page faults below! 
989 990 Scheduler::prepare_to_modify_tss(*new_main_thread); 991 992 m_name = parts.take_last(); 993 new_main_thread->set_name(m_name); 994 995 auto& tss = new_main_thread->m_tss; 996 997 u32 old_esp0 = tss.esp0; 998 999 m_master_tls_size = master_tls_size; 1000 m_master_tls_alignment = master_tls_alignment; 1001 1002 new_main_thread->make_thread_specific_region({}); 1003 new_main_thread->reset_fpu_state(); 1004 1005 memset(&tss, 0, sizeof(TSS32)); 1006 tss.iomapbase = sizeof(TSS32); 1007 1008 tss.eflags = 0x0202; 1009 tss.eip = entry_eip; 1010 tss.cs = 0x1b; 1011 tss.ds = 0x23; 1012 tss.es = 0x23; 1013 tss.fs = 0x23; 1014 tss.gs = thread_specific_selector() | 3; 1015 tss.ss = 0x23; 1016 tss.cr3 = page_directory().cr3(); 1017 tss.esp = new_userspace_esp; 1018 tss.ss0 = 0x10; 1019 tss.esp0 = old_esp0; 1020 tss.ss2 = m_pid; 1021 1022#ifdef TASK_DEBUG 1023 kprintf("Process %u (%s) exec'd %s @ %p\n", pid(), name().characters(), path.characters(), tss.eip); 1024#endif 1025 1026 if (was_profiling) 1027 Profiling::did_exec(path); 1028 1029 new_main_thread->set_state(Thread::State::Skip1SchedulerPass); 1030 big_lock().force_unlock_if_locked(); 1031 return 0; 1032} 1033 1034static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread) 1035{ 1036 int word_start = 2; 1037 int word_length = 0; 1038 if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') { 1039 Vector<String> interpreter_words; 1040 1041 for (int i = 2; i < nread; ++i) { 1042 if (first_page[i] == '\n') { 1043 break; 1044 } 1045 1046 if (first_page[i] != ' ') { 1047 ++word_length; 1048 } 1049 1050 if (first_page[i] == ' ') { 1051 if (word_length > 0) { 1052 interpreter_words.append(String(&first_page[word_start], word_length)); 1053 } 1054 word_length = 0; 1055 word_start = i + 1; 1056 } 1057 } 1058 1059 if (word_length > 0) 1060 interpreter_words.append(String(&first_page[word_start], word_length)); 1061 1062 if (!interpreter_words.is_empty()) 1063 return 
interpreter_words; 1064 } 1065 1066 return KResult(-ENOEXEC); 1067} 1068 1069KResultOr<NonnullRefPtr<FileDescription>> Process::find_elf_interpreter_for_executable(const String& path, char (&first_page)[PAGE_SIZE], int nread, size_t file_size) 1070{ 1071 if (nread < (int)sizeof(Elf32_Ehdr)) 1072 return KResult(-ENOEXEC); 1073 1074 auto elf_header = (Elf32_Ehdr*)first_page; 1075 if (!ELFImage::validate_elf_header(*elf_header, file_size)) { 1076 dbgprintf("%s(%d) exec(%s): File has invalid ELF header\n", m_name.characters(), m_pid, path.characters()); 1077 return KResult(-ENOEXEC); 1078 } 1079 1080 // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD 1081 String interpreter_path; 1082 if (!ELFImage::validate_program_headers(*elf_header, file_size, (u8*)first_page, nread, interpreter_path)) { 1083 dbgprintf("%s(%d) exec(%s): File has invalid ELF Program headers\n", m_name.characters(), m_pid, path.characters()); 1084 return KResult(-ENOEXEC); 1085 } 1086 1087 if (!interpreter_path.is_empty()) { 1088 // Programs with an interpreter better be relocatable executables or we don't know what to do... 1089 if (elf_header->e_type != ET_DYN) 1090 return KResult(-ENOEXEC); 1091 1092 dbgprintf("%s(%d) exec(%s): Using program interpreter %s\n", m_name.characters(), m_pid, path.characters(), interpreter_path.characters()); 1093 auto interp_result = VFS::the().open(interpreter_path, O_EXEC, 0, current_directory()); 1094 if (interp_result.is_error()) { 1095 dbgprintf("%s(%d) exec(%s): Unable to open program interpreter %s\n", m_name.characters(), m_pid, path.characters(), interpreter_path.characters()); 1096 return interp_result.error(); 1097 } 1098 auto interpreter_description = interp_result.value(); 1099 auto interp_metadata = interpreter_description->metadata(); 1100 1101 ASSERT(interpreter_description->inode()); 1102 1103 // Validate the program interpreter as a valid elf binary. 1104 // If your program interpreter is a #! 
file or something, it's time to stop playing games :) 1105 if (interp_metadata.size < (int)sizeof(Elf32_Ehdr)) 1106 return KResult(-ENOEXEC); 1107 1108 memset(first_page, 0, sizeof(first_page)); 1109 nread = interpreter_description->read((u8*)&first_page, sizeof(first_page)); 1110 1111 if (nread < (int)sizeof(Elf32_Ehdr)) 1112 return KResult(-ENOEXEC); 1113 1114 elf_header = (Elf32_Ehdr*)first_page; 1115 if (!ELFImage::validate_elf_header(*elf_header, interp_metadata.size)) { 1116 dbgprintf("%s(%d) exec(%s): Interpreter (%s) has invalid ELF header\n", m_name.characters(), m_pid, path.characters(), interpreter_description->absolute_path().characters()); 1117 return KResult(-ENOEXEC); 1118 } 1119 1120 // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD 1121 String interpreter_interpreter_path; 1122 if (!ELFImage::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, interpreter_interpreter_path)) { 1123 dbgprintf("%s(%d) exec(%s): Interpreter (%s) has invalid ELF Program headers\n", m_name.characters(), m_pid, path.characters(), interpreter_description->absolute_path().characters()); 1124 return KResult(-ENOEXEC); 1125 } 1126 1127 if (!interpreter_interpreter_path.is_empty()) { 1128 dbgprintf("%s(%d) exec(%s): Interpreter (%s) has its own interpreter (%s)! 
No thank you!\n", 1129 m_name.characters(), m_pid, path.characters(), interpreter_description->absolute_path().characters(), interpreter_interpreter_path.characters()); 1130 return KResult(-ELOOP); 1131 } 1132 1133 return interpreter_description; 1134 } 1135 1136 if (elf_header->e_type != ET_EXEC) { 1137 // We can't exec an ET_REL, that's just an object file from the compiler 1138 // If it's ET_DYN with no PT_INTERP, then we can't load it properly either 1139 return KResult(-ENOEXEC); 1140 } 1141 1142 // No interpreter, but, path refers to a valid elf image 1143 return KResult(KSuccess); 1144} 1145 1146int Process::exec(String path, Vector<String> arguments, Vector<String> environment, int recursion_depth) 1147{ 1148 if (recursion_depth > 2) { 1149 dbgprintf("%s(%d) exec(%s): SHENANIGANS! recursed too far trying to find #! interpreter\n", m_name.characters(), m_pid, path.characters()); 1150 return -ELOOP; 1151 } 1152 1153 // Open the file to check what kind of binary format it is 1154 // Currently supported formats: 1155 // - #! interpreted file 1156 // - ELF32 1157 // * ET_EXEC binary that just gets loaded 1158 // * ET_DYN binary that requires a program interpreter 1159 // 1160 auto result = VFS::the().open(path, O_EXEC, 0, current_directory()); 1161 if (result.is_error()) 1162 return result.error(); 1163 auto description = result.value(); 1164 auto metadata = description->metadata(); 1165 1166 // Always gonna need at least 3 bytes. these are for #!X 1167 if (metadata.size < 3) 1168 return -ENOEXEC; 1169 1170 ASSERT(description->inode()); 1171 1172 // Read the first page of the program into memory so we can validate the binfmt of it 1173 char first_page[PAGE_SIZE]; 1174 int nread = description->read((u8*)&first_page, sizeof(first_page)); 1175 1176 // 1) #! 
interpreted file 1177 auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread); 1178 if (!shebang_result.is_error()) { 1179 Vector<String> new_arguments(shebang_result.value()); 1180 1181 new_arguments.append(path); 1182 1183 arguments.remove(0); 1184 new_arguments.append(move(arguments)); 1185 1186 return exec(shebang_result.value().first(), move(new_arguments), move(environment), ++recursion_depth); 1187 } 1188 1189 // #2) ELF32 for i386 1190 auto elf_result = find_elf_interpreter_for_executable(path, first_page, nread, metadata.size); 1191 RefPtr<FileDescription> interpreter_description; 1192 // We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out) 1193 if (!elf_result.is_error()) 1194 interpreter_description = elf_result.value(); 1195 else if (elf_result.error().is_error()) 1196 return elf_result.error(); 1197 1198 // The bulk of exec() is done by do_exec(), which ensures that all locals 1199 // are cleaned up by the time we yield-teleport below. 1200 int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description)); 1201 if (rc < 0) 1202 return rc; 1203 1204 if (Process::current == this) { 1205 Scheduler::yield(); 1206 ASSERT_NOT_REACHED(); 1207 } 1208 return 0; 1209} 1210 1211int Process::sys$execve(const Syscall::SC_execve_params* user_params) 1212{ 1213 REQUIRE_PROMISE(exec); 1214 // NOTE: Be extremely careful with allocating any kernel memory in exec(). 1215 // On success, the kernel stack will be lost. 
1216 Syscall::SC_execve_params params; 1217 if (!validate_read_and_copy_typed(&params, user_params)) 1218 return -EFAULT; 1219 1220 if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX) 1221 return -E2BIG; 1222 1223 String path; 1224 { 1225 auto path_arg = get_syscall_path_argument(params.path); 1226 if (path_arg.is_error()) 1227 return path_arg.error(); 1228 path = path_arg.value(); 1229 } 1230 1231 auto copy_user_strings = [&](const auto& list, auto& output) { 1232 if (!list.length) 1233 return true; 1234 if (!validate_read_typed(list.strings, list.length)) 1235 return false; 1236 Vector<Syscall::StringArgument, 32> strings; 1237 strings.resize(list.length); 1238 copy_from_user(strings.data(), list.strings, list.length * sizeof(Syscall::StringArgument)); 1239 for (size_t i = 0; i < list.length; ++i) { 1240 auto string = validate_and_copy_string_from_user(strings[i]); 1241 if (string.is_null()) 1242 return false; 1243 output.append(move(string)); 1244 } 1245 return true; 1246 }; 1247 1248 Vector<String> arguments; 1249 if (!copy_user_strings(params.arguments, arguments)) 1250 return -EFAULT; 1251 1252 Vector<String> environment; 1253 if (!copy_user_strings(params.environment, environment)) 1254 return -EFAULT; 1255 1256 int rc = exec(move(path), move(arguments), move(environment)); 1257 ASSERT(rc < 0); // We should never continue after a successful exec! 
1258 return rc; 1259} 1260 1261Process* Process::create_user_process(Thread*& first_thread, const String& path, uid_t uid, gid_t gid, pid_t parent_pid, int& error, Vector<String>&& arguments, Vector<String>&& environment, TTY* tty) 1262{ 1263 auto parts = path.split('/'); 1264 if (arguments.is_empty()) { 1265 arguments.append(parts.last()); 1266 } 1267 RefPtr<Custody> cwd; 1268 RefPtr<Custody> root; 1269 { 1270 InterruptDisabler disabler; 1271 if (auto* parent = Process::from_pid(parent_pid)) { 1272 cwd = parent->m_cwd; 1273 root = parent->m_root_directory; 1274 } 1275 } 1276 1277 if (!cwd) 1278 cwd = VFS::the().root_custody(); 1279 1280 if (!root) 1281 root = VFS::the().root_custody(); 1282 1283 auto* process = new Process(first_thread, parts.take_last(), uid, gid, parent_pid, Ring3, move(cwd), nullptr, tty); 1284 process->m_fds.resize(m_max_open_file_descriptors); 1285 auto& device_to_use_as_tty = tty ? (CharacterDevice&)*tty : NullDevice::the(); 1286 auto description = device_to_use_as_tty.open(O_RDWR).value(); 1287 process->m_fds[0].set(*description); 1288 process->m_fds[1].set(*description); 1289 process->m_fds[2].set(*description); 1290 1291 error = process->exec(path, move(arguments), move(environment)); 1292 if (error != 0) { 1293 delete process; 1294 return nullptr; 1295 } 1296 1297 { 1298 InterruptDisabler disabler; 1299 g_processes->prepend(process); 1300 } 1301#ifdef TASK_DEBUG 1302 kprintf("Process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), first_thread->tss().eip); 1303#endif 1304 error = 0; 1305 return process; 1306} 1307 1308Process* Process::create_kernel_process(Thread*& first_thread, String&& name, void (*e)()) 1309{ 1310 auto* process = new Process(first_thread, move(name), (uid_t)0, (gid_t)0, (pid_t)0, Ring0); 1311 first_thread->tss().eip = (uintptr_t)e; 1312 1313 if (process->pid() != 0) { 1314 InterruptDisabler disabler; 1315 g_processes->prepend(process); 1316#ifdef TASK_DEBUG 1317 kprintf("Kernel process %u (%s) 
spawned @ %p\n", process->pid(), process->name().characters(), first_thread->tss().eip); 1318#endif 1319 } 1320 1321 first_thread->set_state(Thread::State::Runnable); 1322 return process; 1323} 1324 1325Process::Process(Thread*& first_thread, const String& name, uid_t uid, gid_t gid, pid_t ppid, RingLevel ring, RefPtr<Custody> cwd, RefPtr<Custody> executable, TTY* tty, Process* fork_parent) 1326 : m_name(move(name)) 1327 , m_pid(allocate_pid()) 1328 , m_uid(uid) 1329 , m_gid(gid) 1330 , m_euid(uid) 1331 , m_egid(gid) 1332 , m_ring(ring) 1333 , m_executable(move(executable)) 1334 , m_cwd(move(cwd)) 1335 , m_tty(tty) 1336 , m_ppid(ppid) 1337{ 1338#ifdef PROCESS_DEBUG 1339 dbg() << "Created new process " << m_name << "(" << m_pid << ")"; 1340#endif 1341 1342 m_page_directory = PageDirectory::create_for_userspace(*this, fork_parent ? &fork_parent->page_directory().range_allocator() : nullptr); 1343#ifdef MM_DEBUG 1344 dbgprintf("Process %u ctor: PD=%x created\n", pid(), m_page_directory.ptr()); 1345#endif 1346 1347 if (fork_parent) { 1348 // NOTE: fork() doesn't clone all threads; the thread that called fork() becomes the only thread in the new process. 1349 first_thread = Thread::current->clone(*this); 1350 } else { 1351 // NOTE: This non-forked code path is only taken when the kernel creates a process "manually" (at boot.) 1352 first_thread = new Thread(*this); 1353 } 1354} 1355 1356Process::~Process() 1357{ 1358 ASSERT(thread_count() == 0); 1359} 1360 1361void Process::dump_regions() 1362{ 1363 kprintf("Process %s(%u) regions:\n", name().characters(), pid()); 1364 kprintf("BEGIN END SIZE ACCESS NAME\n"); 1365 for (auto& region : m_regions) { 1366 kprintf("%08x -- %08x %08x %c%c%c%c%c%c %s\n", 1367 region.vaddr().get(), 1368 region.vaddr().offset(region.size() - 1).get(), 1369 region.size(), 1370 region.is_readable() ? 'R' : ' ', 1371 region.is_writable() ? 'W' : ' ', 1372 region.is_executable() ? 'X' : ' ', 1373 region.is_shared() ? 
// Syscall: terminates the calling process with exit status `status`.
// Records the status (with no termination signal), kills the process and then
// the current thread. Control is not expected to come back: the function ends
// in ASSERT_NOT_REACHED().
void Process::sys$exit(int status)
{
    // Disable interrupts for the teardown below.
    cli();
#ifdef TASK_DEBUG
    kprintf("sys$exit: %s(%u) exit with status %d\n", name().characters(), pid(), status);
#endif

    // A non-zero exit status gets a backtrace dumped.
    if (status != 0)
        dump_backtrace();

    m_termination_status = status;
    m_termination_signal = 0;
    die();
    Thread::current->die_if_needed();
    ASSERT_NOT_REACHED();
}

// Container for the signal trampoline machine code. The code between the
// asm_signal_trampoline and asm_signal_trampoline_end labels is what matters;
// the labels are declared as extern "C" symbols right after this function.
void signal_trampoline_dummy(void)
{
    // The trampoline preserves the current eax, pushes the signal code and
    // then calls the signal handler. We do this because, when interrupting a
    // blocking syscall, that syscall may return some special error code in eax;
    // This error code would likely be overwritten by the signal handler, so it's
    // necessary to preserve it here.
    asm(
        ".intel_syntax noprefix\n"
        "asm_signal_trampoline:\n"
        "push ebp\n"
        "mov ebp, esp\n"
        "push eax\n" // we have to store eax 'cause it might be the return value from a syscall
        "sub esp, 4\n" // align the stack to 16 bytes
        "mov eax, [ebp+12]\n" // push the signal code
        "push eax\n"
        "call [ebp+8]\n" // call the signal handler
        "add esp, 8\n"
        "mov eax, %P0\n"
        "int 0x82\n" // sigreturn syscall
        "asm_signal_trampoline_end:\n"
        ".att_syntax" ::"i"(Syscall::SC_sigreturn));
}

// Start and end labels of the trampoline code emitted by the asm block above.
extern "C" void asm_signal_trampoline(void);
extern "C" void asm_signal_trampoline_end(void);
1430 auto* trampoline_region = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Signal trampolines", Region::Access::Read | Region::Access::Write | Region::Access::Execute, false).leak_ptr(); 1431 g_return_to_ring3_from_signal_trampoline = trampoline_region->vaddr(); 1432 1433 u8* trampoline = (u8*)asm_signal_trampoline; 1434 u8* trampoline_end = (u8*)asm_signal_trampoline_end; 1435 size_t trampoline_size = trampoline_end - trampoline; 1436 1437 { 1438 SmapDisabler disabler; 1439 u8* code_ptr = (u8*)trampoline_region->vaddr().as_ptr(); 1440 memcpy(code_ptr, trampoline, trampoline_size); 1441 } 1442 1443 trampoline_region->set_writable(false); 1444 trampoline_region->remap(); 1445} 1446 1447void create_kernel_info_page() 1448{ 1449 auto* info_page_region_for_userspace = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Kernel info page", Region::Access::Read).leak_ptr(); 1450 auto* info_page_region_for_kernel = MM.allocate_kernel_region_with_vmobject(info_page_region_for_userspace->vmobject(), PAGE_SIZE, "Kernel info page", Region::Access::Read | Region::Access::Write).leak_ptr(); 1451 s_info_page_address_for_userspace = info_page_region_for_userspace->vaddr(); 1452 s_info_page_address_for_kernel = info_page_region_for_kernel->vaddr(); 1453 memset(s_info_page_address_for_kernel.as_ptr(), 0, PAGE_SIZE); 1454} 1455 1456int Process::sys$sigreturn(RegisterState& registers) 1457{ 1458 REQUIRE_PROMISE(stdio); 1459 SmapDisabler disabler; 1460 1461 //Here, we restore the state pushed by dispatch signal and asm_signal_trampoline. 
1462 u32* stack_ptr = (u32*)registers.userspace_esp; 1463 u32 smuggled_eax = *stack_ptr; 1464 1465 //pop the stored eax, ebp, return address, handler and signal code 1466 stack_ptr += 5; 1467 1468 Thread::current->m_signal_mask = *stack_ptr; 1469 stack_ptr++; 1470 1471 //pop edi, esi, ebp, esp, ebx, edx, ecx and eax 1472 memcpy(&registers.edi, stack_ptr, 8 * sizeof(uintptr_t)); 1473 stack_ptr += 8; 1474 1475 registers.eip = *stack_ptr; 1476 stack_ptr++; 1477 1478 registers.eflags = *stack_ptr; 1479 stack_ptr++; 1480 1481 registers.userspace_esp = registers.esp; 1482 return smuggled_eax; 1483} 1484 1485void Process::crash(int signal, u32 eip) 1486{ 1487 ASSERT_INTERRUPTS_DISABLED(); 1488 ASSERT(!is_dead()); 1489 ASSERT(Process::current == this); 1490 1491 if (eip >= 0xc0000000 && ksyms_ready) { 1492 auto* ksym = ksymbolicate(eip); 1493 dbgprintf("\033[31;1m%p %s +%d\033[0m\n", eip, ksym ? demangle(ksym->name).characters() : "(k?)", ksym ? eip - ksym->address : 0); 1494 } else if (m_elf_loader) { 1495 dbgprintf("\033[31;1m%p %s\033[0m\n", eip, m_elf_loader->symbolicate(eip).characters()); 1496 } else { 1497 dbgprintf("\033[31;1m%p (?)\033[0m\n", eip); 1498 } 1499 dump_backtrace(); 1500 1501 m_termination_signal = signal; 1502 dump_regions(); 1503 ASSERT(is_ring3()); 1504 die(); 1505 // We can not return from here, as there is nowhere 1506 // to unwind to, so die right away. 
1507 Thread::current->die_if_needed(); 1508 ASSERT_NOT_REACHED(); 1509} 1510 1511Process* Process::from_pid(pid_t pid) 1512{ 1513 ASSERT_INTERRUPTS_DISABLED(); 1514 for (auto& process : *g_processes) { 1515 if (process.pid() == pid) 1516 return &process; 1517 } 1518 return nullptr; 1519} 1520 1521RefPtr<FileDescription> Process::file_description(int fd) const 1522{ 1523 if (fd < 0) 1524 return nullptr; 1525 if (static_cast<size_t>(fd) < m_fds.size()) 1526 return m_fds[fd].description.ptr(); 1527 return nullptr; 1528} 1529 1530int Process::fd_flags(int fd) const 1531{ 1532 if (fd < 0) 1533 return -1; 1534 if (static_cast<size_t>(fd) < m_fds.size()) 1535 return m_fds[fd].flags; 1536 return -1; 1537} 1538 1539ssize_t Process::sys$get_dir_entries(int fd, void* buffer, ssize_t size) 1540{ 1541 REQUIRE_PROMISE(stdio); 1542 if (size < 0) 1543 return -EINVAL; 1544 if (!validate_write(buffer, size)) 1545 return -EFAULT; 1546 auto description = file_description(fd); 1547 if (!description) 1548 return -EBADF; 1549 return description->get_dir_entries((u8*)buffer, size); 1550} 1551 1552int Process::sys$lseek(int fd, off_t offset, int whence) 1553{ 1554 REQUIRE_PROMISE(stdio); 1555 auto description = file_description(fd); 1556 if (!description) 1557 return -EBADF; 1558 return description->seek(offset, whence); 1559} 1560 1561int Process::sys$ttyname_r(int fd, char* buffer, ssize_t size) 1562{ 1563 REQUIRE_PROMISE(tty); 1564 if (size < 0) 1565 return -EINVAL; 1566 if (!validate_write(buffer, size)) 1567 return -EFAULT; 1568 auto description = file_description(fd); 1569 if (!description) 1570 return -EBADF; 1571 if (!description->is_tty()) 1572 return -ENOTTY; 1573 String tty_name = description->tty()->tty_name(); 1574 if ((size_t)size < tty_name.length() + 1) 1575 return -ERANGE; 1576 copy_to_user(buffer, tty_name.characters(), tty_name.length() + 1); 1577 return 0; 1578} 1579 1580int Process::sys$ptsname_r(int fd, char* buffer, ssize_t size) 1581{ 1582 REQUIRE_PROMISE(tty); 1583 
if (size < 0) 1584 return -EINVAL; 1585 if (!validate_write(buffer, size)) 1586 return -EFAULT; 1587 auto description = file_description(fd); 1588 if (!description) 1589 return -EBADF; 1590 auto* master_pty = description->master_pty(); 1591 if (!master_pty) 1592 return -ENOTTY; 1593 auto pts_name = master_pty->pts_name(); 1594 if ((size_t)size < pts_name.length() + 1) 1595 return -ERANGE; 1596 copy_to_user(buffer, pts_name.characters(), pts_name.length() + 1); 1597 return 0; 1598} 1599 1600ssize_t Process::sys$writev(int fd, const struct iovec* iov, int iov_count) 1601{ 1602 REQUIRE_PROMISE(stdio); 1603 if (iov_count < 0) 1604 return -EINVAL; 1605 1606 if (!validate_read_typed(iov, iov_count)) 1607 return -EFAULT; 1608 1609 u64 total_length = 0; 1610 Vector<iovec, 32> vecs; 1611 vecs.resize(iov_count); 1612 copy_from_user(vecs.data(), iov, iov_count * sizeof(iovec)); 1613 for (auto& vec : vecs) { 1614 if (!validate_read(vec.iov_base, vec.iov_len)) 1615 return -EFAULT; 1616 total_length += vec.iov_len; 1617 if (total_length > INT32_MAX) 1618 return -EINVAL; 1619 } 1620 1621 auto description = file_description(fd); 1622 if (!description) 1623 return -EBADF; 1624 1625 if (!description->is_writable()) 1626 return -EBADF; 1627 1628 int nwritten = 0; 1629 for (auto& vec : vecs) { 1630 int rc = do_write(*description, (const u8*)vec.iov_base, vec.iov_len); 1631 if (rc < 0) { 1632 if (nwritten == 0) 1633 return rc; 1634 return nwritten; 1635 } 1636 nwritten += rc; 1637 } 1638 1639 return nwritten; 1640} 1641 1642ssize_t Process::do_write(FileDescription& description, const u8* data, int data_size) 1643{ 1644 ssize_t nwritten = 0; 1645 if (!description.is_blocking()) { 1646 if (!description.can_write()) 1647 return -EAGAIN; 1648 } 1649 1650 if (description.should_append()) { 1651#ifdef IO_DEBUG 1652 dbgprintf("seeking to end (O_APPEND)\n"); 1653#endif 1654 description.seek(0, SEEK_END); 1655 } 1656 1657 while (nwritten < data_size) { 1658#ifdef IO_DEBUG 1659 dbgprintf("while 
%u < %u\n", nwritten, size); 1660#endif 1661 if (!description.can_write()) { 1662#ifdef IO_DEBUG 1663 dbgprintf("block write on %d\n", fd); 1664#endif 1665 if (Thread::current->block<Thread::WriteBlocker>(description) != Thread::BlockResult::WokeNormally) { 1666 if (nwritten == 0) 1667 return -EINTR; 1668 } 1669 } 1670 ssize_t rc = description.write(data + nwritten, data_size - nwritten); 1671#ifdef IO_DEBUG 1672 dbgprintf(" -> write returned %d\n", rc); 1673#endif 1674 if (rc < 0) { 1675 // FIXME: Support returning partial nwritten with errno. 1676 ASSERT(nwritten == 0); 1677 return rc; 1678 } 1679 if (rc == 0) 1680 break; 1681 nwritten += rc; 1682 } 1683 return nwritten; 1684} 1685 1686ssize_t Process::sys$write(int fd, const u8* data, ssize_t size) 1687{ 1688 REQUIRE_PROMISE(stdio); 1689 if (size < 0) 1690 return -EINVAL; 1691 if (size == 0) 1692 return 0; 1693 if (!validate_read(data, size)) 1694 return -EFAULT; 1695#ifdef DEBUG_IO 1696 dbgprintf("%s(%u): sys$write(%d, %p, %u)\n", name().characters(), pid(), fd, data, size); 1697#endif 1698 auto description = file_description(fd); 1699 if (!description) 1700 return -EBADF; 1701 if (!description->is_writable()) 1702 return -EBADF; 1703 1704 return do_write(*description, data, size); 1705} 1706 1707ssize_t Process::sys$read(int fd, u8* buffer, ssize_t size) 1708{ 1709 REQUIRE_PROMISE(stdio); 1710 if (size < 0) 1711 return -EINVAL; 1712 if (size == 0) 1713 return 0; 1714 if (!validate_write(buffer, size)) 1715 return -EFAULT; 1716#ifdef DEBUG_IO 1717 dbgprintf("%s(%u) sys$read(%d, %p, %u)\n", name().characters(), pid(), fd, buffer, size); 1718#endif 1719 auto description = file_description(fd); 1720 if (!description) 1721 return -EBADF; 1722 if (!description->is_readable()) 1723 return -EBADF; 1724 if (description->is_directory()) 1725 return -EISDIR; 1726 if (description->is_blocking()) { 1727 if (!description->can_read()) { 1728 if (Thread::current->block<Thread::ReadBlocker>(*description) != 
Thread::BlockResult::WokeNormally) 1729 return -EINTR; 1730 if (!description->can_read()) 1731 return -EAGAIN; 1732 } 1733 } 1734 return description->read(buffer, size); 1735} 1736 1737int Process::sys$close(int fd) 1738{ 1739 REQUIRE_PROMISE(stdio); 1740 auto description = file_description(fd); 1741#ifdef DEBUG_IO 1742 dbgprintf("%s(%u) sys$close(%d) %p\n", name().characters(), pid(), fd, description.ptr()); 1743#endif 1744 if (!description) 1745 return -EBADF; 1746 int rc = description->close(); 1747 m_fds[fd] = {}; 1748 return rc; 1749} 1750 1751int Process::sys$utime(const char* user_path, size_t path_length, const utimbuf* user_buf) 1752{ 1753 REQUIRE_PROMISE(fattr); 1754 if (user_buf && !validate_read_typed(user_buf)) 1755 return -EFAULT; 1756 auto path = get_syscall_path_argument(user_path, path_length); 1757 if (path.is_error()) 1758 return path.error(); 1759 utimbuf buf; 1760 if (user_buf) { 1761 copy_from_user(&buf, user_buf); 1762 } else { 1763 auto now = kgettimeofday(); 1764 buf = { now.tv_sec, now.tv_sec }; 1765 } 1766 return VFS::the().utime(path.value(), current_directory(), buf.actime, buf.modtime); 1767} 1768 1769int Process::sys$access(const char* user_path, size_t path_length, int mode) 1770{ 1771 REQUIRE_PROMISE(rpath); 1772 auto path = get_syscall_path_argument(user_path, path_length); 1773 if (path.is_error()) 1774 return path.error(); 1775 return VFS::the().access(path.value(), mode, current_directory()); 1776} 1777 1778int Process::sys$fcntl(int fd, int cmd, u32 arg) 1779{ 1780 REQUIRE_PROMISE(stdio); 1781#ifdef DEBUG_IO 1782 dbgprintf("sys$fcntl: fd=%d, cmd=%d, arg=%u\n", fd, cmd, arg); 1783#endif 1784 auto description = file_description(fd); 1785 if (!description) 1786 return -EBADF; 1787 // NOTE: The FD flags are not shared between FileDescription objects. 1788 // This means that dup() doesn't copy the FD_CLOEXEC flag! 
1789 switch (cmd) { 1790 case F_DUPFD: { 1791 int arg_fd = (int)arg; 1792 if (arg_fd < 0) 1793 return -EINVAL; 1794 int new_fd = alloc_fd(arg_fd); 1795 if (new_fd < 0) 1796 return new_fd; 1797 m_fds[new_fd].set(*description); 1798 return new_fd; 1799 } 1800 case F_GETFD: 1801 return m_fds[fd].flags; 1802 case F_SETFD: 1803 m_fds[fd].flags = arg; 1804 break; 1805 case F_GETFL: 1806 return description->file_flags(); 1807 case F_SETFL: 1808 description->set_file_flags(arg); 1809 break; 1810 default: 1811 ASSERT_NOT_REACHED(); 1812 } 1813 return 0; 1814} 1815 1816int Process::sys$fstat(int fd, stat* statbuf) 1817{ 1818 REQUIRE_PROMISE(stdio); 1819 if (!validate_write_typed(statbuf)) 1820 return -EFAULT; 1821 auto description = file_description(fd); 1822 if (!description) 1823 return -EBADF; 1824 return description->fstat(*statbuf); 1825} 1826 1827int Process::sys$stat(const Syscall::SC_stat_params* user_params) 1828{ 1829 REQUIRE_PROMISE(rpath); 1830 Syscall::SC_stat_params params; 1831 if (!validate_read_and_copy_typed(&params, user_params)) 1832 return -EFAULT; 1833 if (!validate_write_typed(params.statbuf)) 1834 return -EFAULT; 1835 auto path = get_syscall_path_argument(params.path); 1836 if (path.is_error()) 1837 return path.error(); 1838 auto metadata_or_error = VFS::the().lookup_metadata(path.value(), current_directory(), params.follow_symlinks ? 
0 : O_NOFOLLOW_NOERROR); 1839 if (metadata_or_error.is_error()) 1840 return metadata_or_error.error(); 1841 stat statbuf; 1842 auto result = metadata_or_error.value().stat(statbuf); 1843 if (result.is_error()) 1844 return result; 1845 copy_to_user(params.statbuf, &statbuf); 1846 return 0; 1847} 1848 1849template<typename DataType, typename SizeType> 1850bool Process::validate(const Syscall::MutableBufferArgument<DataType, SizeType>& buffer) 1851{ 1852 return validate_write(buffer.data, buffer.size); 1853} 1854 1855template<typename DataType, typename SizeType> 1856bool Process::validate(const Syscall::ImmutableBufferArgument<DataType, SizeType>& buffer) 1857{ 1858 return validate_read(buffer.data, buffer.size); 1859} 1860 1861String Process::validate_and_copy_string_from_user(const char* user_characters, size_t user_length) const 1862{ 1863 if (user_length == 0) 1864 return String::empty(); 1865 if (!user_characters) 1866 return {}; 1867 if (!validate_read(user_characters, user_length)) 1868 return {}; 1869 SmapDisabler disabler; 1870 size_t measured_length = strnlen(user_characters, user_length); 1871 return String(user_characters, measured_length); 1872} 1873 1874String Process::validate_and_copy_string_from_user(const Syscall::StringArgument& string) const 1875{ 1876 return validate_and_copy_string_from_user(string.characters, string.length); 1877} 1878 1879int Process::sys$readlink(const Syscall::SC_readlink_params* user_params) 1880{ 1881 REQUIRE_PROMISE(rpath); 1882 1883 Syscall::SC_readlink_params params; 1884 if (!validate_read_and_copy_typed(&params, user_params)) 1885 return -EFAULT; 1886 1887 if (!validate(params.buffer)) 1888 return -EFAULT; 1889 1890 auto path = get_syscall_path_argument(params.path); 1891 if (path.is_error()) 1892 return path.error(); 1893 1894 auto result = VFS::the().open(path.value(), O_RDONLY | O_NOFOLLOW_NOERROR, 0, current_directory()); 1895 if (result.is_error()) 1896 return result.error(); 1897 auto description = 
result.value(); 1898 1899 if (!description->metadata().is_symlink()) 1900 return -EINVAL; 1901 1902 auto contents = description->read_entire_file(); 1903 if (!contents) 1904 return -EIO; // FIXME: Get a more detailed error from VFS. 1905 1906 auto link_target = String::copy(contents); 1907 if (link_target.length() + 1 > params.buffer.size) 1908 return -ENAMETOOLONG; 1909 copy_to_user(params.buffer.data, link_target.characters(), link_target.length() + 1); 1910 return link_target.length() + 1; 1911} 1912 1913int Process::sys$chdir(const char* user_path, size_t path_length) 1914{ 1915 REQUIRE_PROMISE(rpath); 1916 auto path = get_syscall_path_argument(user_path, path_length); 1917 if (path.is_error()) 1918 return path.error(); 1919 auto directory_or_error = VFS::the().open_directory(path.value(), current_directory()); 1920 if (directory_or_error.is_error()) 1921 return directory_or_error.error(); 1922 m_cwd = *directory_or_error.value(); 1923 return 0; 1924} 1925 1926int Process::sys$fchdir(int fd) 1927{ 1928 REQUIRE_PROMISE(stdio); 1929 auto description = file_description(fd); 1930 if (!description) 1931 return -EBADF; 1932 1933 if (!description->is_directory()) 1934 return -ENOTDIR; 1935 1936 if (!description->metadata().may_execute(*this)) 1937 return -EACCES; 1938 1939 m_cwd = description->custody(); 1940 return 0; 1941} 1942 1943int Process::sys$getcwd(char* buffer, ssize_t size) 1944{ 1945 REQUIRE_PROMISE(rpath); 1946 if (size < 0) 1947 return -EINVAL; 1948 if (!validate_write(buffer, size)) 1949 return -EFAULT; 1950 auto path = current_directory().absolute_path(); 1951 if ((size_t)size < path.length() + 1) 1952 return -ERANGE; 1953 copy_to_user(buffer, path.characters(), path.length() + 1); 1954 return 0; 1955} 1956 1957int Process::number_of_open_file_descriptors() const 1958{ 1959 int count = 0; 1960 for (auto& description : m_fds) { 1961 if (description) 1962 ++count; 1963 } 1964 return count; 1965} 1966 1967int Process::sys$open(const 
Syscall::SC_open_params* user_params) 1968{ 1969 Syscall::SC_open_params params; 1970 if (!validate_read_and_copy_typed(&params, user_params)) 1971 return -EFAULT; 1972 1973 int dirfd = params.dirfd; 1974 int options = params.options; 1975 u16 mode = params.mode; 1976 1977 if (options & O_NOFOLLOW_NOERROR) 1978 return -EINVAL; 1979 1980 if (options & O_UNLINK_INTERNAL) 1981 return -EINVAL; 1982 1983 if (options & O_WRONLY) 1984 REQUIRE_PROMISE(wpath); 1985 else if (options & O_RDONLY) 1986 REQUIRE_PROMISE(rpath); 1987 1988 if (options & O_CREAT) 1989 REQUIRE_PROMISE(cpath); 1990 1991 // Ignore everything except permission bits. 1992 mode &= 04777; 1993 1994 auto path = get_syscall_path_argument(params.path); 1995 if (path.is_error()) 1996 return path.error(); 1997#ifdef DEBUG_IO 1998 dbg() << "sys$open(dirfd=" << dirfd << ", path=\"" << path.value() << "\", options=" << options << ", mode=" << mode << ")"; 1999#endif 2000 int fd = alloc_fd(); 2001 if (fd < 0) 2002 return fd; 2003 2004 RefPtr<Custody> base; 2005 if (dirfd == AT_FDCWD) { 2006 base = current_directory(); 2007 } else { 2008 auto base_description = file_description(dirfd); 2009 if (!base_description) 2010 return -EBADF; 2011 if (!base_description->is_directory()) 2012 return -ENOTDIR; 2013 if (!base_description->custody()) 2014 return -EINVAL; 2015 base = base_description->custody(); 2016 } 2017 2018 auto result = VFS::the().open(path.value(), options, mode & ~umask(), *base); 2019 if (result.is_error()) 2020 return result.error(); 2021 auto description = result.value(); 2022 u32 fd_flags = (options & O_CLOEXEC) ? 
FD_CLOEXEC : 0; 2023 m_fds[fd].set(move(description), fd_flags); 2024 return fd; 2025} 2026 2027int Process::alloc_fd(int first_candidate_fd) 2028{ 2029 for (int i = first_candidate_fd; i < (int)m_max_open_file_descriptors; ++i) { 2030 if (!m_fds[i]) 2031 return i; 2032 } 2033 return -EMFILE; 2034} 2035 2036int Process::sys$pipe(int pipefd[2], int flags) 2037{ 2038 REQUIRE_PROMISE(stdio); 2039 if (!validate_write_typed(pipefd)) 2040 return -EFAULT; 2041 if (number_of_open_file_descriptors() + 2 > max_open_file_descriptors()) 2042 return -EMFILE; 2043 // Reject flags other than O_CLOEXEC. 2044 if ((flags & O_CLOEXEC) != flags) 2045 return -EINVAL; 2046 2047 u32 fd_flags = (flags & O_CLOEXEC) ? FD_CLOEXEC : 0; 2048 auto fifo = FIFO::create(m_uid); 2049 2050 int reader_fd = alloc_fd(); 2051 m_fds[reader_fd].set(fifo->open_direction(FIFO::Direction::Reader), fd_flags); 2052 m_fds[reader_fd].description->set_readable(true); 2053 copy_to_user(&pipefd[0], &reader_fd); 2054 2055 int writer_fd = alloc_fd(); 2056 m_fds[writer_fd].set(fifo->open_direction(FIFO::Direction::Writer), fd_flags); 2057 m_fds[writer_fd].description->set_writable(true); 2058 copy_to_user(&pipefd[1], &writer_fd); 2059 2060 return 0; 2061} 2062 2063int Process::sys$killpg(int pgrp, int signum) 2064{ 2065 REQUIRE_PROMISE(proc); 2066 if (signum < 1 || signum >= 32) 2067 return -EINVAL; 2068 if (pgrp < 0) 2069 return -EINVAL; 2070 2071 InterruptDisabler disabler; 2072 return do_killpg(pgrp, signum); 2073} 2074 2075int Process::sys$setuid(uid_t uid) 2076{ 2077 REQUIRE_PROMISE(id); 2078 if (uid != m_uid && !is_superuser()) 2079 return -EPERM; 2080 m_uid = uid; 2081 m_euid = uid; 2082 return 0; 2083} 2084 2085int Process::sys$setgid(gid_t gid) 2086{ 2087 REQUIRE_PROMISE(id); 2088 if (gid != m_gid && !is_superuser()) 2089 return -EPERM; 2090 m_gid = gid; 2091 m_egid = gid; 2092 return 0; 2093} 2094 2095unsigned Process::sys$alarm(unsigned seconds) 2096{ 2097 REQUIRE_PROMISE(stdio); 2098 unsigned 
previous_alarm_remaining = 0; 2099 if (m_alarm_deadline && m_alarm_deadline > g_uptime) { 2100 previous_alarm_remaining = (m_alarm_deadline - g_uptime) / TICKS_PER_SECOND; 2101 } 2102 if (!seconds) { 2103 m_alarm_deadline = 0; 2104 return previous_alarm_remaining; 2105 } 2106 m_alarm_deadline = g_uptime + seconds * TICKS_PER_SECOND; 2107 return previous_alarm_remaining; 2108} 2109 2110int Process::sys$uname(utsname* buf) 2111{ 2112 REQUIRE_PROMISE(stdio); 2113 if (!validate_write_typed(buf)) 2114 return -EFAULT; 2115 LOCKER(*s_hostname_lock); 2116 if (s_hostname->length() + 1 > sizeof(utsname::nodename)) 2117 return -ENAMETOOLONG; 2118 copy_to_user(buf->sysname, "SerenityOS", 11); 2119 copy_to_user(buf->release, "1.0-dev", 8); 2120 copy_to_user(buf->version, "FIXME", 6); 2121 copy_to_user(buf->machine, "i686", 5); 2122 copy_to_user(buf->nodename, s_hostname->characters(), s_hostname->length() + 1); 2123 return 0; 2124} 2125 2126KResult Process::do_kill(Process& process, int signal) 2127{ 2128 // FIXME: Allow sending SIGCONT to everyone in the process group. 2129 // FIXME: Should setuid processes have some special treatment here? 2130 if (!is_superuser() && m_euid != process.m_uid && m_uid != process.m_uid) 2131 return KResult(-EPERM); 2132 if (process.is_ring0() && signal == SIGKILL) { 2133 kprintf("%s(%u) attempted to send SIGKILL to ring 0 process %s(%u)\n", name().characters(), m_pid, process.name().characters(), process.pid()); 2134 return KResult(-EPERM); 2135 } 2136 if (signal != 0) 2137 process.send_signal(signal, this); 2138 return KSuccess; 2139} 2140 2141KResult Process::do_killpg(pid_t pgrp, int signal) 2142{ 2143 ASSERT(pgrp >= 0); 2144 2145 // Send the signal to all processes in the given group. 2146 if (pgrp == 0) { 2147 // Send the signal to our own pgrp. 
2148 pgrp = pgid(); 2149 } 2150 2151 bool group_was_empty = true; 2152 bool any_succeeded = false; 2153 KResult error = KSuccess; 2154 2155 Process::for_each_in_pgrp(pgrp, [&](auto& process) { 2156 group_was_empty = false; 2157 2158 KResult res = do_kill(process, signal); 2159 if (res.is_success()) 2160 any_succeeded = true; 2161 else 2162 error = res; 2163 2164 return IterationDecision::Continue; 2165 }); 2166 2167 if (group_was_empty) 2168 return KResult(-ESRCH); 2169 if (any_succeeded) 2170 return KSuccess; 2171 return error; 2172} 2173 2174int Process::sys$kill(pid_t pid, int signal) 2175{ 2176 if (pid == m_pid) 2177 REQUIRE_PROMISE(stdio); 2178 else 2179 REQUIRE_PROMISE(proc); 2180 2181 if (signal < 0 || signal >= 32) 2182 return -EINVAL; 2183 if (pid <= 0) 2184 return do_killpg(-pid, signal); 2185 if (pid == -1) { 2186 // FIXME: Send to all processes. 2187 return -ENOTIMPL; 2188 } 2189 if (pid == m_pid) { 2190 if (signal == 0) 2191 return 0; 2192 if (!Thread::current->should_ignore_signal(signal)) { 2193 Thread::current->send_signal(signal, this); 2194 (void)Thread::current->block<Thread::SemiPermanentBlocker>(Thread::SemiPermanentBlocker::Reason::Signal); 2195 } 2196 return 0; 2197 } 2198 InterruptDisabler disabler; 2199 auto* peer = Process::from_pid(pid); 2200 if (!peer) 2201 return -ESRCH; 2202 return do_kill(*peer, signal); 2203} 2204 2205int Process::sys$usleep(useconds_t usec) 2206{ 2207 REQUIRE_PROMISE(stdio); 2208 if (!usec) 2209 return 0; 2210 u64 wakeup_time = Thread::current->sleep(usec / 1000); 2211 if (wakeup_time > g_uptime) 2212 return -EINTR; 2213 return 0; 2214} 2215 2216int Process::sys$sleep(unsigned seconds) 2217{ 2218 REQUIRE_PROMISE(stdio); 2219 if (!seconds) 2220 return 0; 2221 u64 wakeup_time = Thread::current->sleep(seconds * TICKS_PER_SECOND); 2222 if (wakeup_time > g_uptime) { 2223 u32 ticks_left_until_original_wakeup_time = wakeup_time - g_uptime; 2224 return ticks_left_until_original_wakeup_time / TICKS_PER_SECOND; 2225 } 2226 
return 0; 2227} 2228 2229timeval kgettimeofday() 2230{ 2231 return const_cast<const timeval&>(((KernelInfoPage*)s_info_page_address_for_kernel.as_ptr())->now); 2232} 2233 2234void kgettimeofday(timeval& tv) 2235{ 2236 tv = kgettimeofday(); 2237} 2238 2239int Process::sys$gettimeofday(timeval* tv) 2240{ 2241 REQUIRE_PROMISE(stdio); 2242 if (!validate_write_typed(tv)) 2243 return -EFAULT; 2244 *tv = kgettimeofday(); 2245 return 0; 2246} 2247 2248uid_t Process::sys$getuid() 2249{ 2250 REQUIRE_PROMISE(stdio); 2251 return m_uid; 2252} 2253 2254gid_t Process::sys$getgid() 2255{ 2256 REQUIRE_PROMISE(stdio); 2257 return m_gid; 2258} 2259 2260uid_t Process::sys$geteuid() 2261{ 2262 REQUIRE_PROMISE(stdio); 2263 return m_euid; 2264} 2265 2266gid_t Process::sys$getegid() 2267{ 2268 REQUIRE_PROMISE(stdio); 2269 return m_egid; 2270} 2271 2272pid_t Process::sys$getpid() 2273{ 2274 REQUIRE_PROMISE(stdio); 2275 return m_pid; 2276} 2277 2278pid_t Process::sys$getppid() 2279{ 2280 REQUIRE_PROMISE(stdio); 2281 return m_ppid; 2282} 2283 2284mode_t Process::sys$umask(mode_t mask) 2285{ 2286 REQUIRE_PROMISE(stdio); 2287 auto old_mask = m_umask; 2288 m_umask = mask & 0777; 2289 return old_mask; 2290} 2291 2292siginfo_t Process::reap(Process& process) 2293{ 2294 siginfo_t siginfo; 2295 memset(&siginfo, 0, sizeof(siginfo)); 2296 siginfo.si_signo = SIGCHLD; 2297 siginfo.si_pid = process.pid(); 2298 siginfo.si_uid = process.uid(); 2299 2300 if (process.m_termination_signal) { 2301 siginfo.si_status = process.m_termination_signal; 2302 siginfo.si_code = CLD_KILLED; 2303 } else { 2304 siginfo.si_status = process.m_termination_status; 2305 siginfo.si_code = CLD_EXITED; 2306 } 2307 2308 { 2309 InterruptDisabler disabler; 2310 2311 if (process.ppid()) { 2312 auto* parent = Process::from_pid(process.ppid()); 2313 if (parent) { 2314 parent->m_ticks_in_user_for_dead_children += process.m_ticks_in_user + process.m_ticks_in_user_for_dead_children; 2315 parent->m_ticks_in_kernel_for_dead_children += 
process.m_ticks_in_kernel + process.m_ticks_in_kernel_for_dead_children; 2316 } 2317 } 2318 2319#ifdef PROCESS_DEBUG 2320 dbg() << "Reaping process " << process; 2321#endif 2322 ASSERT(process.is_dead()); 2323 g_processes->remove(&process); 2324 } 2325 delete &process; 2326 return siginfo; 2327} 2328 2329KResultOr<siginfo_t> Process::do_waitid(idtype_t idtype, int id, int options) 2330{ 2331 if (idtype == P_PID) { 2332 InterruptDisabler disabler; 2333 if (idtype == P_PID && !Process::from_pid(id)) 2334 return KResult(-ECHILD); 2335 } 2336 2337 if (options & WNOHANG) { 2338 // FIXME: Figure out what WNOHANG should do with stopped children. 2339 if (idtype == P_ALL) { 2340 InterruptDisabler disabler; 2341 siginfo_t siginfo; 2342 memset(&siginfo, 0, sizeof(siginfo)); 2343 for_each_child([&siginfo](Process& process) { 2344 if (process.is_dead()) 2345 siginfo = reap(process); 2346 return IterationDecision::Continue; 2347 }); 2348 return siginfo; 2349 } else if (idtype == P_PID) { 2350 InterruptDisabler disabler; 2351 auto* waitee_process = Process::from_pid(id); 2352 if (!waitee_process) 2353 return KResult(-ECHILD); 2354 if (waitee_process->is_dead()) 2355 return reap(*waitee_process); 2356 } else { 2357 // FIXME: Implement other PID specs. 2358 return KResult(-EINVAL); 2359 } 2360 } 2361 2362 pid_t waitee_pid; 2363 2364 // FIXME: WaitBlocker should support idtype/id specs directly. 2365 if (idtype == P_ALL) { 2366 waitee_pid = -1; 2367 } else if (idtype == P_PID) { 2368 waitee_pid = id; 2369 } else { 2370 // FIXME: Implement other PID specs. 2371 return KResult(-EINVAL); 2372 } 2373 2374 if (Thread::current->block<Thread::WaitBlocker>(options, waitee_pid) != Thread::BlockResult::WokeNormally) 2375 return KResult(-EINTR); 2376 2377 InterruptDisabler disabler; 2378 2379 // NOTE: If waitee was -1, m_waitee_pid will have been filled in by the scheduler. 
2380 Process* waitee_process = Process::from_pid(waitee_pid); 2381 if (!waitee_process) 2382 return KResult(-ECHILD); 2383 2384 ASSERT(waitee_process); 2385 if (waitee_process->is_dead()) { 2386 return reap(*waitee_process); 2387 } else { 2388 auto* waitee_thread = Thread::from_tid(waitee_pid); 2389 if (!waitee_thread) 2390 return KResult(-ECHILD); 2391 ASSERT(waitee_thread->state() == Thread::State::Stopped); 2392 siginfo_t siginfo; 2393 memset(&siginfo, 0, sizeof(siginfo)); 2394 siginfo.si_signo = SIGCHLD; 2395 siginfo.si_pid = waitee_process->pid(); 2396 siginfo.si_uid = waitee_process->uid(); 2397 siginfo.si_status = CLD_STOPPED; 2398 siginfo.si_code = waitee_thread->m_stop_signal; 2399 return siginfo; 2400 } 2401} 2402 2403pid_t Process::sys$waitid(const Syscall::SC_waitid_params* user_params) 2404{ 2405 REQUIRE_PROMISE(stdio); 2406 2407 Syscall::SC_waitid_params params; 2408 if (!validate_read_and_copy_typed(&params, user_params)) 2409 return -EFAULT; 2410 2411 if (!validate_write_typed(params.infop)) 2412 return -EFAULT; 2413 2414#ifdef PROCESS_DEBUG 2415 dbg() << "sys$waitid(" << params.idtype << ", " << params.id << ", " << params.infop << ", " << params.options << ")"; 2416#endif 2417 2418 auto siginfo_or_error = do_waitid(static_cast<idtype_t>(params.idtype), params.id, params.options); 2419 if (siginfo_or_error.is_error()) 2420 return siginfo_or_error.error(); 2421 2422 copy_to_user(params.infop, &siginfo_or_error.value()); 2423 return 0; 2424} 2425 2426bool Process::validate_read_from_kernel(VirtualAddress vaddr, size_t size) const 2427{ 2428 if (vaddr.is_null()) 2429 return false; 2430 return MM.validate_kernel_read(*this, vaddr, size); 2431} 2432 2433bool Process::validate_read(const void* address, size_t size) const 2434{ 2435 if (!size) 2436 return false; 2437 return MM.validate_user_read(*this, VirtualAddress(address), size); 2438} 2439 2440bool Process::validate_write(void* address, size_t size) const 2441{ 2442 if (!size) 2443 return false; 2444 
return MM.validate_user_write(*this, VirtualAddress(address), size); 2445} 2446 2447pid_t Process::sys$getsid(pid_t pid) 2448{ 2449 REQUIRE_PROMISE(stdio); 2450 if (pid == 0) 2451 return m_sid; 2452 InterruptDisabler disabler; 2453 auto* process = Process::from_pid(pid); 2454 if (!process) 2455 return -ESRCH; 2456 if (m_sid != process->m_sid) 2457 return -EPERM; 2458 return process->m_sid; 2459} 2460 2461pid_t Process::sys$setsid() 2462{ 2463 REQUIRE_PROMISE(proc); 2464 InterruptDisabler disabler; 2465 bool found_process_with_same_pgid_as_my_pid = false; 2466 Process::for_each_in_pgrp(pid(), [&](auto&) { 2467 found_process_with_same_pgid_as_my_pid = true; 2468 return IterationDecision::Break; 2469 }); 2470 if (found_process_with_same_pgid_as_my_pid) 2471 return -EPERM; 2472 m_sid = m_pid; 2473 m_pgid = m_pid; 2474 m_tty = nullptr; 2475 return m_sid; 2476} 2477 2478pid_t Process::sys$getpgid(pid_t pid) 2479{ 2480 REQUIRE_PROMISE(stdio); 2481 if (pid == 0) 2482 return m_pgid; 2483 InterruptDisabler disabler; // FIXME: Use a ProcessHandle 2484 auto* process = Process::from_pid(pid); 2485 if (!process) 2486 return -ESRCH; 2487 return process->m_pgid; 2488} 2489 2490pid_t Process::sys$getpgrp() 2491{ 2492 REQUIRE_PROMISE(stdio); 2493 return m_pgid; 2494} 2495 2496static pid_t get_sid_from_pgid(pid_t pgid) 2497{ 2498 InterruptDisabler disabler; 2499 auto* group_leader = Process::from_pid(pgid); 2500 if (!group_leader) 2501 return -1; 2502 return group_leader->sid(); 2503} 2504 2505int Process::sys$setpgid(pid_t specified_pid, pid_t specified_pgid) 2506{ 2507 REQUIRE_PROMISE(proc); 2508 InterruptDisabler disabler; // FIXME: Use a ProcessHandle 2509 pid_t pid = specified_pid ? specified_pid : m_pid; 2510 if (specified_pgid < 0) { 2511 // The value of the pgid argument is less than 0, or is not a value supported by the implementation. 
2512 return -EINVAL; 2513 } 2514 auto* process = Process::from_pid(pid); 2515 if (!process) 2516 return -ESRCH; 2517 if (process != this && process->ppid() != m_pid) { 2518 // The value of the pid argument does not match the process ID 2519 // of the calling process or of a child process of the calling process. 2520 return -ESRCH; 2521 } 2522 if (process->pid() == process->sid()) { 2523 // The process indicated by the pid argument is a session leader. 2524 return -EPERM; 2525 } 2526 if (process->ppid() == m_pid && process->sid() != sid()) { 2527 // The value of the pid argument matches the process ID of a child 2528 // process of the calling process and the child process is not in 2529 // the same session as the calling process. 2530 return -EPERM; 2531 } 2532 2533 pid_t new_pgid = specified_pgid ? specified_pgid : process->m_pid; 2534 pid_t current_sid = get_sid_from_pgid(process->m_pgid); 2535 pid_t new_sid = get_sid_from_pgid(new_pgid); 2536 if (current_sid != new_sid) { 2537 // Can't move a process between sessions. 2538 return -EPERM; 2539 } 2540 // FIXME: There are more EPERM conditions to check for here.. 
2541 process->m_pgid = new_pgid; 2542 return 0; 2543} 2544 2545int Process::sys$ioctl(int fd, unsigned request, unsigned arg) 2546{ 2547 auto description = file_description(fd); 2548 if (!description) 2549 return -EBADF; 2550 SmapDisabler disabler; 2551 return description->file().ioctl(*description, request, arg); 2552} 2553 2554int Process::sys$getdtablesize() 2555{ 2556 REQUIRE_PROMISE(stdio); 2557 return m_max_open_file_descriptors; 2558} 2559 2560int Process::sys$dup(int old_fd) 2561{ 2562 REQUIRE_PROMISE(stdio); 2563 auto description = file_description(old_fd); 2564 if (!description) 2565 return -EBADF; 2566 int new_fd = alloc_fd(); 2567 if (new_fd < 0) 2568 return new_fd; 2569 m_fds[new_fd].set(*description); 2570 return new_fd; 2571} 2572 2573int Process::sys$dup2(int old_fd, int new_fd) 2574{ 2575 REQUIRE_PROMISE(stdio); 2576 auto description = file_description(old_fd); 2577 if (!description) 2578 return -EBADF; 2579 if (new_fd < 0 || new_fd >= m_max_open_file_descriptors) 2580 return -EINVAL; 2581 m_fds[new_fd].set(*description); 2582 return new_fd; 2583} 2584 2585int Process::sys$sigprocmask(int how, const sigset_t* set, sigset_t* old_set) 2586{ 2587 REQUIRE_PROMISE(stdio); 2588 if (old_set) { 2589 if (!validate_write_typed(old_set)) 2590 return -EFAULT; 2591 copy_to_user(old_set, &Thread::current->m_signal_mask); 2592 } 2593 if (set) { 2594 if (!validate_read_typed(set)) 2595 return -EFAULT; 2596 sigset_t set_value; 2597 copy_from_user(&set_value, set); 2598 switch (how) { 2599 case SIG_BLOCK: 2600 Thread::current->m_signal_mask &= ~set_value; 2601 break; 2602 case SIG_UNBLOCK: 2603 Thread::current->m_signal_mask |= set_value; 2604 break; 2605 case SIG_SETMASK: 2606 Thread::current->m_signal_mask = set_value; 2607 break; 2608 default: 2609 return -EINVAL; 2610 } 2611 } 2612 return 0; 2613} 2614 2615int Process::sys$sigpending(sigset_t* set) 2616{ 2617 REQUIRE_PROMISE(stdio); 2618 if (!validate_write_typed(set)) 2619 return -EFAULT; 2620 copy_to_user(set, 
&Thread::current->m_pending_signals); 2621 return 0; 2622} 2623 2624int Process::sys$sigaction(int signum, const sigaction* act, sigaction* old_act) 2625{ 2626 REQUIRE_PROMISE(stdio); 2627 if (signum < 1 || signum >= 32 || signum == SIGKILL || signum == SIGSTOP) 2628 return -EINVAL; 2629 if (!validate_read_typed(act)) 2630 return -EFAULT; 2631 InterruptDisabler disabler; // FIXME: This should use a narrower lock. Maybe a way to ignore signals temporarily? 2632 auto& action = Thread::current->m_signal_action_data[signum]; 2633 if (old_act) { 2634 if (!validate_write_typed(old_act)) 2635 return -EFAULT; 2636 copy_to_user(&old_act->sa_flags, &action.flags); 2637 copy_to_user(&old_act->sa_sigaction, &action.handler_or_sigaction, sizeof(action.handler_or_sigaction)); 2638 } 2639 copy_from_user(&action.flags, &act->sa_flags); 2640 copy_from_user(&action.handler_or_sigaction, &act->sa_sigaction, sizeof(action.handler_or_sigaction)); 2641 return 0; 2642} 2643 2644int Process::sys$getgroups(ssize_t count, gid_t* user_gids) 2645{ 2646 REQUIRE_PROMISE(stdio); 2647 if (count < 0) 2648 return -EINVAL; 2649 if (!count) 2650 return m_extra_gids.size(); 2651 if (count != (int)m_extra_gids.size()) 2652 return -EINVAL; 2653 if (!validate_write_typed(user_gids, m_extra_gids.size())) 2654 return -EFAULT; 2655 2656 Vector<gid_t> gids; 2657 for (auto gid : m_extra_gids) 2658 gids.append(gid); 2659 2660 copy_to_user(user_gids, gids.data(), sizeof(gid_t) * count); 2661 return 0; 2662} 2663 2664int Process::sys$setgroups(ssize_t count, const gid_t* user_gids) 2665{ 2666 REQUIRE_PROMISE(id); 2667 if (count < 0) 2668 return -EINVAL; 2669 if (!is_superuser()) 2670 return -EPERM; 2671 if (count && !validate_read(user_gids, count)) 2672 return -EFAULT; 2673 2674 Vector<gid_t> gids; 2675 gids.resize(count); 2676 copy_from_user(gids.data(), user_gids, sizeof(gid_t) * count); 2677 2678 HashTable<gid_t> unique_extra_gids; 2679 for (auto& gid : gids) { 2680 if (gid != m_gid) 2681 
unique_extra_gids.set(gid); 2682 } 2683 2684 m_extra_gids.resize(unique_extra_gids.size()); 2685 size_t i = 0; 2686 for (auto& gid : unique_extra_gids) { 2687 if (gid == m_gid) 2688 continue; 2689 m_extra_gids[i++] = gid; 2690 } 2691 return 0; 2692} 2693 2694int Process::sys$mkdir(const char* user_path, size_t path_length, mode_t mode) 2695{ 2696 REQUIRE_PROMISE(cpath); 2697 auto path = get_syscall_path_argument(user_path, path_length); 2698 if (path.is_error()) 2699 return path.error(); 2700 return VFS::the().mkdir(path.value(), mode & ~umask(), current_directory()); 2701} 2702 2703int Process::sys$realpath(const Syscall::SC_realpath_params* user_params) 2704{ 2705 REQUIRE_PROMISE(rpath); 2706 2707 Syscall::SC_realpath_params params; 2708 if (!validate_read_and_copy_typed(&params, user_params)) 2709 return -EFAULT; 2710 2711 if (!validate_write(params.buffer.data, params.buffer.size)) 2712 return -EFAULT; 2713 2714 auto path = get_syscall_path_argument(params.path); 2715 if (path.is_error()) 2716 return path.error(); 2717 2718 auto custody_or_error = VFS::the().resolve_path(path.value(), current_directory()); 2719 if (custody_or_error.is_error()) 2720 return custody_or_error.error(); 2721 auto& custody = custody_or_error.value(); 2722 auto absolute_path = custody->absolute_path(); 2723 2724 if (absolute_path.length() + 1 > params.buffer.size) 2725 return -ENAMETOOLONG; 2726 2727 copy_to_user(params.buffer.data, absolute_path.characters(), absolute_path.length() + 1); 2728 return 0; 2729}; 2730 2731clock_t Process::sys$times(tms* times) 2732{ 2733 REQUIRE_PROMISE(stdio); 2734 if (!validate_write_typed(times)) 2735 return -EFAULT; 2736 copy_to_user(&times->tms_utime, &m_ticks_in_user); 2737 copy_to_user(&times->tms_stime, &m_ticks_in_kernel); 2738 copy_to_user(&times->tms_cutime, &m_ticks_in_user_for_dead_children); 2739 copy_to_user(&times->tms_cstime, &m_ticks_in_kernel_for_dead_children); 2740 return g_uptime & 0x7fffffff; 2741} 2742 2743int 
Process::sys$select(const Syscall::SC_select_params* params) 2744{ 2745 REQUIRE_PROMISE(stdio); 2746 // FIXME: Return -EINVAL if timeout is invalid. 2747 if (!validate_read_typed(params)) 2748 return -EFAULT; 2749 2750 SmapDisabler disabler; 2751 2752 int nfds = params->nfds; 2753 fd_set* readfds = params->readfds; 2754 fd_set* writefds = params->writefds; 2755 fd_set* exceptfds = params->exceptfds; 2756 timeval* timeout = params->timeout; 2757 2758 if (writefds && !validate_write_typed(writefds)) 2759 return -EFAULT; 2760 if (readfds && !validate_write_typed(readfds)) 2761 return -EFAULT; 2762 if (exceptfds && !validate_write_typed(exceptfds)) 2763 return -EFAULT; 2764 if (timeout && !validate_read_typed(timeout)) 2765 return -EFAULT; 2766 if (nfds < 0) 2767 return -EINVAL; 2768 2769 timeval computed_timeout; 2770 bool select_has_timeout = false; 2771 if (timeout && (timeout->tv_sec || timeout->tv_usec)) { 2772 timeval_add(kgettimeofday(), *timeout, computed_timeout); 2773 select_has_timeout = true; 2774 } 2775 2776 Thread::SelectBlocker::FDVector rfds; 2777 Thread::SelectBlocker::FDVector wfds; 2778 Thread::SelectBlocker::FDVector efds; 2779 2780 auto transfer_fds = [&](auto* fds, auto& vector) -> int { 2781 vector.clear_with_capacity(); 2782 if (!fds) 2783 return 0; 2784 for (int fd = 0; fd < nfds; ++fd) { 2785 if (FD_ISSET(fd, fds)) { 2786 if (!file_description(fd)) { 2787 dbg() << "sys$select: Bad fd number " << fd; 2788 return -EBADF; 2789 } 2790 vector.append(fd); 2791 } 2792 } 2793 return 0; 2794 }; 2795 if (int error = transfer_fds(writefds, wfds)) 2796 return error; 2797 if (int error = transfer_fds(readfds, rfds)) 2798 return error; 2799 if (int error = transfer_fds(exceptfds, efds)) 2800 return error; 2801 2802#if defined(DEBUG_IO) || defined(DEBUG_POLL_SELECT) 2803 dbgprintf("%s<%u> selecting on (read:%u, write:%u), timeout=%p\n", name().characters(), pid(), rfds.size(), wfds.size(), timeout); 2804#endif 2805 2806 if (!timeout || select_has_timeout) { 
2807 if (Thread::current->block<Thread::SelectBlocker>(computed_timeout, select_has_timeout, rfds, wfds, efds) != Thread::BlockResult::WokeNormally) 2808 return -EINTR; 2809 } 2810 2811 int marked_fd_count = 0; 2812 auto mark_fds = [&](auto* fds, auto& vector, auto should_mark) { 2813 if (!fds) 2814 return; 2815 FD_ZERO(fds); 2816 for (int fd : vector) { 2817 if (auto description = file_description(fd); description && should_mark(*description)) { 2818 FD_SET(fd, fds); 2819 ++marked_fd_count; 2820 } 2821 } 2822 }; 2823 mark_fds(readfds, rfds, [](auto& description) { return description.can_read(); }); 2824 mark_fds(writefds, wfds, [](auto& description) { return description.can_write(); }); 2825 // FIXME: We should also mark exceptfds as appropriate. 2826 2827 return marked_fd_count; 2828} 2829 2830int Process::sys$poll(pollfd* fds, int nfds, int timeout) 2831{ 2832 REQUIRE_PROMISE(stdio); 2833 if (!validate_read_typed(fds)) 2834 return -EFAULT; 2835 2836 SmapDisabler disabler; 2837 2838 Thread::SelectBlocker::FDVector rfds; 2839 Thread::SelectBlocker::FDVector wfds; 2840 2841 for (int i = 0; i < nfds; ++i) { 2842 if (fds[i].events & POLLIN) 2843 rfds.append(fds[i].fd); 2844 if (fds[i].events & POLLOUT) 2845 wfds.append(fds[i].fd); 2846 } 2847 2848 timeval actual_timeout; 2849 bool has_timeout = false; 2850 if (timeout >= 0) { 2851 // poll is in ms, we want s/us. 
2852 struct timeval tvtimeout; 2853 tvtimeout.tv_sec = 0; 2854 while (timeout >= 1000) { 2855 tvtimeout.tv_sec += 1; 2856 timeout -= 1000; 2857 } 2858 tvtimeout.tv_usec = timeout * 1000; 2859 timeval_add(kgettimeofday(), tvtimeout, actual_timeout); 2860 has_timeout = true; 2861 } 2862 2863#if defined(DEBUG_IO) || defined(DEBUG_POLL_SELECT) 2864 dbgprintf("%s<%u> polling on (read:%u, write:%u), timeout=%d\n", name().characters(), pid(), rfds.size(), wfds.size(), timeout); 2865#endif 2866 2867 if (has_timeout || timeout < 0) { 2868 if (Thread::current->block<Thread::SelectBlocker>(actual_timeout, has_timeout, rfds, wfds, Thread::SelectBlocker::FDVector()) != Thread::BlockResult::WokeNormally) 2869 return -EINTR; 2870 } 2871 2872 int fds_with_revents = 0; 2873 2874 for (int i = 0; i < nfds; ++i) { 2875 auto description = file_description(fds[i].fd); 2876 if (!description) { 2877 fds[i].revents = POLLNVAL; 2878 continue; 2879 } 2880 fds[i].revents = 0; 2881 if (fds[i].events & POLLIN && description->can_read()) 2882 fds[i].revents |= POLLIN; 2883 if (fds[i].events & POLLOUT && description->can_write()) 2884 fds[i].revents |= POLLOUT; 2885 2886 if (fds[i].revents) 2887 ++fds_with_revents; 2888 } 2889 2890 return fds_with_revents; 2891} 2892 2893Custody& Process::current_directory() 2894{ 2895 if (!m_cwd) 2896 m_cwd = VFS::the().root_custody(); 2897 return *m_cwd; 2898} 2899 2900int Process::sys$link(const Syscall::SC_link_params* user_params) 2901{ 2902 REQUIRE_PROMISE(cpath); 2903 Syscall::SC_link_params params; 2904 if (!validate_read_and_copy_typed(&params, user_params)) 2905 return -EFAULT; 2906 auto old_path = validate_and_copy_string_from_user(params.old_path); 2907 auto new_path = validate_and_copy_string_from_user(params.new_path); 2908 if (old_path.is_null() || new_path.is_null()) 2909 return -EFAULT; 2910 return VFS::the().link(old_path, new_path, current_directory()); 2911} 2912 2913int Process::sys$unlink(const char* user_path, size_t path_length) 2914{ 2915 
REQUIRE_PROMISE(cpath); 2916 if (!validate_read(user_path, path_length)) 2917 return -EFAULT; 2918 auto path = get_syscall_path_argument(user_path, path_length); 2919 if (path.is_error()) 2920 return path.error(); 2921 return VFS::the().unlink(path.value(), current_directory()); 2922} 2923 2924int Process::sys$symlink(const Syscall::SC_symlink_params* user_params) 2925{ 2926 REQUIRE_PROMISE(cpath); 2927 Syscall::SC_symlink_params params; 2928 if (!validate_read_and_copy_typed(&params, user_params)) 2929 return -EFAULT; 2930 auto target = get_syscall_path_argument(params.target); 2931 if (target.is_error()) 2932 return target.error(); 2933 auto linkpath = get_syscall_path_argument(params.linkpath); 2934 if (linkpath.is_error()) 2935 return linkpath.error(); 2936 return VFS::the().symlink(target.value(), linkpath.value(), current_directory()); 2937} 2938 2939KResultOr<String> Process::get_syscall_path_argument(const char* user_path, size_t path_length) const 2940{ 2941 if (path_length == 0) 2942 return KResult(-EINVAL); 2943 if (path_length > PATH_MAX) 2944 return KResult(-ENAMETOOLONG); 2945 if (!validate_read(user_path, path_length)) 2946 return KResult(-EFAULT); 2947 return copy_string_from_user(user_path, path_length); 2948} 2949 2950KResultOr<String> Process::get_syscall_path_argument(const Syscall::StringArgument& path) const 2951{ 2952 return get_syscall_path_argument(path.characters, path.length); 2953} 2954 2955int Process::sys$rmdir(const char* user_path, size_t path_length) 2956{ 2957 REQUIRE_PROMISE(cpath); 2958 auto path = get_syscall_path_argument(user_path, path_length); 2959 if (path.is_error()) 2960 return path.error(); 2961 return VFS::the().rmdir(path.value(), current_directory()); 2962} 2963 2964int Process::sys$chmod(const char* user_path, size_t path_length, mode_t mode) 2965{ 2966 REQUIRE_PROMISE(fattr); 2967 auto path = get_syscall_path_argument(user_path, path_length); 2968 if (path.is_error()) 2969 return path.error(); 2970 return 
VFS::the().chmod(path.value(), mode, current_directory()); 2971} 2972 2973int Process::sys$fchmod(int fd, mode_t mode) 2974{ 2975 REQUIRE_PROMISE(fattr); 2976 auto description = file_description(fd); 2977 if (!description) 2978 return -EBADF; 2979 return description->chmod(mode); 2980} 2981 2982int Process::sys$fchown(int fd, uid_t uid, gid_t gid) 2983{ 2984 REQUIRE_PROMISE(chown); 2985 auto description = file_description(fd); 2986 if (!description) 2987 return -EBADF; 2988 return description->chown(uid, gid); 2989} 2990 2991int Process::sys$chown(const Syscall::SC_chown_params* user_params) 2992{ 2993 REQUIRE_PROMISE(chown); 2994 Syscall::SC_chown_params params; 2995 if (!validate_read_and_copy_typed(&params, user_params)) 2996 return -EFAULT; 2997 auto path = get_syscall_path_argument(params.path); 2998 if (path.is_error()) 2999 return path.error(); 3000 return VFS::the().chown(path.value(), params.uid, params.gid, current_directory()); 3001} 3002 3003void Process::finalize() 3004{ 3005 ASSERT(Thread::current == g_finalizer); 3006#ifdef PROCESS_DEBUG 3007 dbg() << "Finalizing process " << *this; 3008#endif 3009 3010 if (m_perf_event_buffer) { 3011 auto description_or_error = VFS::the().open("perfcore", O_CREAT | O_EXCL, 0400, current_directory(), UidAndGid { m_uid, m_gid }); 3012 if (!description_or_error.is_error()) { 3013 auto& description = description_or_error.value(); 3014 auto json = m_perf_event_buffer->to_json(m_pid, m_executable ? 
m_executable->absolute_path() : ""); 3015 description->write(json.data(), json.size()); 3016 } 3017 } 3018 3019 m_fds.clear(); 3020 m_tty = nullptr; 3021 m_executable = nullptr; 3022 m_cwd = nullptr; 3023 m_root_directory = nullptr; 3024 m_root_directory_relative_to_global_root = nullptr; 3025 m_elf_loader = nullptr; 3026 3027 disown_all_shared_buffers(); 3028 { 3029 InterruptDisabler disabler; 3030 if (auto* parent_thread = Thread::from_tid(m_ppid)) { 3031 if (parent_thread->m_signal_action_data[SIGCHLD].flags & SA_NOCLDWAIT) { 3032 // NOTE: If the parent doesn't care about this process, let it go. 3033 m_ppid = 0; 3034 } else { 3035 parent_thread->send_signal(SIGCHLD, this); 3036 } 3037 } 3038 } 3039 3040 m_regions.clear(); 3041 3042 m_dead = true; 3043} 3044 3045void Process::die() 3046{ 3047 // Let go of the TTY, otherwise a slave PTY may keep the master PTY from 3048 // getting an EOF when the last process using the slave PTY dies. 3049 // If the master PTY owner relies on an EOF to know when to wait() on a 3050 // slave owner, we have to allow the PTY pair to be torn down. 3051 m_tty = nullptr; 3052 3053 if (m_tracer) 3054 m_tracer->set_dead(); 3055 3056 { 3057 // Tell the threads to unwind and die. 3058 InterruptDisabler disabler; 3059 for_each_thread([](Thread& thread) { 3060 thread.set_should_die(); 3061 return IterationDecision::Continue; 3062 }); 3063 } 3064} 3065 3066size_t Process::amount_dirty_private() const 3067{ 3068 // FIXME: This gets a bit more complicated for Regions sharing the same underlying VMObject. 3069 // The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping. 3070 // That's probably a situation that needs to be looked at in general. 
3071 size_t amount = 0; 3072 for (auto& region : m_regions) { 3073 if (!region.is_shared()) 3074 amount += region.amount_dirty(); 3075 } 3076 return amount; 3077} 3078 3079size_t Process::amount_clean_inode() const 3080{ 3081 HashTable<const InodeVMObject*> vmobjects; 3082 for (auto& region : m_regions) { 3083 if (region.vmobject().is_inode()) 3084 vmobjects.set(&static_cast<const InodeVMObject&>(region.vmobject())); 3085 } 3086 size_t amount = 0; 3087 for (auto& vmobject : vmobjects) 3088 amount += vmobject->amount_clean(); 3089 return amount; 3090} 3091 3092size_t Process::amount_virtual() const 3093{ 3094 size_t amount = 0; 3095 for (auto& region : m_regions) { 3096 amount += region.size(); 3097 } 3098 return amount; 3099} 3100 3101size_t Process::amount_resident() const 3102{ 3103 // FIXME: This will double count if multiple regions use the same physical page. 3104 size_t amount = 0; 3105 for (auto& region : m_regions) { 3106 amount += region.amount_resident(); 3107 } 3108 return amount; 3109} 3110 3111size_t Process::amount_shared() const 3112{ 3113 // FIXME: This will double count if multiple regions use the same physical page. 3114 // FIXME: It doesn't work at the moment, since it relies on PhysicalPage ref counts, 3115 // and each PhysicalPage is only reffed by its VMObject. This needs to be refactored 3116 // so that every Region contributes +1 ref to each of its PhysicalPages. 
3117 size_t amount = 0; 3118 for (auto& region : m_regions) { 3119 amount += region.amount_shared(); 3120 } 3121 return amount; 3122} 3123 3124size_t Process::amount_purgeable_volatile() const 3125{ 3126 size_t amount = 0; 3127 for (auto& region : m_regions) { 3128 if (region.vmobject().is_purgeable() && static_cast<const PurgeableVMObject&>(region.vmobject()).is_volatile()) 3129 amount += region.amount_resident(); 3130 } 3131 return amount; 3132} 3133 3134size_t Process::amount_purgeable_nonvolatile() const 3135{ 3136 size_t amount = 0; 3137 for (auto& region : m_regions) { 3138 if (region.vmobject().is_purgeable() && !static_cast<const PurgeableVMObject&>(region.vmobject()).is_volatile()) 3139 amount += region.amount_resident(); 3140 } 3141 return amount; 3142} 3143 3144#define REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(domain) \ 3145 do { \ 3146 if (domain == AF_INET) \ 3147 REQUIRE_PROMISE(inet); \ 3148 else if (domain == AF_LOCAL) \ 3149 REQUIRE_PROMISE(unix); \ 3150 } while (0) 3151 3152int Process::sys$socket(int domain, int type, int protocol) 3153{ 3154 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(domain); 3155 3156 if ((type & SOCK_TYPE_MASK) == SOCK_RAW && !is_superuser()) 3157 return -EACCES; 3158 int fd = alloc_fd(); 3159 if (fd < 0) 3160 return fd; 3161 auto result = Socket::create(domain, type, protocol); 3162 if (result.is_error()) 3163 return result.error(); 3164 auto description = FileDescription::create(*result.value()); 3165 description->set_readable(true); 3166 description->set_writable(true); 3167 unsigned flags = 0; 3168 if (type & SOCK_CLOEXEC) 3169 flags |= FD_CLOEXEC; 3170 if (type & SOCK_NONBLOCK) 3171 description->set_blocking(false); 3172 m_fds[fd].set(move(description), flags); 3173 return fd; 3174} 3175 3176int Process::sys$bind(int sockfd, const sockaddr* address, socklen_t address_length) 3177{ 3178 if (!validate_read(address, address_length)) 3179 return -EFAULT; 3180 auto description = file_description(sockfd); 3181 if (!description) 3182 return 
-EBADF; 3183 if (!description->is_socket()) 3184 return -ENOTSOCK; 3185 auto& socket = *description->socket(); 3186 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain()); 3187 return socket.bind(address, address_length); 3188} 3189 3190int Process::sys$listen(int sockfd, int backlog) 3191{ 3192 if (backlog < 0) 3193 return -EINVAL; 3194 auto description = file_description(sockfd); 3195 if (!description) 3196 return -EBADF; 3197 if (!description->is_socket()) 3198 return -ENOTSOCK; 3199 auto& socket = *description->socket(); 3200 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain()); 3201 if (socket.is_connected()) 3202 return -EINVAL; 3203 return socket.listen(backlog); 3204} 3205 3206int Process::sys$accept(int accepting_socket_fd, sockaddr* user_address, socklen_t* user_address_size) 3207{ 3208 REQUIRE_PROMISE(accept); 3209 if (!validate_write_typed(user_address_size)) 3210 return -EFAULT; 3211 socklen_t address_size = 0; 3212 copy_from_user(&address_size, user_address_size); 3213 if (!validate_write(user_address, address_size)) 3214 return -EFAULT; 3215 int accepted_socket_fd = alloc_fd(); 3216 if (accepted_socket_fd < 0) 3217 return accepted_socket_fd; 3218 auto accepting_socket_description = file_description(accepting_socket_fd); 3219 if (!accepting_socket_description) 3220 return -EBADF; 3221 if (!accepting_socket_description->is_socket()) 3222 return -ENOTSOCK; 3223 auto& socket = *accepting_socket_description->socket(); 3224 if (!socket.can_accept()) { 3225 if (accepting_socket_description->is_blocking()) { 3226 if (Thread::current->block<Thread::AcceptBlocker>(*accepting_socket_description) != Thread::BlockResult::WokeNormally) 3227 return -EINTR; 3228 } else { 3229 return -EAGAIN; 3230 } 3231 } 3232 auto accepted_socket = socket.accept(); 3233 ASSERT(accepted_socket); 3234 3235 u8 address_buffer[sizeof(sockaddr_un)]; 3236 address_size = min(sizeof(sockaddr_un), static_cast<size_t>(address_size)); 3237 accepted_socket->get_peer_address((sockaddr*)address_buffer, 
&address_size); 3238 copy_to_user(user_address, address_buffer, address_size); 3239 copy_to_user(user_address_size, &address_size); 3240 3241 auto accepted_socket_description = FileDescription::create(*accepted_socket); 3242 accepted_socket_description->set_readable(true); 3243 accepted_socket_description->set_writable(true); 3244 // NOTE: The accepted socket inherits fd flags from the accepting socket. 3245 // I'm not sure if this matches other systems but it makes sense to me. 3246 accepted_socket_description->set_blocking(accepting_socket_description->is_blocking()); 3247 m_fds[accepted_socket_fd].set(move(accepted_socket_description), m_fds[accepting_socket_fd].flags); 3248 3249 // NOTE: Moving this state to Completed is what causes connect() to unblock on the client side. 3250 accepted_socket->set_setup_state(Socket::SetupState::Completed); 3251 return accepted_socket_fd; 3252} 3253 3254int Process::sys$connect(int sockfd, const sockaddr* address, socklen_t address_size) 3255{ 3256 if (!validate_read(address, address_size)) 3257 return -EFAULT; 3258 int fd = alloc_fd(); 3259 if (fd < 0) 3260 return fd; 3261 auto description = file_description(sockfd); 3262 if (!description) 3263 return -EBADF; 3264 if (!description->is_socket()) 3265 return -ENOTSOCK; 3266 3267 auto& socket = *description->socket(); 3268 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain()); 3269 SmapDisabler disabler; 3270 return socket.connect(*description, address, address_size, description->is_blocking() ? 
ShouldBlock::Yes : ShouldBlock::No); 3271} 3272 3273int Process::sys$shutdown(int sockfd, int how) 3274{ 3275 REQUIRE_PROMISE(stdio); 3276 if (how & ~SHUT_RDWR) 3277 return -EINVAL; 3278 auto description = file_description(sockfd); 3279 if (!description) 3280 return -EBADF; 3281 if (!description->is_socket()) 3282 return -ENOTSOCK; 3283 3284 auto& socket = *description->socket(); 3285 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain()); 3286 return socket.shutdown(how); 3287} 3288 3289ssize_t Process::sys$sendto(const Syscall::SC_sendto_params* user_params) 3290{ 3291 REQUIRE_PROMISE(stdio); 3292 Syscall::SC_sendto_params params; 3293 if (!validate_read_and_copy_typed(&params, user_params)) 3294 return -EFAULT; 3295 3296 int flags = params.flags; 3297 const sockaddr* addr = params.addr; 3298 socklen_t addr_length = params.addr_length; 3299 3300 if (!validate(params.data)) 3301 return -EFAULT; 3302 if (addr && !validate_read(addr, addr_length)) 3303 return -EFAULT; 3304 auto description = file_description(params.sockfd); 3305 if (!description) 3306 return -EBADF; 3307 if (!description->is_socket()) 3308 return -ENOTSOCK; 3309 auto& socket = *description->socket(); 3310 if (socket.is_shut_down_for_writing()) 3311 return -EPIPE; 3312 SmapDisabler disabler; 3313 return socket.sendto(*description, params.data.data, params.data.size, flags, addr, addr_length); 3314} 3315 3316ssize_t Process::sys$recvfrom(const Syscall::SC_recvfrom_params* user_params) 3317{ 3318 REQUIRE_PROMISE(stdio); 3319 3320 Syscall::SC_recvfrom_params params; 3321 if (!validate_read_and_copy_typed(&params, user_params)) 3322 return -EFAULT; 3323 3324 int flags = params.flags; 3325 sockaddr* addr = params.addr; 3326 socklen_t* addr_length = params.addr_length; 3327 3328 SmapDisabler disabler; 3329 if (!validate(params.buffer)) 3330 return -EFAULT; 3331 if (addr_length) { 3332 if (!validate_write_typed(addr_length)) 3333 return -EFAULT; 3334 if (!validate_write(addr, *addr_length)) 3335 return -EFAULT; 
3336 } else if (addr) { 3337 return -EINVAL; 3338 } 3339 auto description = file_description(params.sockfd); 3340 if (!description) 3341 return -EBADF; 3342 if (!description->is_socket()) 3343 return -ENOTSOCK; 3344 auto& socket = *description->socket(); 3345 3346 if (socket.is_shut_down_for_reading()) 3347 return 0; 3348 3349 bool original_blocking = description->is_blocking(); 3350 if (flags & MSG_DONTWAIT) 3351 description->set_blocking(false); 3352 3353 auto nrecv = socket.recvfrom(*description, params.buffer.data, params.buffer.size, flags, addr, addr_length); 3354 if (flags & MSG_DONTWAIT) 3355 description->set_blocking(original_blocking); 3356 3357 return nrecv; 3358} 3359 3360template<bool sockname, typename Params> 3361int Process::get_sock_or_peer_name(const Params& params) 3362{ 3363 socklen_t addrlen_value; 3364 if (!validate_read_and_copy_typed(&addrlen_value, params.addrlen)) 3365 return -EFAULT; 3366 3367 if (addrlen_value <= 0) 3368 return -EINVAL; 3369 3370 if (!validate_write(params.addr, addrlen_value)) 3371 return -EFAULT; 3372 3373 if (!validate_write_typed(params.addrlen)) 3374 return -EFAULT; 3375 3376 auto description = file_description(params.sockfd); 3377 if (!description) 3378 return -EBADF; 3379 3380 if (!description->is_socket()) 3381 return -ENOTSOCK; 3382 3383 auto& socket = *description->socket(); 3384 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain()); 3385 3386 u8 address_buffer[sizeof(sockaddr_un)]; 3387 addrlen_value = min(sizeof(sockaddr_un), static_cast<size_t>(addrlen_value)); 3388 if constexpr (sockname) 3389 socket.get_local_address((sockaddr*)address_buffer, &addrlen_value); 3390 else 3391 socket.get_peer_address((sockaddr*)address_buffer, &addrlen_value); 3392 copy_to_user(params.addr, address_buffer, addrlen_value); 3393 copy_to_user(params.addrlen, &addrlen_value); 3394 return 0; 3395} 3396 3397int Process::sys$getsockname(const Syscall::SC_getsockname_params* user_params) 3398{ 3399 Syscall::SC_getsockname_params 
params; 3400 if (!validate_read_and_copy_typed(&params, user_params)) 3401 return -EFAULT; 3402 return get_sock_or_peer_name<true>(params); 3403} 3404 3405int Process::sys$getpeername(const Syscall::SC_getpeername_params* user_params) 3406{ 3407 Syscall::SC_getpeername_params params; 3408 if (!validate_read_and_copy_typed(&params, user_params)) 3409 return -EFAULT; 3410 return get_sock_or_peer_name<false>(params); 3411} 3412 3413int Process::sys$sched_setparam(int tid, const struct sched_param* param) 3414{ 3415 REQUIRE_PROMISE(proc); 3416 if (!validate_read_typed(param)) 3417 return -EFAULT; 3418 3419 int desired_priority; 3420 copy_from_user(&desired_priority, &param->sched_priority); 3421 3422 InterruptDisabler disabler; 3423 auto* peer = Thread::current; 3424 if (tid != 0) 3425 peer = Thread::from_tid(tid); 3426 3427 if (!peer) 3428 return -ESRCH; 3429 3430 if (!is_superuser() && m_euid != peer->process().m_uid && m_uid != peer->process().m_uid) 3431 return -EPERM; 3432 3433 if (desired_priority < THREAD_PRIORITY_MIN || desired_priority > THREAD_PRIORITY_MAX) 3434 return -EINVAL; 3435 3436 peer->set_priority((u32)desired_priority); 3437 return 0; 3438} 3439 3440int Process::sys$sched_getparam(pid_t pid, struct sched_param* param) 3441{ 3442 REQUIRE_PROMISE(proc); 3443 if (!validate_write_typed(param)) 3444 return -EFAULT; 3445 3446 InterruptDisabler disabler; 3447 auto* peer = Thread::current; 3448 if (pid != 0) 3449 peer = Thread::from_tid(pid); 3450 3451 if (!peer) 3452 return -ESRCH; 3453 3454 if (!is_superuser() && m_euid != peer->process().m_uid && m_uid != peer->process().m_uid) 3455 return -EPERM; 3456 3457 int priority = peer->priority(); 3458 copy_to_user(&param->sched_priority, &priority); 3459 return 0; 3460} 3461 3462int Process::sys$getsockopt(const Syscall::SC_getsockopt_params* params) 3463{ 3464 if (!validate_read_typed(params)) 3465 return -EFAULT; 3466 3467 SmapDisabler disabler; 3468 3469 int sockfd = params->sockfd; 3470 int level = 
params->level; 3471 int option = params->option; 3472 void* value = params->value; 3473 socklen_t* value_size = params->value_size; 3474 3475 if (!validate_write_typed(value_size)) 3476 return -EFAULT; 3477 if (!validate_write(value, *value_size)) 3478 return -EFAULT; 3479 auto description = file_description(sockfd); 3480 if (!description) 3481 return -EBADF; 3482 if (!description->is_socket()) 3483 return -ENOTSOCK; 3484 auto& socket = *description->socket(); 3485 3486 if (has_promised(Pledge::accept) && socket.is_local() && level == SOL_SOCKET && option == SO_PEERCRED) { 3487 // We make an exception for SOL_SOCKET::SO_PEERCRED on local sockets if you've pledged "accept" 3488 } else { 3489 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain()); 3490 } 3491 return socket.getsockopt(*description, level, option, value, value_size); 3492} 3493 3494int Process::sys$setsockopt(const Syscall::SC_setsockopt_params* params) 3495{ 3496 if (!validate_read_typed(params)) 3497 return -EFAULT; 3498 3499 SmapDisabler disabler; 3500 3501 int sockfd = params->sockfd; 3502 int level = params->level; 3503 int option = params->option; 3504 const void* value = params->value; 3505 socklen_t value_size = params->value_size; 3506 3507 if (!validate_read(value, value_size)) 3508 return -EFAULT; 3509 auto description = file_description(sockfd); 3510 if (!description) 3511 return -EBADF; 3512 if (!description->is_socket()) 3513 return -ENOTSOCK; 3514 auto& socket = *description->socket(); 3515 REQUIRE_PROMISE_FOR_SOCKET_DOMAIN(socket.domain()); 3516 return socket.setsockopt(level, option, value, value_size); 3517} 3518 3519void Process::disown_all_shared_buffers() 3520{ 3521 LOCKER(shared_buffers().lock()); 3522 Vector<SharedBuffer*, 32> buffers_to_disown; 3523 for (auto& it : shared_buffers().resource()) 3524 buffers_to_disown.append(it.value.ptr()); 3525 for (auto* shared_buffer : buffers_to_disown) 3526 shared_buffer->disown(m_pid); 3527} 3528 3529int Process::sys$create_shared_buffer(int 
size, void** buffer) 3530{ 3531 REQUIRE_PROMISE(shared_buffer); 3532 if (!size || size < 0) 3533 return -EINVAL; 3534 size = PAGE_ROUND_UP(size); 3535 if (!validate_write_typed(buffer)) 3536 return -EFAULT; 3537 3538 LOCKER(shared_buffers().lock()); 3539 static int s_next_shared_buffer_id; 3540 int shared_buffer_id = ++s_next_shared_buffer_id; 3541 auto shared_buffer = make<SharedBuffer>(shared_buffer_id, size); 3542 shared_buffer->share_with(m_pid); 3543 3544 void* address = shared_buffer->ref_for_process_and_get_address(*this); 3545 copy_to_user(buffer, &address); 3546 ASSERT((int)shared_buffer->size() >= size); 3547#ifdef SHARED_BUFFER_DEBUG 3548 kprintf("%s(%u): Created shared buffer %d @ %p (%u bytes, vmobject is %u)\n", name().characters(), pid(), shared_buffer_id, *buffer, size, shared_buffer->size()); 3549#endif 3550 shared_buffers().resource().set(shared_buffer_id, move(shared_buffer)); 3551 3552 return shared_buffer_id; 3553} 3554 3555int Process::sys$share_buffer_with(int shared_buffer_id, pid_t peer_pid) 3556{ 3557 REQUIRE_PROMISE(shared_buffer); 3558 if (!peer_pid || peer_pid < 0 || peer_pid == m_pid) 3559 return -EINVAL; 3560 LOCKER(shared_buffers().lock()); 3561 auto it = shared_buffers().resource().find(shared_buffer_id); 3562 if (it == shared_buffers().resource().end()) 3563 return -EINVAL; 3564 auto& shared_buffer = *(*it).value; 3565 if (!shared_buffer.is_shared_with(m_pid)) 3566 return -EPERM; 3567 { 3568 InterruptDisabler disabler; 3569 auto* peer = Process::from_pid(peer_pid); 3570 if (!peer) 3571 return -ESRCH; 3572 } 3573 shared_buffer.share_with(peer_pid); 3574 return 0; 3575} 3576 3577int Process::sys$share_buffer_globally(int shared_buffer_id) 3578{ 3579 REQUIRE_PROMISE(shared_buffer); 3580 LOCKER(shared_buffers().lock()); 3581 auto it = shared_buffers().resource().find(shared_buffer_id); 3582 if (it == shared_buffers().resource().end()) 3583 return -EINVAL; 3584 auto& shared_buffer = *(*it).value; 3585 if 
(!shared_buffer.is_shared_with(m_pid)) 3586 return -EPERM; 3587 shared_buffer.share_globally(); 3588 return 0; 3589} 3590 3591int Process::sys$release_shared_buffer(int shared_buffer_id) 3592{ 3593 REQUIRE_PROMISE(shared_buffer); 3594 LOCKER(shared_buffers().lock()); 3595 auto it = shared_buffers().resource().find(shared_buffer_id); 3596 if (it == shared_buffers().resource().end()) 3597 return -EINVAL; 3598 auto& shared_buffer = *(*it).value; 3599 if (!shared_buffer.is_shared_with(m_pid)) 3600 return -EPERM; 3601#ifdef SHARED_BUFFER_DEBUG 3602 kprintf("%s(%u): Releasing shared buffer %d, buffer count: %u\n", name().characters(), pid(), shared_buffer_id, shared_buffers().resource().size()); 3603#endif 3604 shared_buffer.deref_for_process(*this); 3605 return 0; 3606} 3607 3608void* Process::sys$get_shared_buffer(int shared_buffer_id) 3609{ 3610 REQUIRE_PROMISE(shared_buffer); 3611 LOCKER(shared_buffers().lock()); 3612 auto it = shared_buffers().resource().find(shared_buffer_id); 3613 if (it == shared_buffers().resource().end()) 3614 return (void*)-EINVAL; 3615 auto& shared_buffer = *(*it).value; 3616 if (!shared_buffer.is_shared_with(m_pid)) 3617 return (void*)-EPERM; 3618#ifdef SHARED_BUFFER_DEBUG 3619 kprintf("%s(%u): Retaining shared buffer %d, buffer count: %u\n", name().characters(), pid(), shared_buffer_id, shared_buffers().resource().size()); 3620#endif 3621 return shared_buffer.ref_for_process_and_get_address(*this); 3622} 3623 3624int Process::sys$seal_shared_buffer(int shared_buffer_id) 3625{ 3626 REQUIRE_PROMISE(shared_buffer); 3627 LOCKER(shared_buffers().lock()); 3628 auto it = shared_buffers().resource().find(shared_buffer_id); 3629 if (it == shared_buffers().resource().end()) 3630 return -EINVAL; 3631 auto& shared_buffer = *(*it).value; 3632 if (!shared_buffer.is_shared_with(m_pid)) 3633 return -EPERM; 3634#ifdef SHARED_BUFFER_DEBUG 3635 kprintf("%s(%u): Sealing shared buffer %d\n", name().characters(), pid(), shared_buffer_id); 3636#endif 3637 
shared_buffer.seal(); 3638 return 0; 3639} 3640 3641int Process::sys$get_shared_buffer_size(int shared_buffer_id) 3642{ 3643 REQUIRE_PROMISE(shared_buffer); 3644 LOCKER(shared_buffers().lock()); 3645 auto it = shared_buffers().resource().find(shared_buffer_id); 3646 if (it == shared_buffers().resource().end()) 3647 return -EINVAL; 3648 auto& shared_buffer = *(*it).value; 3649 if (!shared_buffer.is_shared_with(m_pid)) 3650 return -EPERM; 3651#ifdef SHARED_BUFFER_DEBUG 3652 kprintf("%s(%u): Get shared buffer %d size: %u\n", name().characters(), pid(), shared_buffer_id, shared_buffers().resource().size()); 3653#endif 3654 return shared_buffer.size(); 3655} 3656 3657int Process::sys$set_shared_buffer_volatile(int shared_buffer_id, bool state) 3658{ 3659 REQUIRE_PROMISE(shared_buffer); 3660 LOCKER(shared_buffers().lock()); 3661 auto it = shared_buffers().resource().find(shared_buffer_id); 3662 if (it == shared_buffers().resource().end()) 3663 return -EINVAL; 3664 auto& shared_buffer = *(*it).value; 3665 if (!shared_buffer.is_shared_with(m_pid)) 3666 return -EPERM; 3667#ifdef SHARED_BUFFER_DEBUG 3668 kprintf("%s(%u): Set shared buffer %d volatile: %u\n", name().characters(), pid(), shared_buffer_id, state); 3669#endif 3670 if (!state) { 3671 bool was_purged = shared_buffer.vmobject().was_purged(); 3672 shared_buffer.vmobject().set_volatile(state); 3673 shared_buffer.vmobject().set_was_purged(false); 3674 return was_purged ? 
1 : 0; 3675 } 3676 shared_buffer.vmobject().set_volatile(true); 3677 return 0; 3678} 3679 3680void Process::terminate_due_to_signal(u8 signal) 3681{ 3682 ASSERT_INTERRUPTS_DISABLED(); 3683 ASSERT(signal < 32); 3684 dbgprintf("terminate_due_to_signal %s(%u) <- %u\n", name().characters(), pid(), signal); 3685 m_termination_status = 0; 3686 m_termination_signal = signal; 3687 die(); 3688} 3689 3690void Process::send_signal(u8 signal, Process* sender) 3691{ 3692 InterruptDisabler disabler; 3693 auto* thread = Thread::from_tid(m_pid); 3694 if (!thread) 3695 thread = &any_thread(); 3696 thread->send_signal(signal, sender); 3697} 3698 3699int Process::sys$create_thread(void* (*entry)(void*), void* argument, const Syscall::SC_create_thread_params* user_params) 3700{ 3701 REQUIRE_PROMISE(thread); 3702 if (!validate_read((const void*)entry, sizeof(void*))) 3703 return -EFAULT; 3704 3705 Syscall::SC_create_thread_params params; 3706 if (!validate_read_and_copy_typed(&params, user_params)) 3707 return -EFAULT; 3708 3709 unsigned detach_state = params.m_detach_state; 3710 int schedule_priority = params.m_schedule_priority; 3711 void* stack_location = params.m_stack_location; 3712 unsigned stack_size = params.m_stack_size; 3713 3714 if (!validate_write(stack_location, stack_size)) 3715 return -EFAULT; 3716 3717 u32 user_stack_address = reinterpret_cast<u32>(stack_location) + stack_size; 3718 3719 if (!MM.validate_user_stack(*this, VirtualAddress(user_stack_address - 4))) 3720 return -EFAULT; 3721 3722 // FIXME: return EAGAIN if Thread::all_threads().size() is greater than PTHREAD_THREADS_MAX 3723 3724 int requested_thread_priority = schedule_priority; 3725 if (requested_thread_priority < THREAD_PRIORITY_MIN || requested_thread_priority > THREAD_PRIORITY_MAX) 3726 return -EINVAL; 3727 3728 bool is_thread_joinable = (0 == detach_state); 3729 3730 // FIXME: Do something with guard pages? 
3731 3732 auto* thread = new Thread(*this); 3733 3734 // We know this thread is not the main_thread, 3735 // So give it a unique name until the user calls $set_thread_name on it 3736 // length + 4 to give space for our extra junk at the end 3737 StringBuilder builder(m_name.length() + 4); 3738 builder.append(m_name); 3739 builder.appendf("[%d]", thread->tid()); 3740 thread->set_name(builder.to_string()); 3741 3742 thread->set_priority(requested_thread_priority); 3743 thread->set_joinable(is_thread_joinable); 3744 3745 auto& tss = thread->tss(); 3746 tss.eip = (uintptr_t)entry; 3747 tss.eflags = 0x0202; 3748 tss.cr3 = page_directory().cr3(); 3749 tss.esp = user_stack_address; 3750 3751 // NOTE: The stack needs to be 16-byte aligned. 3752 thread->push_value_on_stack((uintptr_t)argument); 3753 thread->push_value_on_stack(0); 3754 3755 thread->make_thread_specific_region({}); 3756 thread->set_state(Thread::State::Runnable); 3757 return thread->tid(); 3758} 3759 3760void Process::sys$exit_thread(void* exit_value) 3761{ 3762 REQUIRE_PROMISE(thread); 3763 cli(); 3764 Thread::current->m_exit_value = exit_value; 3765 Thread::current->set_should_die(); 3766 big_lock().force_unlock_if_locked(); 3767 Thread::current->die_if_needed(); 3768 ASSERT_NOT_REACHED(); 3769} 3770 3771int Process::sys$detach_thread(int tid) 3772{ 3773 REQUIRE_PROMISE(thread); 3774 InterruptDisabler disabler; 3775 auto* thread = Thread::from_tid(tid); 3776 if (!thread || thread->pid() != pid()) 3777 return -ESRCH; 3778 3779 if (!thread->is_joinable()) 3780 return -EINVAL; 3781 3782 thread->set_joinable(false); 3783 return 0; 3784} 3785 3786int Process::sys$join_thread(int tid, void** exit_value) 3787{ 3788 REQUIRE_PROMISE(thread); 3789 if (exit_value && !validate_write_typed(exit_value)) 3790 return -EFAULT; 3791 3792 InterruptDisabler disabler; 3793 auto* thread = Thread::from_tid(tid); 3794 if (!thread || thread->pid() != pid()) 3795 return -ESRCH; 3796 3797 if (thread == Thread::current) 3798 return 
-EDEADLK; 3799 3800 if (thread->m_joinee == Thread::current) 3801 return -EDEADLK; 3802 3803 ASSERT(thread->m_joiner != Thread::current); 3804 if (thread->m_joiner) 3805 return -EINVAL; 3806 3807 if (!thread->is_joinable()) 3808 return -EINVAL; 3809 3810 void* joinee_exit_value = nullptr; 3811 3812 // NOTE: pthread_join() cannot be interrupted by signals. Only by death. 3813 for (;;) { 3814 auto result = Thread::current->block<Thread::JoinBlocker>(*thread, joinee_exit_value); 3815 if (result == Thread::BlockResult::InterruptedByDeath) { 3816 // NOTE: This cleans things up so that Thread::finalize() won't 3817 // get confused about a missing joiner when finalizing the joinee. 3818 InterruptDisabler disabler; 3819 Thread::current->m_joinee->m_joiner = nullptr; 3820 Thread::current->m_joinee = nullptr; 3821 return 0; 3822 } 3823 } 3824 3825 // NOTE: 'thread' is very possibly deleted at this point. Clear it just to be safe. 3826 thread = nullptr; 3827 3828 if (exit_value) 3829 copy_to_user(exit_value, &joinee_exit_value); 3830 return 0; 3831} 3832 3833int Process::sys$set_thread_name(int tid, const char* user_name, size_t user_name_length) 3834{ 3835 REQUIRE_PROMISE(thread); 3836 auto name = validate_and_copy_string_from_user(user_name, user_name_length); 3837 if (name.is_null()) 3838 return -EFAULT; 3839 3840 const size_t max_thread_name_size = 64; 3841 if (name.length() > max_thread_name_size) 3842 return -EINVAL; 3843 3844 InterruptDisabler disabler; 3845 auto* thread = Thread::from_tid(tid); 3846 if (!thread || thread->pid() != pid()) 3847 return -ESRCH; 3848 3849 thread->set_name(name); 3850 return 0; 3851} 3852int Process::sys$get_thread_name(int tid, char* buffer, size_t buffer_size) 3853{ 3854 REQUIRE_PROMISE(thread); 3855 if (buffer_size == 0) 3856 return -EINVAL; 3857 3858 if (!validate_write(buffer, buffer_size)) 3859 return -EFAULT; 3860 3861 InterruptDisabler disabler; 3862 auto* thread = Thread::from_tid(tid); 3863 if (!thread || thread->pid() != pid()) 
3864 return -ESRCH; 3865 3866 if (thread->name().length() + 1 > (size_t)buffer_size) 3867 return -ENAMETOOLONG; 3868 3869 copy_to_user(buffer, thread->name().characters(), thread->name().length() + 1); 3870 return 0; 3871} 3872 3873int Process::sys$gettid() 3874{ 3875 REQUIRE_PROMISE(stdio); 3876 return Thread::current->tid(); 3877} 3878 3879int Process::sys$donate(int tid) 3880{ 3881 REQUIRE_PROMISE(stdio); 3882 if (tid < 0) 3883 return -EINVAL; 3884 InterruptDisabler disabler; 3885 auto* thread = Thread::from_tid(tid); 3886 if (!thread || thread->pid() != pid()) 3887 return -ESRCH; 3888 Scheduler::donate_to(thread, "sys$donate"); 3889 return 0; 3890} 3891 3892int Process::sys$rename(const Syscall::SC_rename_params* user_params) 3893{ 3894 REQUIRE_PROMISE(cpath); 3895 Syscall::SC_rename_params params; 3896 if (!validate_read_and_copy_typed(&params, user_params)) 3897 return -EFAULT; 3898 auto old_path = get_syscall_path_argument(params.old_path); 3899 if (old_path.is_error()) 3900 return old_path.error(); 3901 auto new_path = get_syscall_path_argument(params.new_path); 3902 if (new_path.is_error()) 3903 return new_path.error(); 3904 return VFS::the().rename(old_path.value(), new_path.value(), current_directory()); 3905} 3906 3907int Process::sys$ftruncate(int fd, off_t length) 3908{ 3909 REQUIRE_PROMISE(stdio); 3910 if (length < 0) 3911 return -EINVAL; 3912 auto description = file_description(fd); 3913 if (!description) 3914 return -EBADF; 3915 if (!description->is_writable()) 3916 return -EBADF; 3917 return description->truncate(static_cast<u64>(length)); 3918} 3919 3920int Process::sys$watch_file(const char* user_path, size_t path_length) 3921{ 3922 REQUIRE_PROMISE(rpath); 3923 auto path = get_syscall_path_argument(user_path, path_length); 3924 if (path.is_error()) 3925 return path.error(); 3926 3927 auto custody_or_error = VFS::the().resolve_path(path.value(), current_directory()); 3928 if (custody_or_error.is_error()) 3929 return custody_or_error.error(); 3930 
3931 auto& custody = custody_or_error.value(); 3932 auto& inode = custody->inode(); 3933 3934 if (!inode.fs().supports_watchers()) 3935 return -ENOTSUP; 3936 3937 int fd = alloc_fd(); 3938 if (fd < 0) 3939 return fd; 3940 3941 m_fds[fd].set(FileDescription::create(*InodeWatcher::create(inode))); 3942 m_fds[fd].description->set_readable(true); 3943 return fd; 3944} 3945 3946int Process::sys$systrace(pid_t pid) 3947{ 3948 REQUIRE_PROMISE(proc); 3949 InterruptDisabler disabler; 3950 auto* peer = Process::from_pid(pid); 3951 if (!peer) 3952 return -ESRCH; 3953 if (peer->uid() != m_euid) 3954 return -EACCES; 3955 int fd = alloc_fd(); 3956 if (fd < 0) 3957 return fd; 3958 auto description = FileDescription::create(peer->ensure_tracer()); 3959 description->set_readable(true); 3960 m_fds[fd].set(move(description), 0); 3961 return fd; 3962} 3963 3964int Process::sys$halt() 3965{ 3966 if (!is_superuser()) 3967 return -EPERM; 3968 3969 REQUIRE_NO_PROMISES; 3970 3971 dbgprintf("acquiring FS locks...\n"); 3972 FS::lock_all(); 3973 dbgprintf("syncing mounted filesystems...\n"); 3974 FS::sync(); 3975 dbgprintf("attempting system shutdown...\n"); 3976 IO::out16(0x604, 0x2000); 3977 3978 return ESUCCESS; 3979} 3980 3981int Process::sys$reboot() 3982{ 3983 if (!is_superuser()) 3984 return -EPERM; 3985 3986 REQUIRE_NO_PROMISES; 3987 3988 dbgprintf("acquiring FS locks...\n"); 3989 FS::lock_all(); 3990 dbgprintf("syncing mounted filesystems...\n"); 3991 FS::sync(); 3992 dbgprintf("attempting reboot via KB Controller...\n"); 3993 IO::out8(0x64, 0xFE); 3994 3995 return ESUCCESS; 3996} 3997 3998int Process::sys$mount(const Syscall::SC_mount_params* user_params) 3999{ 4000 if (!is_superuser()) 4001 return -EPERM; 4002 4003 REQUIRE_NO_PROMISES; 4004 4005 Syscall::SC_mount_params params; 4006 if (!validate_read_and_copy_typed(&params, user_params)) 4007 return -EFAULT; 4008 4009 auto source = validate_and_copy_string_from_user(params.source); 4010 auto target = 
validate_and_copy_string_from_user(params.target); 4011 auto fs_type = validate_and_copy_string_from_user(params.fs_type); 4012 4013 if (source.is_null() || target.is_null() || fs_type.is_null()) 4014 return -EFAULT; 4015 4016 dbg() << "mount " << fs_type << ": source " << source << " @ " << target; 4017 4018 auto custody_or_error = VFS::the().resolve_path(target, current_directory()); 4019 if (custody_or_error.is_error()) 4020 return custody_or_error.error(); 4021 4022 auto& target_custody = custody_or_error.value(); 4023 4024 RefPtr<FS> fs; 4025 4026 if (params.flags & MS_BIND) { 4027 // We're doing a bind mount. 4028 auto source_or_error = VFS::the().resolve_path(source, current_directory()); 4029 if (source_or_error.is_error()) 4030 return source_or_error.error(); 4031 auto& source_custody = source_or_error.value(); 4032 return VFS::the().bind_mount(source_custody, target_custody, params.flags); 4033 } 4034 4035 if (fs_type == "ext2" || fs_type == "Ext2FS") { 4036 auto source_or_error = VFS::the().open(source, O_RDWR, 0, current_directory()); 4037 if (source_or_error.is_error()) 4038 return source_or_error.error(); 4039 4040 auto* device = source_or_error.value()->device(); 4041 if (!device || !device->is_block_device()) { 4042 dbg() << "mount: this is not a BlockDevice"; 4043 return -ENODEV; 4044 } 4045 auto& block_device = static_cast<BlockDevice&>(*device); 4046 4047 dbg() << "mount: attempting to mount " << block_device.absolute_path() << " on " << target; 4048 4049 fs = Ext2FS::create(block_device); 4050 } else if (fs_type == "proc" || fs_type == "ProcFS") { 4051 fs = ProcFS::create(); 4052 } else if (fs_type == "devpts" || fs_type == "DevPtsFS") { 4053 fs = DevPtsFS::create(); 4054 } else if (fs_type == "tmp" || fs_type == "TmpFS") { 4055 fs = TmpFS::create(); 4056 } else { 4057 return -ENODEV; 4058 } 4059 4060 if (!fs->initialize()) { 4061 dbg() << "mount: failed to initialize " << fs_type << " filesystem on " << source; 4062 return -ENODEV; 4063 } 4064 
4065 auto result = VFS::the().mount(fs.release_nonnull(), target_custody, params.flags); 4066 dbg() << "mount: successfully mounted " << source << " on " << target; 4067 return result; 4068} 4069 4070int Process::sys$umount(const char* user_mountpoint, size_t mountpoint_length) 4071{ 4072 if (!is_superuser()) 4073 return -EPERM; 4074 4075 REQUIRE_NO_PROMISES; 4076 4077 if (!validate_read(user_mountpoint, mountpoint_length)) 4078 return -EFAULT; 4079 4080 auto mountpoint = get_syscall_path_argument(user_mountpoint, mountpoint_length); 4081 if (mountpoint.is_error()) 4082 return mountpoint.error(); 4083 4084 auto metadata_or_error = VFS::the().lookup_metadata(mountpoint.value(), current_directory()); 4085 if (metadata_or_error.is_error()) 4086 return metadata_or_error.error(); 4087 4088 auto guest_inode_id = metadata_or_error.value().inode; 4089 return VFS::the().unmount(guest_inode_id); 4090} 4091 4092ProcessTracer& Process::ensure_tracer() 4093{ 4094 if (!m_tracer) 4095 m_tracer = ProcessTracer::create(m_pid); 4096 return *m_tracer; 4097} 4098 4099void Process::FileDescriptionAndFlags::clear() 4100{ 4101 description = nullptr; 4102 flags = 0; 4103} 4104 4105void Process::FileDescriptionAndFlags::set(NonnullRefPtr<FileDescription>&& d, u32 f) 4106{ 4107 description = move(d); 4108 flags = f; 4109} 4110 4111int Process::sys$mknod(const Syscall::SC_mknod_params* user_params) 4112{ 4113 REQUIRE_PROMISE(dpath); 4114 Syscall::SC_mknod_params params; 4115 if (!validate_read_and_copy_typed(&params, user_params)) 4116 return -EFAULT; 4117 if (!is_superuser() && !is_regular_file(params.mode) && !is_fifo(params.mode) && !is_socket(params.mode)) 4118 return -EPERM; 4119 auto path = get_syscall_path_argument(params.path); 4120 if (path.is_error()) 4121 return path.error(); 4122 return VFS::the().mknod(path.value(), params.mode & ~umask(), params.dev, current_directory()); 4123} 4124 4125int Process::sys$dump_backtrace() 4126{ 4127 dump_backtrace(); 4128 return 0; 4129} 4130 
4131int Process::sys$dbgputch(u8 ch) 4132{ 4133 IO::out8(0xe9, ch); 4134 return 0; 4135} 4136 4137int Process::sys$dbgputstr(const u8* characters, int length) 4138{ 4139 if (!length) 4140 return 0; 4141 if (!validate_read(characters, length)) 4142 return -EFAULT; 4143 SmapDisabler disabler; 4144 for (int i = 0; i < length; ++i) 4145 IO::out8(0xe9, characters[i]); 4146 return 0; 4147} 4148 4149KBuffer Process::backtrace(ProcessInspectionHandle& handle) const 4150{ 4151 KBufferBuilder builder; 4152 for_each_thread([&](Thread& thread) { 4153 builder.appendf("Thread %d (%s):\n", thread.tid(), thread.name().characters()); 4154 builder.append(thread.backtrace(handle)); 4155 return IterationDecision::Continue; 4156 }); 4157 return builder.build(); 4158} 4159 4160int Process::sys$set_process_icon(int icon_id) 4161{ 4162 REQUIRE_PROMISE(shared_buffer); 4163 LOCKER(shared_buffers().lock()); 4164 auto it = shared_buffers().resource().find(icon_id); 4165 if (it == shared_buffers().resource().end()) 4166 return -EINVAL; 4167 auto& shared_buffer = *(*it).value; 4168 if (!shared_buffer.is_shared_with(m_pid)) 4169 return -EPERM; 4170 m_icon_id = icon_id; 4171 return 0; 4172} 4173 4174int Process::sys$get_process_name(char* buffer, int buffer_size) 4175{ 4176 REQUIRE_PROMISE(stdio); 4177 if (buffer_size <= 0) 4178 return -EINVAL; 4179 4180 if (!validate_write(buffer, buffer_size)) 4181 return -EFAULT; 4182 4183 if (m_name.length() + 1 > (size_t)buffer_size) 4184 return -ENAMETOOLONG; 4185 4186 copy_to_user(buffer, m_name.characters(), m_name.length() + 1); 4187 return 0; 4188} 4189 4190// We don't use the flag yet, but we could use it for distinguishing 4191// random source like Linux, unlike the OpenBSD equivalent. However, if we 4192// do, we should be able of the caveats that Linux has dealt with. 
4193int Process::sys$getrandom(void* buffer, size_t buffer_size, unsigned int flags __attribute__((unused))) 4194{ 4195 REQUIRE_PROMISE(stdio); 4196 if (buffer_size <= 0) 4197 return -EINVAL; 4198 4199 if (!validate_write(buffer, buffer_size)) 4200 return -EFAULT; 4201 4202 SmapDisabler disabler; 4203 get_good_random_bytes((u8*)buffer, buffer_size); 4204 return 0; 4205} 4206 4207int Process::sys$setkeymap(const Syscall::SC_setkeymap_params* user_params) 4208{ 4209 if (!is_superuser()) 4210 return -EPERM; 4211 4212 REQUIRE_NO_PROMISES; 4213 Syscall::SC_setkeymap_params params; 4214 if (!validate_read_and_copy_typed(&params, user_params)) 4215 return -EFAULT; 4216 4217 const char* map = params.map; 4218 const char* shift_map = params.shift_map; 4219 const char* alt_map = params.alt_map; 4220 const char* altgr_map = params.altgr_map; 4221 4222 if (!validate_read(map, 0x80)) 4223 return -EFAULT; 4224 if (!validate_read(shift_map, 0x80)) 4225 return -EFAULT; 4226 if (!validate_read(alt_map, 0x80)) 4227 return -EFAULT; 4228 if (!validate_read(altgr_map, 0x80)) 4229 return -EFAULT; 4230 4231 SmapDisabler disabler; 4232 KeyboardDevice::the().set_maps(map, shift_map, alt_map, altgr_map); 4233 return 0; 4234} 4235 4236int Process::sys$clock_gettime(clockid_t clock_id, timespec* user_ts) 4237{ 4238 REQUIRE_PROMISE(stdio); 4239 if (!validate_write_typed(user_ts)) 4240 return -EFAULT; 4241 4242 timespec ts; 4243 memset(&ts, 0, sizeof(ts)); 4244 4245 switch (clock_id) { 4246 case CLOCK_MONOTONIC: 4247 ts.tv_sec = g_uptime / TICKS_PER_SECOND; 4248 ts.tv_nsec = (g_uptime % TICKS_PER_SECOND) * 1000000; 4249 break; 4250 default: 4251 return -EINVAL; 4252 } 4253 4254 copy_to_user(user_ts, &ts); 4255 return 0; 4256} 4257 4258int Process::sys$clock_nanosleep(const Syscall::SC_clock_nanosleep_params* user_params) 4259{ 4260 REQUIRE_PROMISE(stdio); 4261 4262 Syscall::SC_clock_nanosleep_params params; 4263 if (!validate_read_and_copy_typed(&params, user_params)) 4264 return -EFAULT; 4265 
4266 if (params.requested_sleep && !validate_read_typed(params.requested_sleep)) 4267 return -EFAULT; 4268 4269 timespec requested_sleep; 4270 copy_from_user(&requested_sleep, params.requested_sleep); 4271 4272 if (params.remaining_sleep && !validate_write_typed(params.remaining_sleep)) 4273 return -EFAULT; 4274 4275 bool is_absolute = params.flags & TIMER_ABSTIME; 4276 4277 switch (params.clock_id) { 4278 case CLOCK_MONOTONIC: { 4279 u64 wakeup_time; 4280 if (is_absolute) { 4281 u64 time_to_wake = (requested_sleep.tv_sec * 1000 + requested_sleep.tv_nsec / 1000000); 4282 wakeup_time = Thread::current->sleep_until(time_to_wake); 4283 } else { 4284 u32 ticks_to_sleep = (requested_sleep.tv_sec * 1000 + requested_sleep.tv_nsec / 1000000); 4285 if (!ticks_to_sleep) 4286 return 0; 4287 wakeup_time = Thread::current->sleep(ticks_to_sleep); 4288 } 4289 if (wakeup_time > g_uptime) { 4290 u32 ticks_left = wakeup_time - g_uptime; 4291 if (!is_absolute && params.remaining_sleep) { 4292 timespec remaining_sleep; 4293 memset(&remaining_sleep, 0, sizeof(timespec)); 4294 remaining_sleep.tv_sec = ticks_left / TICKS_PER_SECOND; 4295 ticks_left -= remaining_sleep.tv_sec * TICKS_PER_SECOND; 4296 remaining_sleep.tv_nsec = ticks_left * 1000000; 4297 copy_to_user(params.remaining_sleep, &remaining_sleep); 4298 } 4299 return -EINTR; 4300 } 4301 return 0; 4302 } 4303 default: 4304 return -EINVAL; 4305 } 4306} 4307 4308int Process::sys$sync() 4309{ 4310 REQUIRE_PROMISE(stdio); 4311 VFS::the().sync(); 4312 return 0; 4313} 4314 4315int Process::sys$yield() 4316{ 4317 REQUIRE_PROMISE(stdio); 4318 Thread::current->yield_without_holding_big_lock(); 4319 return 0; 4320} 4321 4322int Process::sys$beep() 4323{ 4324 PCSpeaker::tone_on(440); 4325 u64 wakeup_time = Thread::current->sleep(100); 4326 PCSpeaker::tone_off(); 4327 if (wakeup_time > g_uptime) 4328 return -EINTR; 4329 return 0; 4330} 4331 4332int Process::sys$module_load(const char* user_path, size_t path_length) 4333{ 4334 if 
(!is_superuser()) 4335 return -EPERM; 4336 4337 REQUIRE_NO_PROMISES; 4338 4339 auto path = get_syscall_path_argument(user_path, path_length); 4340 if (path.is_error()) 4341 return path.error(); 4342 auto description_or_error = VFS::the().open(path.value(), O_RDONLY, 0, current_directory()); 4343 if (description_or_error.is_error()) 4344 return description_or_error.error(); 4345 auto& description = description_or_error.value(); 4346 auto payload = description->read_entire_file(); 4347 auto storage = KBuffer::create_with_size(payload.size()); 4348 memcpy(storage.data(), payload.data(), payload.size()); 4349 payload.clear(); 4350 4351 auto elf_image = make<ELFImage>(storage.data(), storage.size()); 4352 if (!elf_image->parse()) 4353 return -ENOEXEC; 4354 4355 HashMap<String, u8*> section_storage_by_name; 4356 4357 auto module = make<Module>(); 4358 4359 elf_image->for_each_section_of_type(SHT_PROGBITS, [&](const ELFImage::Section& section) { 4360 auto section_storage = KBuffer::copy(section.raw_data(), section.size(), Region::Access::Read | Region::Access::Write | Region::Access::Execute); 4361 section_storage_by_name.set(section.name(), section_storage.data()); 4362 module->sections.append(move(section_storage)); 4363 return IterationDecision::Continue; 4364 }); 4365 4366 bool missing_symbols = false; 4367 4368 elf_image->for_each_section_of_type(SHT_PROGBITS, [&](const ELFImage::Section& section) { 4369 auto* section_storage = section_storage_by_name.get(section.name()).value_or(nullptr); 4370 ASSERT(section_storage); 4371 section.relocations().for_each_relocation([&](const ELFImage::Relocation& relocation) { 4372 auto& patch_ptr = *reinterpret_cast<ptrdiff_t*>(section_storage + relocation.offset()); 4373 switch (relocation.type()) { 4374 case R_386_PC32: { 4375 // PC-relative relocation 4376 dbg() << "PC-relative relocation: " << relocation.symbol().name(); 4377 u32 symbol_address = address_for_kernel_symbol(relocation.symbol().name()); 4378 if (symbol_address == 
0) 4379 missing_symbols = true; 4380 dbg() << " Symbol address: " << (void*)symbol_address; 4381 ptrdiff_t relative_offset = (char*)symbol_address - ((char*)&patch_ptr + 4); 4382 patch_ptr = relative_offset; 4383 break; 4384 } 4385 case R_386_32: // Absolute relocation 4386 dbg() << "Absolute relocation: '" << relocation.symbol().name() << "' value:" << relocation.symbol().value() << ", index:" << relocation.symbol_index(); 4387 4388 if (relocation.symbol().bind() == STB_LOCAL) { 4389 auto* section_storage_containing_symbol = section_storage_by_name.get(relocation.symbol().section().name()).value_or(nullptr); 4390 ASSERT(section_storage_containing_symbol); 4391 u32 symbol_address = (ptrdiff_t)(section_storage_containing_symbol + relocation.symbol().value()); 4392 if (symbol_address == 0) 4393 missing_symbols = true; 4394 dbg() << " Symbol address: " << (void*)symbol_address; 4395 patch_ptr += symbol_address; 4396 } else if (relocation.symbol().bind() == STB_GLOBAL) { 4397 u32 symbol_address = address_for_kernel_symbol(relocation.symbol().name()); 4398 if (symbol_address == 0) 4399 missing_symbols = true; 4400 dbg() << " Symbol address: " << (void*)symbol_address; 4401 patch_ptr += symbol_address; 4402 } else { 4403 ASSERT_NOT_REACHED(); 4404 } 4405 break; 4406 } 4407 return IterationDecision::Continue; 4408 }); 4409 4410 return IterationDecision::Continue; 4411 }); 4412 4413 if (missing_symbols) 4414 return -EINVAL; 4415 4416 auto* text_base = section_storage_by_name.get(".text").value_or(nullptr); 4417 if (!text_base) { 4418 dbg() << "No .text section found in module!"; 4419 return -EINVAL; 4420 } 4421 4422 elf_image->for_each_symbol([&](const ELFImage::Symbol& symbol) { 4423 dbg() << " - " << symbol.type() << " '" << symbol.name() << "' @ " << (void*)symbol.value() << ", size=" << symbol.size(); 4424 if (symbol.name() == "module_init") { 4425 module->module_init = (ModuleInitPtr)(text_base + symbol.value()); 4426 } else if (symbol.name() == "module_fini") { 4427 
module->module_fini = (ModuleFiniPtr)(text_base + symbol.value()); 4428 } else if (symbol.name() == "module_name") { 4429 const u8* storage = section_storage_by_name.get(symbol.section().name()).value_or(nullptr); 4430 if (storage) 4431 module->name = String((const char*)(storage + symbol.value())); 4432 } 4433 return IterationDecision::Continue; 4434 }); 4435 4436 if (!module->module_init) 4437 return -EINVAL; 4438 4439 if (g_modules->contains(module->name)) { 4440 dbg() << "a module with the name " << module->name << " is already loaded; please unload it first"; 4441 return -EEXIST; 4442 } 4443 4444 module->module_init(); 4445 4446 auto name = module->name; 4447 g_modules->set(name, move(module)); 4448 4449 return 0; 4450} 4451 4452int Process::sys$module_unload(const char* user_name, size_t name_length) 4453{ 4454 if (!is_superuser()) 4455 return -EPERM; 4456 4457 REQUIRE_NO_PROMISES; 4458 4459 auto module_name = validate_and_copy_string_from_user(user_name, name_length); 4460 if (module_name.is_null()) 4461 return -EFAULT; 4462 4463 auto it = g_modules->find(module_name); 4464 if (it == g_modules->end()) 4465 return -ENOENT; 4466 4467 if (it->value->module_fini) 4468 it->value->module_fini(); 4469 4470 g_modules->remove(it); 4471 return 0; 4472} 4473 4474int Process::sys$profiling_enable(pid_t pid) 4475{ 4476 REQUIRE_NO_PROMISES; 4477 InterruptDisabler disabler; 4478 auto* process = Process::from_pid(pid); 4479 if (!process) 4480 return -ESRCH; 4481 if (!is_superuser() && process->uid() != m_uid) 4482 return -EPERM; 4483 Profiling::start(*process); 4484 process->set_profiling(true); 4485 return 0; 4486} 4487 4488int Process::sys$profiling_disable(pid_t pid) 4489{ 4490 InterruptDisabler disabler; 4491 auto* process = Process::from_pid(pid); 4492 if (!process) 4493 return -ESRCH; 4494 if (!is_superuser() && process->uid() != m_uid) 4495 return -EPERM; 4496 process->set_profiling(false); 4497 Profiling::stop(); 4498 return 0; 4499} 4500 4501void* 
Process::sys$get_kernel_info_page() 4502{ 4503 REQUIRE_PROMISE(stdio); 4504 return s_info_page_address_for_userspace.as_ptr(); 4505} 4506 4507Thread& Process::any_thread() 4508{ 4509 Thread* found_thread = nullptr; 4510 for_each_thread([&](auto& thread) { 4511 found_thread = &thread; 4512 return IterationDecision::Break; 4513 }); 4514 ASSERT(found_thread); 4515 return *found_thread; 4516} 4517 4518WaitQueue& Process::futex_queue(i32* userspace_address) 4519{ 4520 auto& queue = m_futex_queues.ensure((uintptr_t)userspace_address); 4521 if (!queue) 4522 queue = make<WaitQueue>(); 4523 return *queue; 4524} 4525 4526int Process::sys$futex(const Syscall::SC_futex_params* user_params) 4527{ 4528 REQUIRE_PROMISE(thread); 4529 4530 Syscall::SC_futex_params params; 4531 if (!validate_read_and_copy_typed(&params, user_params)) 4532 return -EFAULT; 4533 4534 i32* userspace_address = params.userspace_address; 4535 int futex_op = params.futex_op; 4536 i32 value = params.val; 4537 const timespec* user_timeout = params.timeout; 4538 4539 if (!validate_read_typed(userspace_address)) 4540 return -EFAULT; 4541 4542 if (user_timeout && !validate_read_typed(user_timeout)) 4543 return -EFAULT; 4544 4545 timespec timeout { 0, 0 }; 4546 if (user_timeout) 4547 copy_from_user(&timeout, user_timeout); 4548 4549 i32 user_value; 4550 4551 switch (futex_op) { 4552 case FUTEX_WAIT: 4553 copy_from_user(&user_value, userspace_address); 4554 if (user_value != value) 4555 return -EAGAIN; 4556 // FIXME: This is supposed to be interruptible by a signal, but right now WaitQueue cannot be interrupted. 4557 // FIXME: Support timeout! 4558 Thread::current->wait_on(futex_queue(userspace_address)); 4559 break; 4560 case FUTEX_WAKE: 4561 if (value == 0) 4562 return 0; 4563 if (value == 1) { 4564 futex_queue(userspace_address).wake_one(); 4565 } else { 4566 // FIXME: Wake exactly (value) waiters. 
4567 futex_queue(userspace_address).wake_all(); 4568 } 4569 break; 4570 } 4571 4572 return 0; 4573} 4574 4575int Process::sys$set_thread_boost(int tid, int amount) 4576{ 4577 REQUIRE_PROMISE(proc); 4578 if (amount < 0 || amount > 20) 4579 return -EINVAL; 4580 InterruptDisabler disabler; 4581 auto* thread = Thread::from_tid(tid); 4582 if (!thread) 4583 return -ESRCH; 4584 if (thread->state() == Thread::State::Dead || thread->state() == Thread::State::Dying) 4585 return -ESRCH; 4586 if (!is_superuser() && thread->process().uid() != euid()) 4587 return -EPERM; 4588 thread->set_priority_boost(amount); 4589 return 0; 4590} 4591 4592int Process::sys$set_process_boost(pid_t pid, int amount) 4593{ 4594 REQUIRE_PROMISE(proc); 4595 if (amount < 0 || amount > 20) 4596 return -EINVAL; 4597 InterruptDisabler disabler; 4598 auto* process = Process::from_pid(pid); 4599 if (!process || process->is_dead()) 4600 return -ESRCH; 4601 if (!is_superuser() && process->uid() != euid()) 4602 return -EPERM; 4603 process->m_priority_boost = amount; 4604 return 0; 4605} 4606 4607int Process::sys$chroot(const char* user_path, size_t path_length, int mount_flags) 4608{ 4609 if (!is_superuser()) 4610 return -EPERM; 4611 REQUIRE_PROMISE(chroot); 4612 auto path = get_syscall_path_argument(user_path, path_length); 4613 if (path.is_error()) 4614 return path.error(); 4615 auto directory_or_error = VFS::the().open_directory(path.value(), current_directory()); 4616 if (directory_or_error.is_error()) 4617 return directory_or_error.error(); 4618 auto directory = directory_or_error.value(); 4619 m_root_directory_relative_to_global_root = directory; 4620 int chroot_mount_flags = mount_flags == -1 ? 
directory->mount_flags() : mount_flags; 4621 set_root_directory(Custody::create(nullptr, "", directory->inode(), chroot_mount_flags)); 4622 return 0; 4623} 4624 4625Custody& Process::root_directory() 4626{ 4627 if (!m_root_directory) 4628 m_root_directory = VFS::the().root_custody(); 4629 return *m_root_directory; 4630} 4631 4632Custody& Process::root_directory_relative_to_global_root() 4633{ 4634 if (!m_root_directory_relative_to_global_root) 4635 m_root_directory_relative_to_global_root = root_directory(); 4636 return *m_root_directory_relative_to_global_root; 4637} 4638 4639void Process::set_root_directory(const Custody& root) 4640{ 4641 m_root_directory = root; 4642} 4643 4644int Process::sys$pledge(const Syscall::SC_pledge_params* user_params) 4645{ 4646 Syscall::SC_pledge_params params; 4647 if (!validate_read_and_copy_typed(&params, user_params)) 4648 return -EFAULT; 4649 4650 if (params.promises.length > 1024 || params.execpromises.length > 1024) 4651 return -E2BIG; 4652 4653 String promises; 4654 if (params.promises.characters) { 4655 promises = validate_and_copy_string_from_user(params.promises); 4656 if (promises.is_null()) 4657 return -EFAULT; 4658 } 4659 4660 String execpromises; 4661 if (params.execpromises.characters) { 4662 execpromises = validate_and_copy_string_from_user(params.execpromises); 4663 if (execpromises.is_null()) 4664 return -EFAULT; 4665 } 4666 4667 auto parse_pledge = [&](auto& pledge_spec, u32& mask) { 4668 auto parts = pledge_spec.split_view(' '); 4669 for (auto& part : parts) { 4670#define __ENUMERATE_PLEDGE_PROMISE(x) \ 4671 if (part == #x) { \ 4672 mask |= (1u << (u32)Pledge::x); \ 4673 continue; \ 4674 } 4675 ENUMERATE_PLEDGE_PROMISES 4676#undef __ENUMERATE_PLEDGE_PROMISE 4677 if (part == "dns") { 4678 // "dns" is an alias for "unix" since DNS queries go via LookupServer 4679 mask |= (1u << (u32)Pledge::unix); 4680 continue; 4681 } 4682 return false; 4683 } 4684 return true; 4685 }; 4686 4687 if (!promises.is_null()) { 4688 u32 
new_promises = 0; 4689 if (!parse_pledge(promises, new_promises)) 4690 return -EINVAL; 4691 if (m_promises && (!new_promises || new_promises & ~m_promises)) 4692 return -EPERM; 4693 m_promises = new_promises; 4694 } 4695 4696 if (!execpromises.is_null()) { 4697 u32 new_execpromises = 0; 4698 if (!parse_pledge(execpromises, new_execpromises)) 4699 return -EINVAL; 4700 if (m_execpromises && (!new_execpromises || new_execpromises & ~m_execpromises)) 4701 return -EPERM; 4702 m_execpromises = new_execpromises; 4703 } 4704 4705 return 0; 4706} 4707 4708Region& Process::add_region(NonnullOwnPtr<Region> region) 4709{ 4710 auto* ptr = region.ptr(); 4711 m_regions.append(move(region)); 4712 return *ptr; 4713} 4714 4715int Process::sys$unveil(const Syscall::SC_unveil_params* user_params) 4716{ 4717 Syscall::SC_unveil_params params; 4718 if (!validate_read_and_copy_typed(&params, user_params)) 4719 return -EFAULT; 4720 4721 if (!params.path.characters && !params.permissions.characters) { 4722 m_veil_state = VeilState::Locked; 4723 return 0; 4724 } 4725 4726 if (m_veil_state == VeilState::Locked) 4727 return -EPERM; 4728 4729 if (!params.path.characters || !params.permissions.characters) 4730 return -EINVAL; 4731 4732 if (params.permissions.length > 4) 4733 return -EINVAL; 4734 4735 auto path = get_syscall_path_argument(params.path); 4736 if (path.is_error()) 4737 return path.error(); 4738 4739 if (path.value().is_empty() || path.value().characters()[0] != '/') 4740 return -EINVAL; 4741 4742 auto permissions = validate_and_copy_string_from_user(params.permissions); 4743 if (permissions.is_null()) 4744 return -EFAULT; 4745 4746 unsigned new_permissions = 0; 4747 for (size_t i = 0; i < permissions.length(); ++i) { 4748 switch (permissions[i]) { 4749 case 'r': 4750 new_permissions |= UnveiledPath::Access::Read; 4751 break; 4752 case 'w': 4753 new_permissions |= UnveiledPath::Access::Write; 4754 break; 4755 case 'x': 4756 new_permissions |= UnveiledPath::Access::Execute; 4757 
break; 4758 case 'c': 4759 new_permissions |= UnveiledPath::Access::CreateOrRemove; 4760 break; 4761 default: 4762 return -EINVAL; 4763 } 4764 } 4765 4766 for (size_t i = 0; i < m_unveiled_paths.size(); ++i) { 4767 auto& unveiled_path = m_unveiled_paths[i]; 4768 if (unveiled_path.path == path.value()) { 4769 if (new_permissions & ~unveiled_path.permissions) 4770 return -EPERM; 4771 unveiled_path.permissions = new_permissions; 4772 return 0; 4773 } 4774 } 4775 4776 m_unveiled_paths.append({ path.value(), new_permissions }); 4777 ASSERT(m_veil_state != VeilState::Locked); 4778 m_veil_state = VeilState::Dropped; 4779 return 0; 4780} 4781 4782int Process::sys$perf_event(int type, uintptr_t arg1, uintptr_t arg2) 4783{ 4784 if (!m_perf_event_buffer) 4785 m_perf_event_buffer = make<PerformanceEventBuffer>(); 4786 return m_perf_event_buffer->append(type, arg1, arg2); 4787} 4788 4789void Process::set_tty(TTY* tty) 4790{ 4791 m_tty = tty; 4792} 4793 4794}