// Serenity Operating System — Kernel execve/ELF-loading implementation
// (listing captured from repository web view: branch master, 1002 lines, 43 kB)
1/* 2 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> 3 * Copyright (c) 2022, the SerenityOS developers. 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#include <AK/ScopeGuard.h> 9#include <AK/TemporaryChange.h> 10#include <Kernel/Arch/CPU.h> 11#include <Kernel/Debug.h> 12#include <Kernel/FileSystem/Custody.h> 13#include <Kernel/FileSystem/OpenFileDescription.h> 14#include <Kernel/FileSystem/VirtualFileSystem.h> 15#include <Kernel/Memory/MemoryManager.h> 16#include <Kernel/Memory/Region.h> 17#include <Kernel/Memory/SharedInodeVMObject.h> 18#include <Kernel/Panic.h> 19#include <Kernel/PerformanceManager.h> 20#include <Kernel/Process.h> 21#include <Kernel/Random.h> 22#include <Kernel/Scheduler.h> 23#include <Kernel/Time/TimeManagement.h> 24#include <LibELF/AuxiliaryVector.h> 25#include <LibELF/Image.h> 26#include <LibELF/Validation.h> 27 28namespace Kernel { 29 30extern Memory::Region* g_signal_trampoline_region; 31 32struct LoadResult { 33 OwnPtr<Memory::AddressSpace> space; 34 FlatPtr load_base { 0 }; 35 FlatPtr entry_eip { 0 }; 36 size_t size { 0 }; 37 LockWeakPtr<Memory::Region> tls_region; 38 size_t tls_size { 0 }; 39 size_t tls_alignment { 0 }; 40 LockWeakPtr<Memory::Region> stack_region; 41}; 42 43static constexpr size_t auxiliary_vector_size = 15; 44static Array<ELF::AuxiliaryValue, auxiliary_vector_size> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, StringView executable_path, Optional<Process::ScopedDescriptionAllocation> const& main_program_fd_allocation); 45 46static bool validate_stack_size(Vector<NonnullOwnPtr<KString>> const& arguments, Vector<NonnullOwnPtr<KString>>& environment, Array<ELF::AuxiliaryValue, auxiliary_vector_size> const& auxiliary) 47{ 48 size_t total_arguments_size = 0; 49 size_t total_environment_size = 0; 50 size_t total_auxiliary_size = 0; 51 52 for (auto const& a : arguments) 53 total_arguments_size += a->length() + 1; 54 for (auto const& e 
: environment) 55 total_environment_size += e->length() + 1; 56 for (auto const& v : auxiliary) { 57 if (!v.optional_string.is_empty()) 58 total_auxiliary_size += round_up_to_power_of_two(v.optional_string.length() + 1, sizeof(FlatPtr)); 59 60 if (v.auxv.a_type == ELF::AuxiliaryValue::Random) 61 total_auxiliary_size += round_up_to_power_of_two(16, sizeof(FlatPtr)); 62 } 63 64 total_arguments_size += sizeof(char*) * (arguments.size() + 1); 65 total_environment_size += sizeof(char*) * (environment.size() + 1); 66 total_auxiliary_size += sizeof(auxv_t) * auxiliary.size(); 67 68 if (total_arguments_size > Process::max_arguments_size) 69 return false; 70 71 if (total_environment_size > Process::max_environment_size) 72 return false; 73 74 if (total_auxiliary_size > Process::max_auxiliary_size) 75 return false; 76 77 return true; 78} 79 80static ErrorOr<FlatPtr> make_userspace_context_for_main_thread([[maybe_unused]] ThreadRegisters& regs, Memory::Region& region, Vector<NonnullOwnPtr<KString>> const& arguments, 81 Vector<NonnullOwnPtr<KString>> const& environment, Array<ELF::AuxiliaryValue, auxiliary_vector_size> auxiliary_values) 82{ 83 FlatPtr new_sp = region.range().end().get(); 84 85 // Add some bits of randomness to the user stack pointer. 
86 new_sp -= round_up_to_power_of_two(get_fast_random<u32>() % 4096, 16); 87 88 auto push_on_new_stack = [&new_sp](FlatPtr value) { 89 new_sp -= sizeof(FlatPtr); 90 Userspace<FlatPtr*> stack_ptr = new_sp; 91 auto result = copy_to_user(stack_ptr, &value); 92 VERIFY(!result.is_error()); 93 }; 94 95 auto push_aux_value_on_new_stack = [&new_sp](auxv_t value) { 96 new_sp -= sizeof(auxv_t); 97 Userspace<auxv_t*> stack_ptr = new_sp; 98 auto result = copy_to_user(stack_ptr, &value); 99 VERIFY(!result.is_error()); 100 }; 101 102 auto push_string_on_new_stack = [&new_sp](StringView string) { 103 new_sp -= round_up_to_power_of_two(string.length() + 1, sizeof(FlatPtr)); 104 Userspace<FlatPtr*> stack_ptr = new_sp; 105 auto result = copy_to_user(stack_ptr, string.characters_without_null_termination(), string.length() + 1); 106 VERIFY(!result.is_error()); 107 }; 108 109 Vector<FlatPtr> argv_entries; 110 for (auto const& argument : arguments) { 111 push_string_on_new_stack(argument->view()); 112 TRY(argv_entries.try_append(new_sp)); 113 } 114 115 Vector<FlatPtr> env_entries; 116 for (auto const& variable : environment) { 117 push_string_on_new_stack(variable->view()); 118 TRY(env_entries.try_append(new_sp)); 119 } 120 121 for (auto& value : auxiliary_values) { 122 if (!value.optional_string.is_empty()) { 123 push_string_on_new_stack(value.optional_string); 124 value.auxv.a_un.a_ptr = (void*)new_sp; 125 } 126 if (value.auxv.a_type == ELF::AuxiliaryValue::Random) { 127 u8 random_bytes[16] {}; 128 get_fast_random_bytes({ random_bytes, sizeof(random_bytes) }); 129 push_string_on_new_stack({ random_bytes, sizeof(random_bytes) }); 130 value.auxv.a_un.a_ptr = (void*)new_sp; 131 } 132 } 133 134 for (ssize_t i = auxiliary_values.size() - 1; i >= 0; --i) { 135 auto& value = auxiliary_values[i]; 136 push_aux_value_on_new_stack(value.auxv); 137 } 138 139 push_on_new_stack(0); 140 for (ssize_t i = env_entries.size() - 1; i >= 0; --i) 141 push_on_new_stack(env_entries[i]); 142 FlatPtr envp = 
new_sp; 143 144 push_on_new_stack(0); 145 for (ssize_t i = argv_entries.size() - 1; i >= 0; --i) 146 push_on_new_stack(argv_entries[i]); 147 FlatPtr argv = new_sp; 148 149 // NOTE: The stack needs to be 16-byte aligned. 150 new_sp -= new_sp % 16; 151 152#if ARCH(X86_64) 153 regs.rdi = argv_entries.size(); 154 regs.rsi = argv; 155 regs.rdx = envp; 156#elif ARCH(AARCH64) 157 regs.x[0] = argv_entries.size(); 158 regs.x[1] = argv; 159 regs.x[2] = envp; 160#else 161# error Unknown architecture 162#endif 163 164 VERIFY(new_sp % 16 == 0); 165 166 // FIXME: The way we're setting up the stack and passing arguments to the entry point isn't ABI-compliant 167 return new_sp; 168} 169 170struct RequiredLoadRange { 171 FlatPtr start { 0 }; 172 FlatPtr end { 0 }; 173}; 174 175static ErrorOr<RequiredLoadRange> get_required_load_range(OpenFileDescription& program_description) 176{ 177 auto& inode = *(program_description.inode()); 178 auto vmobject = TRY(Memory::SharedInodeVMObject::try_create_with_inode(inode)); 179 180 size_t executable_size = inode.size(); 181 size_t rounded_executable_size = TRY(Memory::page_round_up(executable_size)); 182 auto region = TRY(MM.allocate_kernel_region_with_vmobject(*vmobject, rounded_executable_size, "ELF memory range calculation"sv, Memory::Region::Access::Read)); 183 auto elf_image = ELF::Image(region->vaddr().as_ptr(), executable_size); 184 if (!elf_image.is_valid()) { 185 return EINVAL; 186 } 187 188 RequiredLoadRange range {}; 189 elf_image.for_each_program_header([&range](auto const& pheader) { 190 if (pheader.type() != PT_LOAD) 191 return; 192 193 auto region_start = (FlatPtr)pheader.vaddr().as_ptr(); 194 auto region_end = region_start + pheader.size_in_memory(); 195 if (range.start == 0 || region_start < range.start) 196 range.start = region_start; 197 if (range.end == 0 || region_end > range.end) 198 range.end = region_end; 199 }); 200 201 VERIFY(range.end > range.start); 202 return range; 203}; 204 205static ErrorOr<FlatPtr> 
get_load_offset(const ElfW(Ehdr) & main_program_header, OpenFileDescription& main_program_description, OpenFileDescription* interpreter_description)
{
    constexpr FlatPtr load_range_start = 0x08000000;
    constexpr FlatPtr load_range_size = 65536 * PAGE_SIZE; // 2**16 * PAGE_SIZE = 256MB
    constexpr FlatPtr minimum_load_offset_randomization_size = 10 * MiB;

    auto random_load_offset_in_range([](auto start, auto size) {
        return Memory::page_round_down(start + get_good_random<FlatPtr>() % size);
    });

    // PIE executables can go anywhere in the standard load window.
    if (main_program_header.e_type == ET_DYN) {
        return random_load_offset_in_range(load_range_start, load_range_size);
    }

    if (main_program_header.e_type != ET_EXEC)
        return EINVAL;

    // ET_EXEC: the main program's placement is fixed by its program headers;
    // we only get to choose where the interpreter (if any) goes.
    auto main_program_load_range = TRY(get_required_load_range(main_program_description));

    RequiredLoadRange selected_range {};

    if (interpreter_description) {
        auto interpreter_load_range = TRY(get_required_load_range(*interpreter_description));

        auto interpreter_size_in_memory = interpreter_load_range.end - interpreter_load_range.start;
        auto interpreter_load_range_end = load_range_start + load_range_size - interpreter_size_in_memory;

        // No intersection
        if (main_program_load_range.end < load_range_start || main_program_load_range.start > interpreter_load_range_end)
            return random_load_offset_in_range(load_range_start, load_range_size);

        // The fixed main program splits the load window in two; pick the
        // larger gap so the interpreter gets the most randomization room.
        RequiredLoadRange first_available_part = { load_range_start, main_program_load_range.start };
        RequiredLoadRange second_available_part = { main_program_load_range.end, interpreter_load_range_end };

        // Select larger part
        if (first_available_part.end - first_available_part.start > second_available_part.end - second_available_part.start)
            selected_range = first_available_part;
        else
            selected_range = second_available_part;
    } else
        selected_range = main_program_load_range;

    // If main program is too big and leaves us without enough space for adequate loader randomization
    if (selected_range.end - selected_range.start < minimum_load_offset_randomization_size)
        return E2BIG;

    return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start);
}

// Whether load_elf_object() should allocate a master TLS region from a PT_TLS
// header (done for statically-linked main programs; the dynamic loader handles
// TLS itself otherwise).
enum class ShouldAllocateTls {
    No,
    Yes,
};

// Whether the mapped regions should be marked as allowed syscall origins
// (used for the dynamic loader's regions).
enum class ShouldAllowSyscalls {
    No,
    Yes,
};

// Maps one ELF object (main program or interpreter) into `new_space` at
// `load_offset`. Writable PT_LOAD segments are copied into anonymous regions;
// non-writable ones are mapped directly from the inode's VM object. Also
// allocates the master TLS region (if requested) and the main thread's stack.
// NOTE: This switches the CPU into `new_space` and leaves it there.
static ErrorOr<LoadResult> load_elf_object(NonnullOwnPtr<Memory::AddressSpace> new_space, OpenFileDescription& object_description,
    FlatPtr load_offset, ShouldAllocateTls should_allocate_tls, ShouldAllowSyscalls should_allow_syscalls)
{
    auto& inode = *(object_description.inode());
    auto vmobject = TRY(Memory::SharedInodeVMObject::try_create_with_inode(inode));

    // Classic W^X at the file level: refuse to execute a file that somebody
    // has mapped writable.
    if (vmobject->writable_mappings()) {
        dbgln("Refusing to execute a write-mapped program");
        return ETXTBSY;
    }

    size_t executable_size = inode.size();
    size_t rounded_executable_size = TRY(Memory::page_round_up(executable_size));

    auto executable_region = TRY(MM.allocate_kernel_region_with_vmobject(*vmobject, rounded_executable_size, "ELF loading"sv, Memory::Region::Access::Read));
    auto elf_image = ELF::Image(executable_region->vaddr().as_ptr(), executable_size);

    if (!elf_image.is_valid())
        return ENOEXEC;

    Memory::Region* master_tls_region { nullptr };
    size_t master_tls_size = 0;
    size_t master_tls_alignment = 0;
    FlatPtr load_base_address = 0;
    size_t stack_size = 0;

    auto elf_name = TRY(object_description.pseudo_path());
    VERIFY(!Processor::in_critical());

    Memory::MemoryManager::enter_address_space(*new_space);

    // PT_TLS: allocate and fill the master TLS image that new threads clone.
    auto load_tls_section = [&](auto& program_header) -> ErrorOr<void> {
        VERIFY(should_allocate_tls == ShouldAllocateTls::Yes);
        VERIFY(program_header.size_in_memory());

        if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
            dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable.");
            return ENOEXEC;
        }

        auto region_name = TRY(KString::formatted("{} (master-tls)", elf_name));
        master_tls_region = TRY(new_space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, program_header.size_in_memory(), PAGE_SIZE, region_name->view(), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve));
        master_tls_size = program_header.size_in_memory();
        master_tls_alignment = program_header.alignment();

        TRY(copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image()));
        return {};
    };

    auto load_writable_section = [&](auto& program_header) -> ErrorOr<void> {
        // Writable section: create a copy in memory.
        VERIFY(program_header.alignment() % PAGE_SIZE == 0);

        if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
            dbgln("Shenanigans! Writable ELF PT_LOAD header sneaks outside of executable.");
            return ENOEXEC;
        }

        // Note: no PROT_EXEC here — writable segments are never executable (W^X).
        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        auto region_name = TRY(KString::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : ""));

        auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
        size_t rounded_range_end = TRY(Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()));
        auto range_end = VirtualAddress { rounded_range_end };

        auto region = TRY(new_space->allocate_region(Memory::RandomizeVirtualAddress::Yes, range_base, range_end.get() - range_base.get(), PAGE_SIZE, region_name->view(), prot, AllocationStrategy::Reserve));

        // It's not always the case with PIE executables (and very well shouldn't be) that the
        // virtual address in the program header matches the one we end up giving the process.
        // In order to copy the data image correctly into memory, we need to copy the data starting at
        // the right initial page offset into the pages allocated for the elf_alloc-XX section.
        // FIXME: There's an opportunity to munmap, or at least mprotect, the padding space between
        //        the .text and .data PT_LOAD sections of the executable.
        //        Accessing it would definitely be a bug.
        auto page_offset = program_header.vaddr();
        page_offset.mask(~PAGE_MASK);
        TRY(copy_to_user((u8*)region->vaddr().as_ptr() + page_offset.get(), program_header.raw_data(), program_header.size_in_image()));
        return {};
    };

    auto load_section = [&](auto& program_header) -> ErrorOr<void> {
        if (program_header.size_in_memory() == 0)
            return {};

        if (program_header.is_writable())
            return load_writable_section(program_header);

        // Non-writable section: map the executable itself in memory.
        VERIFY(program_header.alignment() % PAGE_SIZE == 0);
        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        if (program_header.is_executable())
            prot |= PROT_EXEC;

        auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
        size_t rounded_range_end = TRY(Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()));
        auto range_end = VirtualAddress { rounded_range_end };
        auto region = TRY(new_space->allocate_region_with_vmobject(Memory::RandomizeVirtualAddress::Yes, range_base, range_end.get() - range_base.get(), program_header.alignment(), *vmobject, program_header.offset(), elf_name->view(), prot, true));

        if (should_allow_syscalls == ShouldAllowSyscalls::Yes)
            region->set_syscall_region(true);
        // The segment at file offset 0 defines where the object's ELF header
        // ended up, i.e. the load base.
        if (program_header.offset() == 0)
            load_base_address = (FlatPtr)region->vaddr().as_ptr();
        return {};
    };

    auto load_elf_program_header = [&](auto& program_header) -> ErrorOr<void> {
        if (program_header.type() == PT_TLS)
            return load_tls_section(program_header);

        if (program_header.type() == PT_LOAD)
            return load_section(program_header);

        // PT_GNU_STACK's memsz requests a non-default main stack size.
        if (program_header.type() == PT_GNU_STACK) {
            stack_size = program_header.size_in_memory();
        }

        // NOTE: We ignore other program header types.
        return {};
    };

    // for_each_program_header can't propagate errors itself, so funnel the
    // first failure out through `result`.
    TRY([&] {
        ErrorOr<void> result;
        elf_image.for_each_program_header([&](ELF::Image::ProgramHeader const& program_header) {
            result = load_elf_program_header(program_header);
            return result.is_error() ? IterationDecision::Break : IterationDecision::Continue;
        });
        return result;
    }());

    if (stack_size == 0) {
        stack_size = Thread::default_userspace_stack_size;
    }

    if (!elf_image.entry().offset(load_offset).get()) {
        dbgln("do_exec: Failure loading program, entry pointer is invalid! {})", elf_image.entry().offset(load_offset));
        return ENOEXEC;
    }

    auto* stack_region = TRY(new_space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, stack_size, PAGE_SIZE, "Stack (Main thread)"sv, PROT_READ | PROT_WRITE, AllocationStrategy::Reserve));
    stack_region->set_stack(true);

    return LoadResult {
        move(new_space),
        load_base_address,
        elf_image.entry().offset(load_offset).get(),
        executable_size,
        TRY(AK::try_make_weak_ptr_if_nonnull(master_tls_region)),
        master_tls_size,
        master_tls_alignment,
        TRY(stack_region->try_make_weak_ptr())
    };
}

// Loads the main program — or, if one is given, the ELF interpreter on its
// behalf — into a brand-new address space and returns the LoadResult.
// On every exit path the CPU is switched back to this process's own address
// space via the ScopeGuard (load_elf_object leaves us in the new one).
ErrorOr<LoadResult>
Process::load(NonnullRefPtr<OpenFileDescription> main_program_description,
    RefPtr<OpenFileDescription> interpreter_description, const ElfW(Ehdr) & main_program_header)
{
    auto new_space = TRY(Memory::AddressSpace::try_create(nullptr));

    ScopeGuard space_guard([&]() {
        Memory::MemoryManager::enter_process_address_space(*this);
    });

    auto load_offset = TRY(get_load_offset(main_program_header, main_program_description, interpreter_description));

    if (interpreter_description.is_null()) {
        // Statically linked: load the main program directly and remember its
        // master TLS parameters for future thread creation.
        auto load_result = TRY(load_elf_object(move(new_space), main_program_description, load_offset, ShouldAllocateTls::Yes, ShouldAllowSyscalls::No));
        m_master_tls_region = load_result.tls_region;
        m_master_tls_size = load_result.tls_size;
        m_master_tls_alignment = load_result.tls_alignment;
        return load_result;
    }

    // Dynamically linked: only the interpreter is loaded by the kernel; it
    // will map the main program itself (via the fd we pass in the aux vector).
    auto interpreter_load_result = TRY(load_elf_object(move(new_space), *interpreter_description, load_offset, ShouldAllocateTls::No, ShouldAllowSyscalls::Yes));

    // TLS allocation will be done in userspace by the loader
    VERIFY(!interpreter_load_result.tls_region);
    VERIFY(!interpreter_load_result.tls_alignment);
    VERIFY(!interpreter_load_result.tls_size);

    return interpreter_load_result;
}

// Applies POSIX exec semantics to the signal dispositions.
void Process::clear_signal_handlers_for_exec()
{
    // Comments are as they are presented in the POSIX specification, but slightly out of order.
    for (size_t signal = 0; signal < m_signal_action_data.size(); signal++) {
        // Except for SIGCHLD, signals set to be ignored by the calling process image shall be set to be ignored by the new process image.
        // If the SIGCHLD signal is set to be ignored by the calling process image, it is unspecified whether the SIGCHLD signal is set
        // to be ignored or to the default action in the new process image.
        if (signal != SIGCHLD && m_signal_action_data[signal].handler_or_sigaction.get() == reinterpret_cast<FlatPtr>(SIG_IGN)) {
            m_signal_action_data[signal] = {};
            m_signal_action_data[signal].handler_or_sigaction.set(reinterpret_cast<FlatPtr>(SIG_IGN));
            continue;
        }

        // Signals set to the default action in the calling process image shall be set to the default action in the new process image.
        // Signals set to be caught by the calling process image shall be set to the default action in the new process image.
        m_signal_action_data[signal] = {};
    }
}

// The heart of execve(): replaces this process's image with the given program.
// Past the "no turning back" point below, failure is not survivable.
ErrorOr<void> Process::do_exec(NonnullRefPtr<OpenFileDescription> main_program_description, Vector<NonnullOwnPtr<KString>> arguments, Vector<NonnullOwnPtr<KString>> environment,
    RefPtr<OpenFileDescription> interpreter_description, Thread*& new_main_thread, InterruptsState& previous_interrupts_state, const ElfW(Ehdr) & main_program_header)
{
    VERIFY(is_user_process());
    VERIFY(!Processor::in_critical());
    auto main_program_metadata = main_program_description->metadata();
    // NOTE: Don't allow running SUID binaries at all if we are in a jail.
    TRY(Process::current().jail().with([&](auto const& my_jail) -> ErrorOr<void> {
        if (my_jail && (main_program_metadata.is_setuid() || main_program_metadata.is_setgid())) {
            return Error::from_errno(EPERM);
        }
        return {};
    }));

    // Although we *could* handle a pseudo_path here, trying to execute something that doesn't have
    // a custody (e.g. BlockDevice or RandomDevice) is pretty suspicious anyway.
    auto path = TRY(main_program_description->original_absolute_path());

    dbgln_if(EXEC_DEBUG, "do_exec: {}", path);

    // The new process/thread name is the executable's basename.
    auto last_part = path->view().find_last_split_view('/');

    auto new_process_name = TRY(KString::try_create(last_part));
    auto new_main_thread_name = TRY(new_process_name->try_clone());

    // Map the new image into a fresh address space (we are left running in it).
    auto load_result = TRY(load(main_program_description, interpreter_description, main_program_header));

    // NOTE: We don't need the interpreter executable description after this point.
    // We destroy it here to prevent it from getting destroyed when we return from this function.
    // That's important because when we're returning from this function, we're in a very delicate
    // state where we can't block (e.g by trying to acquire a mutex in description teardown.)
    bool has_interpreter = interpreter_description;
    interpreter_description = nullptr;

    auto* signal_trampoline_region = TRY(load_result.space->allocate_region_with_vmobject(Memory::RandomizeVirtualAddress::Yes, {}, PAGE_SIZE, PAGE_SIZE, g_signal_trampoline_region->vmobject(), 0, "Signal trampoline"sv, PROT_READ | PROT_EXEC, true));
    signal_trampoline_region->set_syscall_region(true);

    // (For dynamically linked executable) Allocate an FD for passing the main executable to the dynamic loader.
    Optional<ScopedDescriptionAllocation> main_program_fd_allocation;
    if (has_interpreter)
        main_program_fd_allocation = TRY(allocate_fd());

    auto old_credentials = this->credentials();
    auto new_credentials = old_credentials;
    auto old_process_attached_jail = m_attached_jail.with([&](auto& jail) -> RefPtr<Jail> { return jail; });
    auto old_scoped_list = m_jail_process_list.with([&](auto& list) -> RefPtr<ProcessList> { return list; });

    bool executable_is_setid = false;

    // setuid/setgid are honored only if the mount wasn't mounted MS_NOSUID.
    if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
        auto new_euid = old_credentials->euid();
        auto new_egid = old_credentials->egid();
        auto new_suid = old_credentials->suid();
        auto new_sgid = old_credentials->sgid();

        if (main_program_metadata.is_setuid()) {
            executable_is_setid = true;
            new_euid = main_program_metadata.uid;
            new_suid = main_program_metadata.uid;
        }
        if (main_program_metadata.is_setgid()) {
            executable_is_setid = true;
            new_egid = main_program_metadata.gid;
            new_sgid = main_program_metadata.gid;
        }

        if (executable_is_setid) {
            new_credentials = TRY(Credentials::create(
                old_credentials->uid(),
                old_credentials->gid(),
                new_euid,
                new_egid,
                new_suid,
                new_sgid,
                old_credentials->extra_gids(),
                old_credentials->sid(),
                old_credentials->pgid()));
        }
    }

    // We commit to the new executable at this point. There is no turning back!

    // Prevent other processes from attaching to us with ptrace while we're doing this.
    MutexLocker ptrace_locker(ptrace_lock());

    // Disable profiling temporarily in case it's running on this process.
    auto was_profiling = m_profiling;
    TemporaryChange profiling_disabler(m_profiling, false);

    kill_threads_except_self();

    with_mutable_protected_data([&](auto& protected_data) {
        protected_data.credentials = move(new_credentials);
        // setid processes are not dumpable, so their memory can't be inspected.
        protected_data.dumpable = !executable_is_setid;
        protected_data.executable_is_setid = executable_is_setid;
    });

    // We make sure to enter the new address space before destroying the old one.
    // This ensures that the process always has a valid page directory.
    Memory::MemoryManager::enter_address_space(*load_result.space);

    m_space.with([&](auto& space) { space = load_result.space.release_nonnull(); });

    m_executable.with([&](auto& executable) { executable = main_program_description->custody(); });
    m_arguments = move(arguments);
    m_attached_jail.with([&](auto& jail) {
        jail = old_process_attached_jail;
    });

    m_jail_process_list.with([&](auto& list) {
        list = old_scoped_list;
    });

    m_environment = move(environment);

    TRY(m_unveil_data.with([&](auto& unveil_data) -> ErrorOr<void> {
        TRY(m_exec_unveil_data.with([&](auto& exec_unveil_data) -> ErrorOr<void> {
            // Note: If we have exec unveil data being waiting to be dispatched
            // to the current execve'd program, then we apply the unveil data and
            // ensure it is locked in the new program.
            if (exec_unveil_data.state == VeilState::Dropped) {
                unveil_data.state = VeilState::LockedInherited;
                exec_unveil_data.state = VeilState::None;
                unveil_data.paths = TRY(exec_unveil_data.paths.deep_copy());
            } else {
                unveil_data.state = VeilState::None;
                exec_unveil_data.state = VeilState::None;
                unveil_data.paths.clear();
                unveil_data.paths.set_metadata({ TRY(KString::try_create("/"sv)), UnveilAccess::None, false });
            }
            exec_unveil_data.paths.clear();
            exec_unveil_data.paths.set_metadata({ TRY(KString::try_create("/"sv)), UnveilAccess::None, false });
            return {};
        }));
        return {};
    }));

    m_coredump_properties.for_each([](auto& property) {
        property = {};
    });

    auto* current_thread = Thread::current();
    current_thread->reset_signals_for_exec();

    clear_signal_handlers_for_exec();

    clear_futex_queues_on_exec();

    // Drop all close-on-exec file descriptors.
    m_fds.with_exclusive([&](auto& fds) {
        fds.change_each([&](auto& file_description_metadata) {
            if (file_description_metadata.is_valid() && file_description_metadata.flags() & FD_CLOEXEC)
                file_description_metadata = {};
        });
    });

    // Install the main executable's fd for the dynamic loader (itself CLOEXEC).
    if (main_program_fd_allocation.has_value()) {
        main_program_description->set_readable(true);
        m_fds.with_exclusive([&](auto& fds) { fds[main_program_fd_allocation->fd].set(move(main_program_description), FD_CLOEXEC); });
    }

    // The surviving thread becomes the new main thread. If the caller is a
    // thread of another process (e.g. exec on behalf), pick any thread of ours.
    new_main_thread = nullptr;
    if (&current_thread->process() == this) {
        new_main_thread = current_thread;
    } else {
        for_each_thread([&](auto& thread) {
            new_main_thread = &thread;
            return IterationDecision::Break;
        });
    }
    VERIFY(new_main_thread);

    auto credentials = this->credentials();
    auto auxv = generate_auxiliary_vector(load_result.load_base, load_result.entry_eip, credentials->uid(), credentials->euid(), credentials->gid(), credentials->egid(), path->view(), main_program_fd_allocation);

    // FIXME: How much stack space does process startup need?
    if (!validate_stack_size(m_arguments, m_environment, auxv))
        return E2BIG;

    // NOTE: We create the new stack before disabling interrupts since it will zero-fault
    // and we don't want to deal with faults after this point.
    auto new_userspace_sp = TRY(make_userspace_context_for_main_thread(new_main_thread->regs(), *load_result.stack_region.unsafe_ptr(), m_arguments, m_environment, move(auxv)));

    set_name(move(new_process_name));
    new_main_thread->set_name(move(new_main_thread_name));

    if (wait_for_tracer_at_next_execve()) {
        // Make sure we release the ptrace lock here or the tracer will block forever.
        ptrace_locker.unlock();
        Thread::current()->send_urgent_signal_to_self(SIGSTOP);
    } else {
        // Unlock regardless before disabling interrupts.
        // Ensure we always unlock after checking ptrace status to avoid TOCTOU ptrace issues
        ptrace_locker.unlock();
    }

    // We enter a critical section here because we don't want to get interrupted between do_exec()
    // and Processor::assume_context() or the next context switch.
    // If we used an InterruptDisabler that calls enable_interrupts() on exit, we might timer tick'd too soon in exec().
    Processor::enter_critical();
    previous_interrupts_state = processor_interrupts_state();
    Processor::disable_interrupts();

    // NOTE: Be careful to not trigger any page faults below!
    with_mutable_protected_data([&](auto& protected_data) {
        // pledge(): execpromises become the active promises of the new image.
        protected_data.promises = protected_data.execpromises.load();
        protected_data.has_promises = protected_data.has_execpromises.load();

        protected_data.execpromises = 0;
        protected_data.has_execpromises = false;

        protected_data.signal_trampoline = signal_trampoline_region->vaddr();

        // FIXME: PID/TID ISSUE
        protected_data.pid = new_main_thread->tid().value();
    });

    auto tsr_result = new_main_thread->make_thread_specific_region({});
    if (tsr_result.is_error()) {
        // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable.
        VERIFY_NOT_REACHED();
    }
    new_main_thread->reset_fpu_state();

    // Point the main thread's registers at the new entry point and stack.
    auto& regs = new_main_thread->m_regs;
    address_space().with([&](auto& space) {
        regs.set_exec_state(load_result.entry_eip, new_userspace_sp, *space);
    });

    // Re-enable profiling (if it was on) just long enough to record the exec event.
    {
        TemporaryChange profiling_disabler(m_profiling, was_profiling);
        PerformanceManager::add_process_exec_event(*this);
    }

    u32 lock_count_to_restore;
    [[maybe_unused]] auto rc = big_lock().force_unlock_exclusive_if_locked(lock_count_to_restore);
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::in_critical());
    return {};
}

// Builds the auxiliary vector handed to the new program (or its dynamic
// loader) on the initial stack. Must produce exactly auxiliary_vector_size
// entries, ending with AT_NULL.
static Array<ELF::AuxiliaryValue, auxiliary_vector_size> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, StringView executable_path, Optional<Process::ScopedDescriptionAllocation> const& main_program_fd_allocation)
{
    return { {
        // PHDR/EXECFD
        // PH*
        { ELF::AuxiliaryValue::PageSize, PAGE_SIZE },
        { ELF::AuxiliaryValue::BaseAddress, (void*)load_base },

        { ELF::AuxiliaryValue::Entry, (void*)entry_eip },
        // NOTELF
        { ELF::AuxiliaryValue::Uid, (long)uid.value() },
        { ELF::AuxiliaryValue::EUid, (long)euid.value() },
        { ELF::AuxiliaryValue::Gid, (long)gid.value() },
        {
ELF::AuxiliaryValue::EGid, (long)egid.value() }, 729 730 { ELF::AuxiliaryValue::Platform, Processor::platform_string() }, 731 // FIXME: This is platform specific 732#if ARCH(X86_64) 733 { ELF::AuxiliaryValue::HwCap, (long)CPUID(1).edx() }, 734#elif ARCH(AARCH64) 735 { ELF::AuxiliaryValue::HwCap, (long)0 }, 736#else 737# error "Unknown architecture" 738#endif 739 740 { ELF::AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() }, 741 742 // FIXME: Also take into account things like extended filesystem permissions? That's what linux does... 743 { ELF::AuxiliaryValue::Secure, ((uid != euid) || (gid != egid)) ? 1 : 0 }, 744 745 { ELF::AuxiliaryValue::Random, nullptr }, 746 747 { ELF::AuxiliaryValue::ExecFilename, executable_path }, 748 749 main_program_fd_allocation.has_value() ? ELF::AuxiliaryValue { ELF::AuxiliaryValue::ExecFileDescriptor, main_program_fd_allocation->fd } : ELF::AuxiliaryValue { ELF::AuxiliaryValue::Ignore, 0L }, 750 751 { ELF::AuxiliaryValue::Null, 0L }, 752 } }; 753} 754 755static ErrorOr<Vector<NonnullOwnPtr<KString>>> find_shebang_interpreter_for_executable(char const first_page[], size_t nread) 756{ 757 int word_start = 2; 758 size_t word_length = 0; 759 if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') { 760 Vector<NonnullOwnPtr<KString>> interpreter_words; 761 762 for (size_t i = 2; i < nread; ++i) { 763 if (first_page[i] == '\n') { 764 break; 765 } 766 767 if (first_page[i] != ' ') { 768 ++word_length; 769 } 770 771 if (first_page[i] == ' ') { 772 if (word_length > 0) { 773 auto word = TRY(KString::try_create(StringView { &first_page[word_start], word_length })); 774 TRY(interpreter_words.try_append(move(word))); 775 } 776 word_length = 0; 777 word_start = i + 1; 778 } 779 } 780 781 if (word_length > 0) { 782 auto word = TRY(KString::try_create(StringView { &first_page[word_start], word_length })); 783 TRY(interpreter_words.try_append(move(word))); 784 } 785 786 if (!interpreter_words.is_empty()) 787 return 
interpreter_words; 788 } 789 790 return ENOEXEC; 791} 792 793ErrorOr<RefPtr<OpenFileDescription>> Process::find_elf_interpreter_for_executable(StringView path, ElfW(Ehdr) const& main_executable_header, size_t main_executable_header_size, size_t file_size) 794{ 795 // Not using ErrorOr here because we'll want to do the same thing in userspace in the RTLD 796 StringBuilder interpreter_path_builder; 797 if (!TRY(ELF::validate_program_headers(main_executable_header, file_size, { &main_executable_header, main_executable_header_size }, &interpreter_path_builder))) { 798 dbgln("exec({}): File has invalid ELF Program headers", path); 799 return ENOEXEC; 800 } 801 auto interpreter_path = interpreter_path_builder.string_view(); 802 803 if (!interpreter_path.is_empty()) { 804 dbgln_if(EXEC_DEBUG, "exec({}): Using program interpreter {}", path, interpreter_path); 805 auto interpreter_description = TRY(VirtualFileSystem::the().open(credentials(), interpreter_path, O_EXEC, 0, current_directory())); 806 auto interp_metadata = interpreter_description->metadata(); 807 808 VERIFY(interpreter_description->inode()); 809 810 // Validate the program interpreter as a valid elf binary. 811 // If your program interpreter is a #! 
file or something, it's time to stop playing games :) 812 if (interp_metadata.size < (int)sizeof(ElfW(Ehdr))) 813 return ENOEXEC; 814 815 char first_page[PAGE_SIZE] = {}; 816 auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page); 817 auto nread = TRY(interpreter_description->read(first_page_buffer, sizeof(first_page))); 818 819 if (nread < sizeof(ElfW(Ehdr))) 820 return ENOEXEC; 821 822 auto* elf_header = (ElfW(Ehdr)*)first_page; 823 if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) { 824 dbgln("exec({}): Interpreter ({}) has invalid ELF header", path, interpreter_path); 825 return ENOEXEC; 826 } 827 828 // Not using ErrorOr here because we'll want to do the same thing in userspace in the RTLD 829 StringBuilder interpreter_interpreter_path_builder; 830 if (!TRY(ELF::validate_program_headers(*elf_header, interp_metadata.size, { first_page, nread }, &interpreter_interpreter_path_builder))) { 831 dbgln("exec({}): Interpreter ({}) has invalid ELF Program headers", path, interpreter_path); 832 return ENOEXEC; 833 } 834 auto interpreter_interpreter_path = interpreter_interpreter_path_builder.string_view(); 835 836 if (!interpreter_interpreter_path.is_empty()) { 837 dbgln("exec({}): Interpreter ({}) has its own interpreter ({})! No thank you!", path, interpreter_path, interpreter_interpreter_path); 838 return ELOOP; 839 } 840 841 return interpreter_description; 842 } 843 844 if (main_executable_header.e_type == ET_REL) { 845 // We can't exec an ET_REL, that's just an object file from the compiler 846 return ENOEXEC; 847 } 848 if (main_executable_header.e_type == ET_DYN) { 849 // If it's ET_DYN with no PT_INTERP, then it's a dynamic executable responsible 850 // for its own relocation (i.e. 
it's /usr/lib/Loader.so) 851 if (path != "/usr/lib/Loader.so") 852 dbgln("exec({}): WARNING - Dynamic ELF executable without a PT_INTERP header, and isn't /usr/lib/Loader.so", path); 853 return nullptr; 854 } 855 856 // No interpreter, but, path refers to a valid elf image 857 return nullptr; 858} 859 860ErrorOr<void> Process::exec(NonnullOwnPtr<KString> path, Vector<NonnullOwnPtr<KString>> arguments, Vector<NonnullOwnPtr<KString>> environment, Thread*& new_main_thread, InterruptsState& previous_interrupts_state, int recursion_depth) 861{ 862 if (recursion_depth > 2) { 863 dbgln("exec({}): SHENANIGANS! recursed too far trying to find #! interpreter", path); 864 return ELOOP; 865 } 866 867 // Open the file to check what kind of binary format it is 868 // Currently supported formats: 869 // - #! interpreted file 870 // - ELF32 871 // * ET_EXEC binary that just gets loaded 872 // * ET_DYN binary that requires a program interpreter 873 // 874 auto description = TRY(VirtualFileSystem::the().open(credentials(), path->view(), O_EXEC, 0, current_directory())); 875 auto metadata = description->metadata(); 876 877 if (!metadata.is_regular_file()) 878 return EACCES; 879 880 // Always gonna need at least 3 bytes. these are for #!X 881 if (metadata.size < 3) 882 return ENOEXEC; 883 884 VERIFY(description->inode()); 885 886 // Read the first page of the program into memory so we can validate the binfmt of it 887 char first_page[PAGE_SIZE]; 888 auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page); 889 auto nread = TRY(description->read(first_page_buffer, sizeof(first_page))); 890 891 // 1) #! 
interpreted file 892 auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread); 893 if (!shebang_result.is_error()) { 894 auto shebang_words = shebang_result.release_value(); 895 auto shebang_path = TRY(shebang_words.first()->try_clone()); 896 arguments[0] = move(path); 897 TRY(arguments.try_prepend(move(shebang_words))); 898 return exec(move(shebang_path), move(arguments), move(environment), new_main_thread, previous_interrupts_state, ++recursion_depth); 899 } 900 901 // #2) ELF32 for i386 902 903 if (nread < sizeof(ElfW(Ehdr))) 904 return ENOEXEC; 905 auto const* main_program_header = (ElfW(Ehdr)*)first_page; 906 907 if (!ELF::validate_elf_header(*main_program_header, metadata.size)) { 908 dbgln("exec({}): File has invalid ELF header", path); 909 return ENOEXEC; 910 } 911 912 auto interpreter_description = TRY(find_elf_interpreter_for_executable(path->view(), *main_program_header, nread, metadata.size)); 913 return do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, previous_interrupts_state, *main_program_header); 914} 915 916ErrorOr<FlatPtr> Process::sys$execve(Userspace<Syscall::SC_execve_params const*> user_params) 917{ 918 VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this); 919 TRY(require_promise(Pledge::exec)); 920 921 Thread* new_main_thread = nullptr; 922 InterruptsState previous_interrupts_state = InterruptsState::Enabled; 923 924 // NOTE: Be extremely careful with allocating any kernel memory in this function. 925 // On success, the kernel stack will be lost. 926 // The explicit block scope below is specifically placed to minimize the number 927 // of stack locals in this function. 
928 { 929 auto params = TRY(copy_typed_from_user(user_params)); 930 931 if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX) 932 return E2BIG; 933 934 // NOTE: The caller is expected to always pass at least one argument by convention, 935 // the program path that was passed as params.path. 936 if (params.arguments.length == 0) 937 return EINVAL; 938 939 auto path = TRY(get_syscall_path_argument(params.path)); 940 941 auto copy_user_strings = [](auto const& list, auto& output) -> ErrorOr<void> { 942 if (!list.length) 943 return {}; 944 Checked<size_t> size = sizeof(*list.strings); 945 size *= list.length; 946 if (size.has_overflow()) 947 return EOVERFLOW; 948 Vector<Syscall::StringArgument, 32> strings; 949 TRY(strings.try_resize(list.length)); 950 TRY(copy_from_user(strings.data(), list.strings, size.value())); 951 for (size_t i = 0; i < list.length; ++i) { 952 auto string = TRY(try_copy_kstring_from_user(strings[i])); 953 TRY(output.try_append(move(string))); 954 } 955 return {}; 956 }; 957 958 Vector<NonnullOwnPtr<KString>> arguments; 959 TRY(copy_user_strings(params.arguments, arguments)); 960 961 Vector<NonnullOwnPtr<KString>> environment; 962 TRY(copy_user_strings(params.environment, environment)); 963 964 TRY(exec(move(path), move(arguments), move(environment), new_main_thread, previous_interrupts_state)); 965 } 966 967 // NOTE: If we're here, the exec has succeeded and we've got a new executable image! 968 // We will not return normally from this function. Instead, the next time we 969 // get scheduled, it'll be at the entry point of the new executable. 970 971 VERIFY_INTERRUPTS_DISABLED(); 972 VERIFY(Processor::in_critical()); 973 974 auto* current_thread = Thread::current(); 975 if (current_thread == new_main_thread) { 976 // We need to enter the scheduler lock before changing the state 977 // and it will be released after the context switch into that 978 // thread. 
We should also still be in our critical section 979 VERIFY(!g_scheduler_lock.is_locked_by_current_processor()); 980 VERIFY(Processor::in_critical() == 1); 981 g_scheduler_lock.lock(); 982 current_thread->set_state(Thread::State::Running); 983#if ARCH(X86_64) 984 FlatPtr prev_flags = previous_interrupts_state == InterruptsState::Enabled ? 0x200 : 0; 985 Processor::assume_context(*current_thread, prev_flags); 986 VERIFY_NOT_REACHED(); 987#elif ARCH(AARCH64) 988 TODO_AARCH64(); 989#else 990# error Unknown architecture 991#endif 992 } 993 994 // NOTE: This code path is taken in the non-syscall case, i.e when the kernel spawns 995 // a userspace process directly (such as /bin/SystemServer on startup) 996 997 restore_processor_interrupts_state(previous_interrupts_state); 998 Processor::leave_critical(); 999 return 0; 1000} 1001 1002}