Serenity Operating System
/*
 * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/StringView.h>
#include <Kernel/Arch/PageDirectory.h>
#include <Kernel/Arch/PageFault.h>
#include <Kernel/Debug.h>
#include <Kernel/FileSystem/Inode.h>
#include <Kernel/InterruptDisabler.h>
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Memory/SharedInodeVMObject.h>
#include <Kernel/Panic.h>
#include <Kernel/Process.h>
#include <Kernel/Scheduler.h>
#include <Kernel/Thread.h>

namespace Kernel::Memory {

Region::Region()
    : m_range(VirtualRange({}, 0))
{
}

Region::Region(NonnullLockRefPtr<VMObject> vmobject, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable cacheable, bool shared)
    : m_range(VirtualRange({}, 0))
    , m_offset_in_vmobject(offset_in_vmobject)
    , m_vmobject(move(vmobject))
    , m_name(move(name))
    , m_access(access | ((access & 0x7) << 4))
    , m_shared(shared)
    , m_cacheable(cacheable == Cacheable::Yes)
{
    m_vmobject->add_region(*this);
}

Region::Region(VirtualRange const& range, NonnullLockRefPtr<VMObject> vmobject, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable cacheable, bool shared)
    : m_range(range)
    , m_offset_in_vmobject(offset_in_vmobject)
    , m_vmobject(move(vmobject))
    , m_name(move(name))
    , m_access(access | ((access & 0x7) << 4))
    , m_shared(shared)
    , m_cacheable(cacheable == Cacheable::Yes)
{
    VERIFY(m_range.base().is_page_aligned());
    VERIFY(m_range.size());
    VERIFY((m_range.size() % PAGE_SIZE) == 0);

    m_vmobject->add_region(*this);
}

Region::~Region()
{
    if (is_writable() && vmobject().is_shared_inode()) {
        // FIXME: This is very aggressive. Find a way to do less work!
        (void)static_cast<SharedInodeVMObject&>(vmobject()).sync();
    }

    m_vmobject->remove_region(*this);

    if (m_page_directory) {
        SpinlockLocker pd_locker(m_page_directory->get_lock());
        if (!is_readable() && !is_writable() && !is_executable()) {
            // If the region is "PROT_NONE", we didn't map it in the first place.
        } else {
            unmap_with_locks_held(ShouldFlushTLB::Yes, pd_locker);
            VERIFY(!m_page_directory);
        }
    }

    if (is_kernel())
        MM.unregister_kernel_region(*this);
}

ErrorOr<NonnullOwnPtr<Region>> Region::create_unbacked()
{
    return adopt_nonnull_own_or_enomem(new (nothrow) Region);
}

ErrorOr<NonnullOwnPtr<Region>> Region::create_unplaced(NonnullLockRefPtr<VMObject> vmobject, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable cacheable, bool shared)
{
    return adopt_nonnull_own_or_enomem(new (nothrow) Region(move(vmobject), offset_in_vmobject, move(name), access, cacheable, shared));
}
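
// try_clone() is used when forking an address space: shared regions keep
// referring to the same VMObject, while private regions get a CoW clone of
// their VMObject, and the parent region is remapped so that its now-CoW pages
// are write-protected as well.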
ErrorOr<NonnullOwnPtr<Region>> Region::try_clone()
{
    VERIFY(Process::has_current());

    if (m_shared) {
        VERIFY(!m_stack);
        if (vmobject().is_inode())
            VERIFY(vmobject().is_shared_inode());

        // Create a new region backed by the same VMObject.

        OwnPtr<KString> region_name;
        if (m_name)
            region_name = TRY(m_name->try_clone());

        auto region = TRY(Region::try_create_user_accessible(
            m_range, vmobject(), m_offset_in_vmobject, move(region_name), access(), m_cacheable ? Cacheable::Yes : Cacheable::No, m_shared));
        region->set_mmap(m_mmap, m_mmapped_from_readable, m_mmapped_from_writable);
        region->set_shared(m_shared);
        region->set_syscall_region(is_syscall_region());
        return region;
    }

    if (vmobject().is_inode())
        VERIFY(vmobject().is_private_inode());

    auto vmobject_clone = TRY(vmobject().try_clone());

    // Set up a COW region. The parent (this) region becomes COW as well!
    if (is_writable())
        remap();

    OwnPtr<KString> clone_region_name;
    if (m_name)
        clone_region_name = TRY(m_name->try_clone());

    auto clone_region = TRY(Region::try_create_user_accessible(
        m_range, move(vmobject_clone), m_offset_in_vmobject, move(clone_region_name), access(), m_cacheable ? Cacheable::Yes : Cacheable::No, m_shared));

    if (m_stack) {
        VERIFY(vmobject().is_anonymous());
        clone_region->set_stack(true);
    }
    clone_region->set_syscall_region(is_syscall_region());
    clone_region->set_mmap(m_mmap, m_mmapped_from_readable, m_mmapped_from_writable);
    return clone_region;
}

void Region::set_vmobject(NonnullLockRefPtr<VMObject>&& obj)
{
    if (m_vmobject.ptr() == obj.ptr())
        return;
    m_vmobject->remove_region(*this);
    m_vmobject = move(obj);
    m_vmobject->add_region(*this);
}

size_t Region::cow_pages() const
{
    if (!vmobject().is_anonymous())
        return 0;
    return static_cast<AnonymousVMObject const&>(vmobject()).cow_pages();
}

size_t Region::amount_dirty() const
{
    if (!vmobject().is_inode())
        return amount_resident();
    return static_cast<InodeVMObject const&>(vmobject()).amount_dirty();
}

size_t Region::amount_resident() const
{
    size_t bytes = 0;
    for (size_t i = 0; i < page_count(); ++i) {
        auto page = physical_page(i);
        if (page && !page->is_shared_zero_page() && !page->is_lazy_committed_page())
            bytes += PAGE_SIZE;
    }
    return bytes;
}

size_t Region::amount_shared() const
{
    size_t bytes = 0;
    for (size_t i = 0; i < page_count(); ++i) {
        auto page = physical_page(i);
        if (page && page->ref_count() > 1 && !page->is_shared_zero_page() && !page->is_lazy_committed_page())
            bytes += PAGE_SIZE;
    }
    return bytes;
}

ErrorOr<NonnullOwnPtr<Region>> Region::try_create_user_accessible(VirtualRange const& range, NonnullLockRefPtr<VMObject> vmobject, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable cacheable, bool shared)
{
    return adopt_nonnull_own_or_enomem(new (nothrow) Region(range, move(vmobject), offset_in_vmobject, move(name), access, cacheable, shared));
}

bool Region::should_cow(size_t page_index) const
{
    if (!vmobject().is_anonymous())
        return false;
    return static_cast<AnonymousVMObject const&>(vmobject()).should_cow(first_page_index() + page_index, m_shared);
}

ErrorOr<void> Region::set_should_cow(size_t page_index, bool cow)
{
    VERIFY(!m_shared);
    if (vmobject().is_anonymous())
        TRY(static_cast<AnonymousVMObject&>(vmobject()).set_should_cow(first_page_index() + page_index, cow));
    return {};
}
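
// Writes the page table entry for a single page of this region. Requires the
// page directory lock to be held. A null page (or a region with neither read
// nor write access) clears the PTE; shared-zero, lazily committed, and CoW
// pages are always mapped read-only so that the first write traps.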
PANIC("About to map mmap'ed page at a kernel address"); 212 } 213 214 auto* pte = MM.ensure_pte(*m_page_directory, page_vaddr); 215 if (!pte) 216 return false; 217 218 if (!page || (!is_readable() && !is_writable())) { 219 pte->clear(); 220 return true; 221 } 222 223 pte->set_cache_disabled(!m_cacheable); 224 pte->set_physical_page_base(page->paddr().get()); 225 pte->set_present(true); 226 if (page->is_shared_zero_page() || page->is_lazy_committed_page() || should_cow(page_index)) 227 pte->set_writable(false); 228 else 229 pte->set_writable(is_writable()); 230 if (Processor::current().has_nx()) 231 pte->set_execute_disabled(!is_executable()); 232 if (Processor::current().has_pat()) 233 pte->set_pat(is_write_combine()); 234 pte->set_user_allowed(user_allowed); 235 236 return true; 237} 238 239bool Region::map_individual_page_impl(size_t page_index) 240{ 241 RefPtr<PhysicalPage> page; 242 { 243 SpinlockLocker vmobject_locker(vmobject().m_lock); 244 page = physical_page(page_index); 245 } 246 247 return map_individual_page_impl(page_index, page); 248} 249 250bool Region::remap_vmobject_page(size_t page_index, NonnullRefPtr<PhysicalPage> physical_page) 251{ 252 SpinlockLocker page_lock(m_page_directory->get_lock()); 253 254 // NOTE: `page_index` is a VMObject page index, so first we convert it to a Region page index. 255 if (!translate_vmobject_page(page_index)) 256 return false; 257 258 bool success = map_individual_page_impl(page_index, physical_page); 259 MemoryManager::flush_tlb(m_page_directory, vaddr_from_page_index(page_index)); 260 return success; 261} 262 263void Region::unmap(ShouldFlushTLB should_flush_tlb) 264{ 265 if (!m_page_directory) 266 return; 267 SpinlockLocker pd_locker(m_page_directory->get_lock()); 268 unmap_with_locks_held(should_flush_tlb, pd_locker); 269} 270 271void Region::unmap_with_locks_held(ShouldFlushTLB should_flush_tlb, SpinlockLocker<RecursiveSpinlock<LockRank::None>>&) 272{ 273 if (!m_page_directory) 274 return; 275 size_t count = page_count(); 276 for (size_t i = 0; i < count; ++i) { 277 auto vaddr = vaddr_from_page_index(i); 278 MM.release_pte(*m_page_directory, vaddr, i == count - 1 ? MemoryManager::IsLastPTERelease::Yes : MemoryManager::IsLastPTERelease::No); 279 } 280 if (should_flush_tlb == ShouldFlushTLB::Yes) 281 MemoryManager::flush_tlb(m_page_directory, vaddr(), page_count()); 282 m_page_directory = nullptr; 283} 284 285void Region::set_page_directory(PageDirectory& page_directory) 286{ 287 VERIFY(!m_page_directory || m_page_directory == &page_directory); 288 m_page_directory = page_directory; 289} 290 291ErrorOr<void> Region::map(PageDirectory& page_directory, ShouldFlushTLB should_flush_tlb) 292{ 293 SpinlockLocker page_lock(page_directory.get_lock()); 294 295 // FIXME: Find a better place for this sanity check(?) 
ErrorOr<void> Region::map(PageDirectory& page_directory, ShouldFlushTLB should_flush_tlb)
{
    SpinlockLocker page_lock(page_directory.get_lock());

    // FIXME: Find a better place for this sanity check(?)
    if (is_user() && !is_shared()) {
        VERIFY(!vmobject().is_shared_inode());
    }

    set_page_directory(page_directory);
    size_t page_index = 0;
    while (page_index < page_count()) {
        if (!map_individual_page_impl(page_index))
            break;
        ++page_index;
    }
    if (page_index > 0) {
        if (should_flush_tlb == ShouldFlushTLB::Yes)
            MemoryManager::flush_tlb(m_page_directory, vaddr(), page_index);
        if (page_index == page_count())
            return {};
    }
    return ENOMEM;
}

void Region::remap()
{
    VERIFY(m_page_directory);
    auto result = map(*m_page_directory);
    if (result.is_error())
        TODO();
}

ErrorOr<void> Region::set_write_combine(bool enable)
{
    if (enable && !Processor::current().has_pat()) {
        dbgln("PAT is not supported, implement MTRR fallback if available");
        return Error::from_errno(ENOTSUP);
    }

    m_write_combine = enable;
    remap();
    return {};
}

void Region::clear_to_zero()
{
    VERIFY(vmobject().is_anonymous());
    SpinlockLocker locker(vmobject().m_lock);
    for (auto i = 0u; i < page_count(); ++i) {
        auto& page = physical_page_slot(i);
        VERIFY(page);
        if (page->is_shared_zero_page())
            continue;
        page = MM.shared_zero_page();
    }
}
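
// Top-level page fault handler for this region. Not-present faults are
// dispatched to the inode, lazy-commit, or zero-page paths; protection
// violations on writable CoW pages go through handle_cow_fault() (or
// handle_zero_fault() for untouched zero/lazily committed pages); anything
// else results in PageFaultResponse::ShouldCrash.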
PageFaultResponse Region::handle_fault(PageFault const& fault)
{
    auto page_index_in_region = page_index_from_address(fault.vaddr());
    if (fault.type() == PageFault::Type::PageNotPresent) {
        if (fault.is_read() && !is_readable()) {
            dbgln("NP(non-readable) fault in Region({})[{}]", this, page_index_in_region);
            return PageFaultResponse::ShouldCrash;
        }
        if (fault.is_write() && !is_writable()) {
            dbgln("NP(non-writable) write fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
            return PageFaultResponse::ShouldCrash;
        }
        if (vmobject().is_inode()) {
            dbgln_if(PAGE_FAULT_DEBUG, "NP(inode) fault in Region({})[{}]", this, page_index_in_region);
            return handle_inode_fault(page_index_in_region);
        }

        SpinlockLocker vmobject_locker(vmobject().m_lock);
        auto& page_slot = physical_page_slot(page_index_in_region);
        if (page_slot->is_lazy_committed_page()) {
            auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
            VERIFY(m_vmobject->is_anonymous());
            page_slot = static_cast<AnonymousVMObject&>(*m_vmobject).allocate_committed_page({});
            if (!remap_vmobject_page(page_index_in_vmobject, *page_slot))
                return PageFaultResponse::OutOfMemory;
            return PageFaultResponse::Continue;
        }
        dbgln("BUG! Unexpected NP fault at {}", fault.vaddr());
        dbgln(" - Physical page slot pointer: {:p}", page_slot.ptr());
        if (page_slot) {
            dbgln(" - Physical page: {}", page_slot->paddr());
            dbgln(" - Lazy committed: {}", page_slot->is_lazy_committed_page());
            dbgln(" - Shared zero: {}", page_slot->is_shared_zero_page());
        }
        return PageFaultResponse::ShouldCrash;
    }
    VERIFY(fault.type() == PageFault::Type::ProtectionViolation);
    if (fault.access() == PageFault::Access::Write && is_writable() && should_cow(page_index_in_region)) {
        dbgln_if(PAGE_FAULT_DEBUG, "PV(cow) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
        auto phys_page = physical_page(page_index_in_region);
        if (phys_page->is_shared_zero_page() || phys_page->is_lazy_committed_page()) {
            dbgln_if(PAGE_FAULT_DEBUG, "NP(zero) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
            return handle_zero_fault(page_index_in_region, *phys_page);
        }
        return handle_cow_fault(page_index_in_region);
    }
    dbgln("PV(error) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
    return PageFaultResponse::ShouldCrash;
}

PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region, PhysicalPage& page_in_slot_at_time_of_fault)
{
    VERIFY(vmobject().is_anonymous());

    auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);

    auto current_thread = Thread::current();
    if (current_thread != nullptr)
        current_thread->did_zero_fault();

    RefPtr<PhysicalPage> new_physical_page;

    if (page_in_slot_at_time_of_fault.is_lazy_committed_page()) {
        VERIFY(m_vmobject->is_anonymous());
        new_physical_page = static_cast<AnonymousVMObject&>(*m_vmobject).allocate_committed_page({});
        dbgln_if(PAGE_FAULT_DEBUG, " >> ALLOCATED COMMITTED {}", new_physical_page->paddr());
    } else {
        auto page_or_error = MM.allocate_physical_page(MemoryManager::ShouldZeroFill::Yes);
        if (page_or_error.is_error()) {
            dmesgln("MM: handle_zero_fault was unable to allocate a physical page");
            return PageFaultResponse::OutOfMemory;
        }
        new_physical_page = page_or_error.release_value();
        dbgln_if(PAGE_FAULT_DEBUG, " >> ALLOCATED {}", new_physical_page->paddr());
    }

    bool already_handled = false;

    {
        SpinlockLocker locker(vmobject().m_lock);
        auto& page_slot = physical_page_slot(page_index_in_region);
        already_handled = !page_slot.is_null() && !page_slot->is_shared_zero_page() && !page_slot->is_lazy_committed_page();
        if (already_handled) {
            // Someone else already faulted in a new page in this slot. That's fine, we'll just remap with their page.
            new_physical_page = page_slot;
        } else {
            // Install the newly allocated page into the VMObject.
            page_slot = new_physical_page;
        }
    }

    if (!remap_vmobject_page(page_index_in_vmobject, *new_physical_page)) {
        dmesgln("MM: handle_zero_fault was unable to allocate a page table to map {}", new_physical_page);
        return PageFaultResponse::OutOfMemory;
    }
    return PageFaultResponse::Continue;
}
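
// Resolves a write fault on a CoW page by delegating to
// AnonymousVMObject::handle_cow_fault() and then remapping the resulting page
// for this region.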
PageFaultResponse Region::handle_cow_fault(size_t page_index_in_region)
{
    auto current_thread = Thread::current();
    if (current_thread)
        current_thread->did_cow_fault();

    if (!vmobject().is_anonymous())
        return PageFaultResponse::ShouldCrash;

    auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
    auto response = reinterpret_cast<AnonymousVMObject&>(vmobject()).handle_cow_fault(page_index_in_vmobject, vaddr().offset(page_index_in_region * PAGE_SIZE));
    if (!remap_vmobject_page(page_index_in_vmobject, *vmobject().physical_pages()[page_index_in_vmobject]))
        return PageFaultResponse::OutOfMemory;
    return response;
}

PageFaultResponse Region::handle_inode_fault(size_t page_index_in_region)
{
    VERIFY(vmobject().is_inode());
    VERIFY(!g_scheduler_lock.is_locked_by_current_processor());

    auto& inode_vmobject = static_cast<InodeVMObject&>(vmobject());

    auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
    auto& vmobject_physical_page_slot = inode_vmobject.physical_pages()[page_index_in_vmobject];

    {
        // NOTE: The VMObject lock is required when manipulating the VMObject's physical page slot.
        SpinlockLocker locker(inode_vmobject.m_lock);
        if (!vmobject_physical_page_slot.is_null()) {
            dbgln_if(PAGE_FAULT_DEBUG, "handle_inode_fault: Page faulted in by someone else before reading, remapping.");
            if (!remap_vmobject_page(page_index_in_vmobject, *vmobject_physical_page_slot))
                return PageFaultResponse::OutOfMemory;
            return PageFaultResponse::Continue;
        }
    }

    dbgln_if(PAGE_FAULT_DEBUG, "Inode fault in {} page index: {}", name(), page_index_in_region);

    auto current_thread = Thread::current();
    if (current_thread)
        current_thread->did_inode_fault();

    u8 page_buffer[PAGE_SIZE];
    auto& inode = inode_vmobject.inode();

    auto buffer = UserOrKernelBuffer::for_kernel_buffer(page_buffer);
    auto result = inode.read_bytes(page_index_in_vmobject * PAGE_SIZE, PAGE_SIZE, buffer, nullptr);

    if (result.is_error()) {
        dmesgln("handle_inode_fault: Error ({}) while reading from inode", result.error());
        return PageFaultResponse::ShouldCrash;
    }

    auto nread = result.value();
    // Note: If we received 0, it means we are at the end of file or after it,
    // which means we should return bus error.
    if (nread == 0)
        return PageFaultResponse::BusError;

    if (nread < PAGE_SIZE) {
        // If we read less than a page, zero out the rest to avoid leaking uninitialized data.
        memset(page_buffer + nread, 0, PAGE_SIZE - nread);
    }

    // Allocate a new physical page, and copy the read inode contents into it.
    auto new_physical_page_or_error = MM.allocate_physical_page(MemoryManager::ShouldZeroFill::No);
    if (new_physical_page_or_error.is_error()) {
        dmesgln("MM: handle_inode_fault was unable to allocate a physical page");
        return PageFaultResponse::OutOfMemory;
    }
    auto new_physical_page = new_physical_page_or_error.release_value();
    {
        InterruptDisabler disabler;
        u8* dest_ptr = MM.quickmap_page(*new_physical_page);
        memcpy(dest_ptr, page_buffer, PAGE_SIZE);
        MM.unquickmap_page();
    }

    {
        // NOTE: The VMObject lock is required when manipulating the VMObject's physical page slot.
        SpinlockLocker locker(inode_vmobject.m_lock);

        if (!vmobject_physical_page_slot.is_null()) {
            // Someone else faulted in this page while we were reading from the inode.
            // No harm done (other than some duplicate work), remap the page here and return.
            dbgln_if(PAGE_FAULT_DEBUG, "handle_inode_fault: Page faulted in by someone else, remapping.");
            if (!remap_vmobject_page(page_index_in_vmobject, *vmobject_physical_page_slot))
                return PageFaultResponse::OutOfMemory;
            return PageFaultResponse::Continue;
        }

        vmobject_physical_page_slot = new_physical_page;
    }

    if (!remap_vmobject_page(page_index_in_vmobject, *vmobject_physical_page_slot))
        return PageFaultResponse::OutOfMemory;

    return PageFaultResponse::Continue;
}
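
// physical_page() takes the VMObject lock and returns its own reference to the
// page, while physical_page_slot() returns a reference into the VMObject's
// page array and requires the caller to already hold the VMObject lock.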
RefPtr<PhysicalPage> Region::physical_page(size_t index) const
{
    SpinlockLocker vmobject_locker(vmobject().m_lock);
    VERIFY(index < page_count());
    return vmobject().physical_pages()[first_page_index() + index];
}

RefPtr<PhysicalPage>& Region::physical_page_slot(size_t index)
{
    VERIFY(vmobject().m_lock.is_locked_by_current_processor());
    VERIFY(index < page_count());
    return vmobject().physical_pages()[first_page_index() + index];
}

}