Serenity Operating System
at portability 508 lines 18 kB view raw
1/* 2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, this 9 * list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include <Kernel/FileSystem/Inode.h> 28#include <Kernel/Process.h> 29#include <Kernel/Thread.h> 30#include <Kernel/VM/AnonymousVMObject.h> 31#include <Kernel/VM/InodeVMObject.h> 32#include <Kernel/VM/MemoryManager.h> 33#include <Kernel/VM/PageDirectory.h> 34#include <Kernel/VM/Region.h> 35 36//#define MM_DEBUG 37//#define PAGE_FAULT_DEBUG 38 39namespace Kernel { 40 41Region::Region(const Range& range, const String& name, u8 access, bool cacheable) 42 : m_range(range) 43 , m_vmobject(AnonymousVMObject::create_with_size(size())) 44 , m_name(name) 45 , m_access(access) 46 , m_cacheable(cacheable) 47{ 48 MM.register_region(*this); 49} 50 51Region::Region(const Range& range, NonnullRefPtr<Inode> inode, const String& name, u8 access, bool cacheable) 52 : m_range(range) 53 , m_vmobject(InodeVMObject::create_with_inode(*inode)) 54 , m_name(name) 55 , m_access(access) 56 , m_cacheable(cacheable) 57{ 58 MM.register_region(*this); 59} 60 61Region::Region(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, u8 access, bool cacheable) 62 : m_range(range) 63 , m_offset_in_vmobject(offset_in_vmobject) 64 , m_vmobject(move(vmobject)) 65 , m_name(name) 66 , m_access(access) 67 , m_cacheable(cacheable) 68{ 69 MM.register_region(*this); 70} 71 72Region::~Region() 73{ 74 // Make sure we disable interrupts so we don't get interrupted between unmapping and unregistering. 75 // Unmapping the region will give the VM back to the RangeAllocator, so an interrupt handler would 76 // find the address<->region mappings in an invalid state there. 77 InterruptDisabler disabler; 78 if (m_page_directory) { 79 unmap(ShouldDeallocateVirtualMemoryRange::Yes); 80 ASSERT(!m_page_directory); 81 } 82 MM.unregister_region(*this); 83} 84 85NonnullOwnPtr<Region> Region::clone() 86{ 87 ASSERT(Process::current); 88 89 // FIXME: What should we do for privately mapped InodeVMObjects? 90 if (m_shared || vmobject().is_inode()) { 91 ASSERT(!m_stack); 92#ifdef MM_DEBUG 93 dbgprintf("%s<%u> Region::clone(): sharing %s (V%p)\n", 94 Process::current->name().characters(), 95 Process::current->pid(), 96 m_name.characters(), 97 vaddr().get()); 98#endif 99 // Create a new region backed by the same VMObject. 100 auto region = Region::create_user_accessible(m_range, m_vmobject, m_offset_in_vmobject, m_name, m_access); 101 region->set_mmap(m_mmap); 102 region->set_shared(m_shared); 103 return region; 104 } 105 106#ifdef MM_DEBUG 107 dbgprintf("%s<%u> Region::clone(): cowing %s (V%p)\n", 108 Process::current->name().characters(), 109 Process::current->pid(), 110 m_name.characters(), 111 vaddr().get()); 112#endif 113 // Set up a COW region. The parent (this) region becomes COW as well! 114 ensure_cow_map().fill(true); 115 remap(); 116 auto clone_region = Region::create_user_accessible(m_range, m_vmobject->clone(), m_offset_in_vmobject, m_name, m_access); 117 clone_region->ensure_cow_map(); 118 if (m_stack) { 119 ASSERT(is_readable()); 120 ASSERT(is_writable()); 121 ASSERT(!is_shared()); 122 ASSERT(vmobject().is_anonymous()); 123 clone_region->set_stack(true); 124 } 125 clone_region->set_mmap(m_mmap); 126 return clone_region; 127} 128 129bool Region::commit() 130{ 131 InterruptDisabler disabler; 132#ifdef MM_DEBUG 133 dbgprintf("MM: commit %u pages in Region %p (VMO=%p) at V%p\n", vmobject().page_count(), this, &vmobject(), vaddr().get()); 134#endif 135 for (size_t i = 0; i < page_count(); ++i) { 136 if (!commit(i)) 137 return false; 138 } 139 return true; 140} 141 142bool Region::commit(size_t page_index) 143{ 144 ASSERT(vmobject().is_anonymous() || vmobject().is_purgeable()); 145 InterruptDisabler disabler; 146#ifdef MM_DEBUG 147 dbgprintf("MM: commit single page (%zu) in Region %p (VMO=%p) at V%p\n", page_index, vmobject().page_count(), this, &vmobject(), vaddr().get()); 148#endif 149 auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index]; 150 if (!vmobject_physical_page_entry.is_null() && !vmobject_physical_page_entry->is_shared_zero_page()) 151 return true; 152 auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::Yes); 153 if (!physical_page) { 154 kprintf("MM: commit was unable to allocate a physical page\n"); 155 return false; 156 } 157 vmobject_physical_page_entry = move(physical_page); 158 remap_page(page_index); 159 return true; 160} 161 162u32 Region::cow_pages() const 163{ 164 if (!m_cow_map) 165 return 0; 166 u32 count = 0; 167 for (size_t i = 0; i < m_cow_map->size(); ++i) 168 count += m_cow_map->get(i); 169 return count; 170} 171 172size_t Region::amount_dirty() const 173{ 174 if (!vmobject().is_inode()) 175 return amount_resident(); 176 return static_cast<const InodeVMObject&>(vmobject()).amount_dirty(); 177} 178 179size_t Region::amount_resident() const 180{ 181 size_t bytes = 0; 182 for (size_t i = 0; i < page_count(); ++i) { 183 auto& physical_page = m_vmobject->physical_pages()[first_page_index() + i]; 184 if (physical_page && !physical_page->is_shared_zero_page()) 185 bytes += PAGE_SIZE; 186 } 187 return bytes; 188} 189 190size_t Region::amount_shared() const 191{ 192 size_t bytes = 0; 193 for (size_t i = 0; i < page_count(); ++i) { 194 auto& physical_page = m_vmobject->physical_pages()[first_page_index() + i]; 195 if (physical_page && physical_page->ref_count() > 1 && !physical_page->is_shared_zero_page()) 196 bytes += PAGE_SIZE; 197 } 198 return bytes; 199} 200 201NonnullOwnPtr<Region> Region::create_user_accessible(const Range& range, const StringView& name, u8 access, bool cacheable) 202{ 203 auto region = make<Region>(range, name, access, cacheable); 204 region->m_user_accessible = true; 205 return region; 206} 207 208NonnullOwnPtr<Region> Region::create_user_accessible(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const StringView& name, u8 access, bool cacheable) 209{ 210 auto region = make<Region>(range, move(vmobject), offset_in_vmobject, name, access, cacheable); 211 region->m_user_accessible = true; 212 return region; 213} 214 215NonnullOwnPtr<Region> Region::create_user_accessible(const Range& range, NonnullRefPtr<Inode> inode, const StringView& name, u8 access, bool cacheable) 216{ 217 auto region = make<Region>(range, move(inode), name, access, cacheable); 218 region->m_user_accessible = true; 219 return region; 220} 221 222NonnullOwnPtr<Region> Region::create_kernel_only(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const StringView& name, u8 access, bool cacheable) 223{ 224 auto region = make<Region>(range, move(vmobject), offset_in_vmobject, name, access, cacheable); 225 region->m_user_accessible = false; 226 return region; 227} 228 229NonnullOwnPtr<Region> Region::create_kernel_only(const Range& range, const StringView& name, u8 access, bool cacheable) 230{ 231 auto region = make<Region>(range, name, access, cacheable); 232 region->m_user_accessible = false; 233 return region; 234} 235 236bool Region::should_cow(size_t page_index) const 237{ 238 auto& slot = vmobject().physical_pages()[page_index]; 239 if (slot && slot->is_shared_zero_page()) 240 return true; 241 if (m_shared) 242 return false; 243 return m_cow_map && m_cow_map->get(page_index); 244} 245 246void Region::set_should_cow(size_t page_index, bool cow) 247{ 248 ASSERT(!m_shared); 249 ensure_cow_map().set(page_index, cow); 250} 251 252Bitmap& Region::ensure_cow_map() const 253{ 254 if (!m_cow_map) 255 m_cow_map = make<Bitmap>(page_count(), true); 256 return *m_cow_map; 257} 258 259void Region::map_individual_page_impl(size_t page_index) 260{ 261 auto page_vaddr = vaddr().offset(page_index * PAGE_SIZE); 262 auto& pte = MM.ensure_pte(*m_page_directory, page_vaddr); 263 auto& physical_page = vmobject().physical_pages()[first_page_index() + page_index]; 264 if (!physical_page) { 265 pte.clear(); 266 } else { 267 pte.set_cache_disabled(!m_cacheable); 268 pte.set_physical_page_base(physical_page->paddr().get()); 269 pte.set_present(true); 270 if (should_cow(page_index)) 271 pte.set_writable(false); 272 else 273 pte.set_writable(is_writable()); 274 if (g_cpu_supports_nx) 275 pte.set_execute_disabled(!is_executable()); 276 pte.set_user_allowed(is_user_accessible()); 277#ifdef MM_DEBUG 278 dbg() << "MM: >> region map (PD=" << m_page_directory->cr3() << ", PTE=" << (void*)pte.raw() << "{" << &pte << "}) " << name() << " " << page_vaddr << " => " << physical_page->paddr() << " (@" << physical_page.ptr() << ")"; 279#endif 280 } 281 MM.flush_tlb(page_vaddr); 282} 283 284void Region::remap_page(size_t page_index) 285{ 286 ASSERT(m_page_directory); 287 InterruptDisabler disabler; 288 ASSERT(vmobject().physical_pages()[first_page_index() + page_index]); 289 map_individual_page_impl(page_index); 290} 291 292void Region::unmap(ShouldDeallocateVirtualMemoryRange deallocate_range) 293{ 294 InterruptDisabler disabler; 295 ASSERT(m_page_directory); 296 for (size_t i = 0; i < page_count(); ++i) { 297 auto vaddr = this->vaddr().offset(i * PAGE_SIZE); 298 auto& pte = MM.ensure_pte(*m_page_directory, vaddr); 299 pte.clear(); 300 MM.flush_tlb(vaddr); 301#ifdef MM_DEBUG 302 auto& physical_page = vmobject().physical_pages()[first_page_index() + i]; 303 dbgprintf("MM: >> Unmapped V%p => P%p <<\n", vaddr.get(), physical_page ? physical_page->paddr().get() : 0); 304#endif 305 } 306 if (deallocate_range == ShouldDeallocateVirtualMemoryRange::Yes) 307 m_page_directory->range_allocator().deallocate(range()); 308 m_page_directory = nullptr; 309} 310 311void Region::set_page_directory(PageDirectory& page_directory) 312{ 313 ASSERT(!m_page_directory || m_page_directory == &page_directory); 314 InterruptDisabler disabler; 315 m_page_directory = page_directory; 316} 317void Region::map(PageDirectory& page_directory) 318{ 319 set_page_directory(page_directory); 320 InterruptDisabler disabler; 321#ifdef MM_DEBUG 322 dbgprintf("MM: Region::map() will map VMO pages %u - %u (VMO page count: %u)\n", first_page_index(), last_page_index(), vmobject().page_count()); 323#endif 324 for (size_t page_index = 0; page_index < page_count(); ++page_index) 325 map_individual_page_impl(page_index); 326} 327 328void Region::remap() 329{ 330 ASSERT(m_page_directory); 331 map(*m_page_directory); 332} 333 334PageFaultResponse Region::handle_fault(const PageFault& fault) 335{ 336 auto page_index_in_region = page_index_from_address(fault.vaddr()); 337 if (fault.type() == PageFault::Type::PageNotPresent) { 338 if (fault.is_read() && !is_readable()) { 339 dbgprintf("NP(non-readable) fault in Region{%p}[%u]\n", this, page_index_in_region); 340 return PageFaultResponse::ShouldCrash; 341 } 342 343 if (vmobject().is_inode()) { 344#ifdef PAGE_FAULT_DEBUG 345 dbgprintf("NP(inode) fault in Region{%p}[%u]\n", this, page_index_in_region); 346#endif 347 return handle_inode_fault(page_index_in_region); 348 } 349#ifdef MAP_SHARED_ZERO_PAGE_LAZILY 350 if (fault.is_read()) { 351 vmobject().physical_pages()[first_page_index() + page_index_in_region] = MM.shared_zero_page(); 352 remap_page(page_index_in_region); 353 return PageFaultResponse::Continue; 354 } 355 return handle_zero_fault(page_index_in_region); 356#else 357 ASSERT_NOT_REACHED(); 358#endif 359 } 360 ASSERT(fault.type() == PageFault::Type::ProtectionViolation); 361 if (fault.access() == PageFault::Access::Write && is_writable() && should_cow(page_index_in_region)) { 362#ifdef PAGE_FAULT_DEBUG 363 dbgprintf("PV(cow) fault in Region{%p}[%u]\n", this, page_index_in_region); 364#endif 365 if (vmobject().physical_pages()[first_page_index() + page_index_in_region]->is_shared_zero_page()) { 366#ifdef PAGE_FAULT_DEBUG 367 dbgprintf("NP(zero) fault in Region{%p}[%u]\n", this, page_index_in_region); 368#endif 369 return handle_zero_fault(page_index_in_region); 370 } 371 return handle_cow_fault(page_index_in_region); 372 } 373 kprintf("PV(error) fault in Region{%p}[%u] at V%p\n", this, page_index_in_region, fault.vaddr().get()); 374 return PageFaultResponse::ShouldCrash; 375} 376 377PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region) 378{ 379 ASSERT_INTERRUPTS_DISABLED(); 380 ASSERT(vmobject().is_anonymous()); 381 382 sti(); 383 LOCKER(vmobject().m_paging_lock); 384 cli(); 385 386 auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region]; 387 388 if (!vmobject_physical_page_entry.is_null() && !vmobject_physical_page_entry->is_shared_zero_page()) { 389#ifdef PAGE_FAULT_DEBUG 390 dbgprintf("MM: zero_page() but page already present. Fine with me!\n"); 391#endif 392 remap_page(page_index_in_region); 393 return PageFaultResponse::Continue; 394 } 395 396 if (Thread::current) 397 Thread::current->did_zero_fault(); 398 399 auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::Yes); 400 if (physical_page.is_null()) { 401 kprintf("MM: handle_zero_fault was unable to allocate a physical page\n"); 402 return PageFaultResponse::ShouldCrash; 403 } 404 405#ifdef PAGE_FAULT_DEBUG 406 dbgprintf(" >> ZERO P%p\n", physical_page->paddr().get()); 407#endif 408 vmobject_physical_page_entry = move(physical_page); 409 remap_page(page_index_in_region); 410 return PageFaultResponse::Continue; 411} 412 413PageFaultResponse Region::handle_cow_fault(size_t page_index_in_region) 414{ 415 ASSERT_INTERRUPTS_DISABLED(); 416 auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region]; 417 if (vmobject_physical_page_entry->ref_count() == 1) { 418#ifdef PAGE_FAULT_DEBUG 419 dbgprintf(" >> It's a COW page but nobody is sharing it anymore. Remap r/w\n"); 420#endif 421 set_should_cow(page_index_in_region, false); 422 remap_page(page_index_in_region); 423 return PageFaultResponse::Continue; 424 } 425 426 if (Thread::current) 427 Thread::current->did_cow_fault(); 428 429#ifdef PAGE_FAULT_DEBUG 430 dbgprintf(" >> It's a COW page and it's time to COW!\n"); 431#endif 432 auto physical_page_to_copy = move(vmobject_physical_page_entry); 433 auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No); 434 if (physical_page.is_null()) { 435 kprintf("MM: handle_cow_fault was unable to allocate a physical page\n"); 436 return PageFaultResponse::ShouldCrash; 437 } 438 u8* dest_ptr = MM.quickmap_page(*physical_page); 439 const u8* src_ptr = vaddr().offset(page_index_in_region * PAGE_SIZE).as_ptr(); 440#ifdef PAGE_FAULT_DEBUG 441 dbgprintf(" >> COW P%p <- P%p\n", physical_page->paddr().get(), physical_page_to_copy->paddr().get()); 442#endif 443 copy_from_user(dest_ptr, src_ptr, PAGE_SIZE); 444 vmobject_physical_page_entry = move(physical_page); 445 MM.unquickmap_page(); 446 set_should_cow(page_index_in_region, false); 447 remap_page(page_index_in_region); 448 return PageFaultResponse::Continue; 449} 450 451PageFaultResponse Region::handle_inode_fault(size_t page_index_in_region) 452{ 453 ASSERT_INTERRUPTS_DISABLED(); 454 ASSERT(vmobject().is_inode()); 455 456 sti(); 457 LOCKER(vmobject().m_paging_lock); 458 cli(); 459 460 auto& inode_vmobject = static_cast<InodeVMObject&>(vmobject()); 461 auto& vmobject_physical_page_entry = inode_vmobject.physical_pages()[first_page_index() + page_index_in_region]; 462 463#ifdef PAGE_FAULT_DEBUG 464 dbg() << "Inode fault in " << name() << " page index: " << page_index_in_region; 465#endif 466 467 if (!vmobject_physical_page_entry.is_null()) { 468#ifdef PAGE_FAULT_DEBUG 469 dbgprintf("MM: page_in_from_inode() but page already present. Fine with me!\n"); 470#endif 471 remap_page(page_index_in_region); 472 return PageFaultResponse::Continue; 473 } 474 475 if (Thread::current) 476 Thread::current->did_inode_fault(); 477 478#ifdef MM_DEBUG 479 dbgprintf("MM: page_in_from_inode ready to read from inode\n"); 480#endif 481 sti(); 482 u8 page_buffer[PAGE_SIZE]; 483 auto& inode = inode_vmobject.inode(); 484 auto nread = inode.read_bytes((first_page_index() + page_index_in_region) * PAGE_SIZE, PAGE_SIZE, page_buffer, nullptr); 485 if (nread < 0) { 486 kprintf("MM: handle_inode_fault had error (%d) while reading!\n", nread); 487 return PageFaultResponse::ShouldCrash; 488 } 489 if (nread < PAGE_SIZE) { 490 // If we read less than a page, zero out the rest to avoid leaking uninitialized data. 491 memset(page_buffer + nread, 0, PAGE_SIZE - nread); 492 } 493 cli(); 494 vmobject_physical_page_entry = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No); 495 if (vmobject_physical_page_entry.is_null()) { 496 kprintf("MM: handle_inode_fault was unable to allocate a physical page\n"); 497 return PageFaultResponse::ShouldCrash; 498 } 499 500 u8* dest_ptr = MM.quickmap_page(*vmobject_physical_page_entry); 501 memcpy(dest_ptr, page_buffer, PAGE_SIZE); 502 MM.unquickmap_page(); 503 504 remap_page(page_index_in_region); 505 return PageFaultResponse::Continue; 506} 507 508}