// Serenity Operating System
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <Kernel/FileSystem/Inode.h>
#include <Kernel/Process.h>
#include <Kernel/Thread.h>
#include <Kernel/VM/AnonymousVMObject.h>
#include <Kernel/VM/InodeVMObject.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>
#include <Kernel/VM/Region.h>

//#define MM_DEBUG
//#define PAGE_FAULT_DEBUG

namespace Kernel {

// Construct an anonymous region: it is backed by a freshly created
// AnonymousVMObject spanning the whole range. Registers itself with the
// MemoryManager so the region is discoverable by address.
Region::Region(const Range& range, const String& name, u8 access, bool cacheable)
    : m_range(range)
    , m_vmobject(AnonymousVMObject::create_with_size(size()))
    , m_name(name)
    , m_access(access)
    , m_cacheable(cacheable)
{
    MM.register_region(*this);
}

// Construct a region whose pages come from an inode's contents, backed by an
// InodeVMObject created for that inode.
Region::Region(const Range& range, NonnullRefPtr<Inode> inode, const String& name, u8 access, bool cacheable)
    : m_range(range)
    , m_vmobject(InodeVMObject::create_with_inode(*inode))
    , m_name(name)
    , m_access(access)
    , m_cacheable(cacheable)
{
    MM.register_region(*this);
}

// Construct a region backed by an existing VMObject, starting at the given
// byte offset into that VMObject (used when several regions share one object).
Region::Region(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, u8 access, bool cacheable)
    : m_range(range)
    , m_offset_in_vmobject(offset_in_vmobject)
    , m_vmobject(move(vmobject))
    , m_name(name)
    , m_access(access)
    , m_cacheable(cacheable)
{
    MM.register_region(*this);
}

// Tear down the region: unmap it from its page directory (if mapped) and
// unregister it from the MemoryManager.
Region::~Region()
{
    // Make sure we disable interrupts so we don't get interrupted between unmapping and unregistering.
    // Unmapping the region will give the VM back to the RangeAllocator, so an interrupt handler would
    // find the address<->region mappings in an invalid state there.
    InterruptDisabler disabler;
    if (m_page_directory) {
        unmap(ShouldDeallocateVirtualMemoryRange::Yes);
        // unmap() clears m_page_directory on success.
        ASSERT(!m_page_directory);
    }
    MM.unregister_region(*this);
}

// Duplicate this region for a cloned address space (fork).
// Shared regions — and, per the FIXME, all inode-backed regions — keep the
// same VMObject. Private anonymous regions get a cloned VMObject, with every
// page marked copy-on-write in BOTH the parent and the child.
NonnullOwnPtr<Region> Region::clone()
{
    ASSERT(Process::current);

    // FIXME: What should we do for privately mapped InodeVMObjects?
    if (m_shared || vmobject().is_inode()) {
        // Stack regions are expected to be private anonymous memory.
        ASSERT(!m_stack);
#ifdef MM_DEBUG
        dbgprintf("%s<%u> Region::clone(): sharing %s (V%p)\n",
            Process::current->name().characters(),
            Process::current->pid(),
            m_name.characters(),
            vaddr().get());
#endif
        // Create a new region backed by the same VMObject.
        auto region = Region::create_user_accessible(m_range, m_vmobject, m_offset_in_vmobject, m_name, m_access);
        region->set_mmap(m_mmap);
        region->set_shared(m_shared);
        return region;
    }

#ifdef MM_DEBUG
    dbgprintf("%s<%u> Region::clone(): cowing %s (V%p)\n",
        Process::current->name().characters(),
        Process::current->pid(),
        m_name.characters(),
        vaddr().get());
#endif
    // Set up a COW region. The parent (this) region becomes COW as well!
    ensure_cow_map().fill(true);
    // Remap the parent so its now-COW pages become read-only in hardware.
    remap();
    auto clone_region = Region::create_user_accessible(m_range, m_vmobject->clone(), m_offset_in_vmobject, m_name, m_access);
    // ensure_cow_map() initializes the child's map with all pages marked COW.
    clone_region->ensure_cow_map();
    if (m_stack) {
        ASSERT(is_readable());
        ASSERT(is_writable());
        ASSERT(!is_shared());
        ASSERT(vmobject().is_anonymous());
        clone_region->set_stack(true);
    }
    clone_region->set_mmap(m_mmap);
    return clone_region;
}

129bool Region::commit()
130{
131 InterruptDisabler disabler;
132#ifdef MM_DEBUG
133 dbgprintf("MM: commit %u pages in Region %p (VMO=%p) at V%p\n", vmobject().page_count(), this, &vmobject(), vaddr().get());
134#endif
135 for (size_t i = 0; i < page_count(); ++i) {
136 if (!commit(i))
137 return false;
138 }
139 return true;
140}
141
142bool Region::commit(size_t page_index)
143{
144 ASSERT(vmobject().is_anonymous() || vmobject().is_purgeable());
145 InterruptDisabler disabler;
146#ifdef MM_DEBUG
147 dbgprintf("MM: commit single page (%zu) in Region %p (VMO=%p) at V%p\n", page_index, vmobject().page_count(), this, &vmobject(), vaddr().get());
148#endif
149 auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index];
150 if (!vmobject_physical_page_entry.is_null() && !vmobject_physical_page_entry->is_shared_zero_page())
151 return true;
152 auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::Yes);
153 if (!physical_page) {
154 kprintf("MM: commit was unable to allocate a physical page\n");
155 return false;
156 }
157 vmobject_physical_page_entry = move(physical_page);
158 remap_page(page_index);
159 return true;
160}
161
162u32 Region::cow_pages() const
163{
164 if (!m_cow_map)
165 return 0;
166 u32 count = 0;
167 for (size_t i = 0; i < m_cow_map->size(); ++i)
168 count += m_cow_map->get(i);
169 return count;
170}
171
172size_t Region::amount_dirty() const
173{
174 if (!vmobject().is_inode())
175 return amount_resident();
176 return static_cast<const InodeVMObject&>(vmobject()).amount_dirty();
177}
178
179size_t Region::amount_resident() const
180{
181 size_t bytes = 0;
182 for (size_t i = 0; i < page_count(); ++i) {
183 auto& physical_page = m_vmobject->physical_pages()[first_page_index() + i];
184 if (physical_page && !physical_page->is_shared_zero_page())
185 bytes += PAGE_SIZE;
186 }
187 return bytes;
188}
189
190size_t Region::amount_shared() const
191{
192 size_t bytes = 0;
193 for (size_t i = 0; i < page_count(); ++i) {
194 auto& physical_page = m_vmobject->physical_pages()[first_page_index() + i];
195 if (physical_page && physical_page->ref_count() > 1 && !physical_page->is_shared_zero_page())
196 bytes += PAGE_SIZE;
197 }
198 return bytes;
199}
200
201NonnullOwnPtr<Region> Region::create_user_accessible(const Range& range, const StringView& name, u8 access, bool cacheable)
202{
203 auto region = make<Region>(range, name, access, cacheable);
204 region->m_user_accessible = true;
205 return region;
206}
207
208NonnullOwnPtr<Region> Region::create_user_accessible(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const StringView& name, u8 access, bool cacheable)
209{
210 auto region = make<Region>(range, move(vmobject), offset_in_vmobject, name, access, cacheable);
211 region->m_user_accessible = true;
212 return region;
213}
214
215NonnullOwnPtr<Region> Region::create_user_accessible(const Range& range, NonnullRefPtr<Inode> inode, const StringView& name, u8 access, bool cacheable)
216{
217 auto region = make<Region>(range, move(inode), name, access, cacheable);
218 region->m_user_accessible = true;
219 return region;
220}
221
222NonnullOwnPtr<Region> Region::create_kernel_only(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const StringView& name, u8 access, bool cacheable)
223{
224 auto region = make<Region>(range, move(vmobject), offset_in_vmobject, name, access, cacheable);
225 region->m_user_accessible = false;
226 return region;
227}
228
229NonnullOwnPtr<Region> Region::create_kernel_only(const Range& range, const StringView& name, u8 access, bool cacheable)
230{
231 auto region = make<Region>(range, name, access, cacheable);
232 region->m_user_accessible = false;
233 return region;
234}
235
236bool Region::should_cow(size_t page_index) const
237{
238 auto& slot = vmobject().physical_pages()[page_index];
239 if (slot && slot->is_shared_zero_page())
240 return true;
241 if (m_shared)
242 return false;
243 return m_cow_map && m_cow_map->get(page_index);
244}
245
246void Region::set_should_cow(size_t page_index, bool cow)
247{
248 ASSERT(!m_shared);
249 ensure_cow_map().set(page_index, cow);
250}
251
252Bitmap& Region::ensure_cow_map() const
253{
254 if (!m_cow_map)
255 m_cow_map = make<Bitmap>(page_count(), true);
256 return *m_cow_map;
257}
258
// Write the page table entry for one region-relative page and flush its TLB
// entry. Requires m_page_directory to be set by the caller.
void Region::map_individual_page_impl(size_t page_index)
{
    auto page_vaddr = vaddr().offset(page_index * PAGE_SIZE);
    auto& pte = MM.ensure_pte(*m_page_directory, page_vaddr);
    auto& physical_page = vmobject().physical_pages()[first_page_index() + page_index];
    if (!physical_page) {
        // No physical page yet: leave the PTE non-present so access faults in.
        pte.clear();
    } else {
        pte.set_cache_disabled(!m_cacheable);
        pte.set_physical_page_base(physical_page->paddr().get());
        pte.set_present(true);
        // COW pages are mapped read-only so the first write traps into
        // handle_fault(), regardless of the region's nominal writability.
        if (should_cow(page_index))
            pte.set_writable(false);
        else
            pte.set_writable(is_writable());
        if (g_cpu_supports_nx)
            pte.set_execute_disabled(!is_executable());
        pte.set_user_allowed(is_user_accessible());
#ifdef MM_DEBUG
        dbg() << "MM: >> region map (PD=" << m_page_directory->cr3() << ", PTE=" << (void*)pte.raw() << "{" << &pte << "}) " << name() << " " << page_vaddr << " => " << physical_page->paddr() << " (@" << physical_page.ptr() << ")";
#endif
    }
    MM.flush_tlb(page_vaddr);
}

// Re-sync the hardware mapping of a single page, e.g. after its physical page
// or COW state changed. The page must already have a physical page assigned.
void Region::remap_page(size_t page_index)
{
    ASSERT(m_page_directory);
    InterruptDisabler disabler;
    ASSERT(vmobject().physical_pages()[first_page_index() + page_index]);
    map_individual_page_impl(page_index);
}

// Remove all of this region's page table entries, optionally returning the
// virtual range to the page directory's RangeAllocator. Clears
// m_page_directory, leaving the region unmapped.
void Region::unmap(ShouldDeallocateVirtualMemoryRange deallocate_range)
{
    InterruptDisabler disabler;
    ASSERT(m_page_directory);
    for (size_t i = 0; i < page_count(); ++i) {
        auto vaddr = this->vaddr().offset(i * PAGE_SIZE);
        auto& pte = MM.ensure_pte(*m_page_directory, vaddr);
        pte.clear();
        MM.flush_tlb(vaddr);
#ifdef MM_DEBUG
        auto& physical_page = vmobject().physical_pages()[first_page_index() + i];
        dbgprintf("MM: >> Unmapped V%p => P%p <<\n", vaddr.get(), physical_page ? physical_page->paddr().get() : 0);
#endif
    }
    if (deallocate_range == ShouldDeallocateVirtualMemoryRange::Yes)
        m_page_directory->range_allocator().deallocate(range());
    m_page_directory = nullptr;
}

// Associate this region with a page directory. A region may only belong to
// one page directory at a time (re-setting the same one is allowed).
void Region::set_page_directory(PageDirectory& page_directory)
{
    ASSERT(!m_page_directory || m_page_directory == &page_directory);
    InterruptDisabler disabler;
    m_page_directory = page_directory;
}
// Map every page of this region into the given page directory.
void Region::map(PageDirectory& page_directory)
{
    set_page_directory(page_directory);
    InterruptDisabler disabler;
#ifdef MM_DEBUG
    dbgprintf("MM: Region::map() will map VMO pages %u - %u (VMO page count: %u)\n", first_page_index(), last_page_index(), vmobject().page_count());
#endif
    for (size_t page_index = 0; page_index < page_count(); ++page_index)
        map_individual_page_impl(page_index);
}

// Rewrite all of this region's PTEs in its current page directory, e.g. after
// access bits or COW state changed. The region must already be mapped.
void Region::remap()
{
    ASSERT(m_page_directory);
    map(*m_page_directory);
}

// Top-level page-fault dispatcher for this region: routes the fault to inode
// paging, zero-fill, or copy-on-write handling based on fault type and the
// state of the faulting page.
PageFaultResponse Region::handle_fault(const PageFault& fault)
{
    auto page_index_in_region = page_index_from_address(fault.vaddr());
    if (fault.type() == PageFault::Type::PageNotPresent) {
        // Reads from a non-readable region are a hard error.
        if (fault.is_read() && !is_readable()) {
            dbgprintf("NP(non-readable) fault in Region{%p}[%u]\n", this, page_index_in_region);
            return PageFaultResponse::ShouldCrash;
        }

        // Inode-backed pages are demand-paged from disk.
        if (vmobject().is_inode()) {
#ifdef PAGE_FAULT_DEBUG
            dbgprintf("NP(inode) fault in Region{%p}[%u]\n", this, page_index_in_region);
#endif
            return handle_inode_fault(page_index_in_region);
        }
#ifdef MAP_SHARED_ZERO_PAGE_LAZILY
        // Reads of untouched anonymous pages just map the shared zero page;
        // a private zero-filled page is only materialized on write.
        if (fault.is_read()) {
            vmobject().physical_pages()[first_page_index() + page_index_in_region] = MM.shared_zero_page();
            remap_page(page_index_in_region);
            return PageFaultResponse::Continue;
        }
        return handle_zero_fault(page_index_in_region);
#else
        // Without lazy zero-page mapping, anonymous pages are never
        // non-present here.
        ASSERT_NOT_REACHED();
#endif
    }
    ASSERT(fault.type() == PageFault::Type::ProtectionViolation);
    if (fault.access() == PageFault::Access::Write && is_writable() && should_cow(page_index_in_region)) {
#ifdef PAGE_FAULT_DEBUG
        dbgprintf("PV(cow) fault in Region{%p}[%u]\n", this, page_index_in_region);
#endif
        // Writing to the shared zero page allocates a fresh zeroed page
        // instead of copying.
        if (vmobject().physical_pages()[first_page_index() + page_index_in_region]->is_shared_zero_page()) {
#ifdef PAGE_FAULT_DEBUG
            dbgprintf("NP(zero) fault in Region{%p}[%u]\n", this, page_index_in_region);
#endif
            return handle_zero_fault(page_index_in_region);
        }
        return handle_cow_fault(page_index_in_region);
    }
    // Genuine protection violation (e.g. write to read-only, non-COW page).
    kprintf("PV(error) fault in Region{%p}[%u] at V%p\n", this, page_index_in_region, fault.vaddr().get());
    return PageFaultResponse::ShouldCrash;
}

// Satisfy a fault on an untouched anonymous page by installing a zero-filled
// physical page and remapping.
PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region)
{
    ASSERT_INTERRUPTS_DISABLED();
    ASSERT(vmobject().is_anonymous());

    // Briefly re-enable interrupts while taking the paging lock (which may
    // block), then disable them again for the actual page-table work.
    sti();
    LOCKER(vmobject().m_paging_lock);
    cli();

    auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region];

    if (!vmobject_physical_page_entry.is_null() && !vmobject_physical_page_entry->is_shared_zero_page()) {
        // Another holder of the lock already populated this page; just remap.
#ifdef PAGE_FAULT_DEBUG
        dbgprintf("MM: zero_page() but page already present. Fine with me!\n");
#endif
        remap_page(page_index_in_region);
        return PageFaultResponse::Continue;
    }

    // Account the zero fault against the current thread, if any.
    if (Thread::current)
        Thread::current->did_zero_fault();

    auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::Yes);
    if (physical_page.is_null()) {
        kprintf("MM: handle_zero_fault was unable to allocate a physical page\n");
        return PageFaultResponse::ShouldCrash;
    }

#ifdef PAGE_FAULT_DEBUG
    dbgprintf("  >> ZERO P%p\n", physical_page->paddr().get());
#endif
    vmobject_physical_page_entry = move(physical_page);
    remap_page(page_index_in_region);
    return PageFaultResponse::Continue;
}

// Resolve a write fault on a copy-on-write page: either take exclusive
// ownership of the existing page (if nobody else references it) or copy its
// contents into a freshly allocated page.
PageFaultResponse Region::handle_cow_fault(size_t page_index_in_region)
{
    ASSERT_INTERRUPTS_DISABLED();
    auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region];
    if (vmobject_physical_page_entry->ref_count() == 1) {
        // We are the sole owner; no copy needed, just drop the COW flag and
        // remap read-write.
#ifdef PAGE_FAULT_DEBUG
        dbgprintf("    >> It's a COW page but nobody is sharing it anymore. Remap r/w\n");
#endif
        set_should_cow(page_index_in_region, false);
        remap_page(page_index_in_region);
        return PageFaultResponse::Continue;
    }

    // Account the COW fault against the current thread, if any.
    if (Thread::current)
        Thread::current->did_cow_fault();

#ifdef PAGE_FAULT_DEBUG
    dbgprintf("    >> It's a COW page and it's time to COW!\n");
#endif
    // Keep the old page alive while we copy from it; the VMObject slot is
    // temporarily emptied.
    auto physical_page_to_copy = move(vmobject_physical_page_entry);
    auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No);
    if (physical_page.is_null()) {
        kprintf("MM: handle_cow_fault was unable to allocate a physical page\n");
        return PageFaultResponse::ShouldCrash;
    }
    // Copy via a temporary kernel mapping of the new page; the source is read
    // through this region's still-present (read-only) mapping.
    u8* dest_ptr = MM.quickmap_page(*physical_page);
    const u8* src_ptr = vaddr().offset(page_index_in_region * PAGE_SIZE).as_ptr();
#ifdef PAGE_FAULT_DEBUG
    dbgprintf("      >> COW P%p <- P%p\n", physical_page->paddr().get(), physical_page_to_copy->paddr().get());
#endif
    copy_from_user(dest_ptr, src_ptr, PAGE_SIZE);
    vmobject_physical_page_entry = move(physical_page);
    MM.unquickmap_page();
    set_should_cow(page_index_in_region, false);
    remap_page(page_index_in_region);
    return PageFaultResponse::Continue;
}

// Demand-page an inode-backed page: read the page's contents from the inode
// into a buffer, copy it into a newly allocated physical page, and map it.
PageFaultResponse Region::handle_inode_fault(size_t page_index_in_region)
{
    ASSERT_INTERRUPTS_DISABLED();
    ASSERT(vmobject().is_inode());

    // Briefly re-enable interrupts while taking the paging lock (which may
    // block), then disable them again.
    sti();
    LOCKER(vmobject().m_paging_lock);
    cli();

    auto& inode_vmobject = static_cast<InodeVMObject&>(vmobject());
    auto& vmobject_physical_page_entry = inode_vmobject.physical_pages()[first_page_index() + page_index_in_region];

#ifdef PAGE_FAULT_DEBUG
    dbg() << "Inode fault in " << name() << " page index: " << page_index_in_region;
#endif

    if (!vmobject_physical_page_entry.is_null()) {
        // Another holder of the lock already paged this in; just remap.
#ifdef PAGE_FAULT_DEBUG
        dbgprintf("MM: page_in_from_inode() but page already present. Fine with me!\n");
#endif
        remap_page(page_index_in_region);
        return PageFaultResponse::Continue;
    }

    // Account the inode fault against the current thread, if any.
    if (Thread::current)
        Thread::current->did_inode_fault();

#ifdef MM_DEBUG
    dbgprintf("MM: page_in_from_inode ready to read from inode\n");
#endif
    // Interrupts must be enabled while reading from the inode (disk I/O).
    sti();
    u8 page_buffer[PAGE_SIZE];
    auto& inode = inode_vmobject.inode();
    auto nread = inode.read_bytes((first_page_index() + page_index_in_region) * PAGE_SIZE, PAGE_SIZE, page_buffer, nullptr);
    if (nread < 0) {
        kprintf("MM: handle_inode_fault had error (%d) while reading!\n", nread);
        return PageFaultResponse::ShouldCrash;
    }
    if (nread < PAGE_SIZE) {
        // If we read less than a page, zero out the rest to avoid leaking uninitialized data.
        memset(page_buffer + nread, 0, PAGE_SIZE - nread);
    }
    cli();
    vmobject_physical_page_entry = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No);
    if (vmobject_physical_page_entry.is_null()) {
        kprintf("MM: handle_inode_fault was unable to allocate a physical page\n");
        return PageFaultResponse::ShouldCrash;
    }

    // Copy the buffered contents into the new page via a temporary kernel
    // mapping, then expose it through this region's mapping.
    u8* dest_ptr = MM.quickmap_page(*vmobject_physical_page_entry);
    memcpy(dest_ptr, page_buffer, PAGE_SIZE);
    MM.unquickmap_page();

    remap_page(page_index_in_region);
    return PageFaultResponse::Continue;
}

}