// Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <AK/Memory.h>
28#include <AK/StringView.h>
29#include <Kernel/FileSystem/Inode.h>
30#include <Kernel/Process.h>
31#include <Kernel/Thread.h>
32#include <Kernel/VM/AnonymousVMObject.h>
33#include <Kernel/VM/MemoryManager.h>
34#include <Kernel/VM/PageDirectory.h>
35#include <Kernel/VM/Region.h>
36#include <Kernel/VM/SharedInodeVMObject.h>
37
38//#define MM_DEBUG
39//#define PAGE_FAULT_DEBUG
40
41namespace Kernel {
42
// Construct a region covering `range`, backed by `vmobject` starting at byte
// offset `offset_in_vmobject`. The region registers itself with the
// MemoryManager (for address<->region lookups) but is not yet mapped into
// any page directory; callers do that separately via map().
Region::Region(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, u8 access, bool cacheable)
    : m_range(range)
    , m_offset_in_vmobject(offset_in_vmobject)
    , m_vmobject(move(vmobject))
    , m_name(name)
    , m_access(access)
    , m_cacheable(cacheable)
{
    MM.register_region(*this);
}
53
// Unmap (if mapped) and unregister this region. Must run with interrupts
// disabled for the whole sequence; see comment below.
Region::~Region()
{
    // Make sure we disable interrupts so we don't get interrupted between unmapping and unregistering.
    // Unmapping the region will give the VM back to the RangeAllocator, so an interrupt handler would
    // find the address<->region mappings in an invalid state there.
    InterruptDisabler disabler;
    if (m_page_directory) {
        unmap(ShouldDeallocateVirtualMemoryRange::Yes);
        // unmap() nulls out m_page_directory on success.
        ASSERT(!m_page_directory);
    }
    MM.unregister_region(*this);
}
66
// Duplicate this region for a cloned (forked) address space.
// Shared regions get a sibling region backed by the very same VMObject;
// private regions are set up for copy-on-write, which turns *this* region
// CoW as well.
NonnullOwnPtr<Region> Region::clone()
{
    ASSERT(Process::current);

    if (m_shared) {
        // Stacks are expected to be private; a shared stack would be a bug.
        ASSERT(!m_stack);
#ifdef MM_DEBUG
        dbg() << "Region::clone(): Sharing " << name() << " (" << vaddr() << ")";
#endif
        if (vmobject().is_inode())
            ASSERT(vmobject().is_shared_inode());

        // Create a new region backed by the same VMObject.
        auto region = Region::create_user_accessible(m_range, m_vmobject, m_offset_in_vmobject, m_name, m_access);
        region->set_mmap(m_mmap);
        region->set_shared(m_shared);
        return region;
    }

    if (vmobject().is_inode())
        ASSERT(vmobject().is_private_inode());

#ifdef MM_DEBUG
    dbg() << "Region::clone(): CoWing " << name() << " (" << vaddr() << ")";
#endif
    // Set up a COW region. The parent (this) region becomes COW as well!
    // Mark all of our pages CoW, then remap so our own PTEs turn read-only;
    // the next write here will fault and copy the page.
    ensure_cow_map().fill(true);
    remap();
    auto clone_region = Region::create_user_accessible(m_range, m_vmobject->clone(), m_offset_in_vmobject, m_name, m_access);
    // ensure_cow_map() lazily creates the clone's map with all bits set,
    // marking every page CoW in the child as well.
    clone_region->ensure_cow_map();
    if (m_stack) {
        ASSERT(is_readable());
        ASSERT(is_writable());
        ASSERT(vmobject().is_anonymous());
        clone_region->set_stack(true);
    }
    clone_region->set_mmap(m_mmap);
    return clone_region;
}
106
107bool Region::commit()
108{
109 InterruptDisabler disabler;
110#ifdef MM_DEBUG
111 dbg() << "MM: Commit " << page_count() << " pages in Region " << this << " (VMO=" << &vmobject() << ") at " << vaddr();
112#endif
113 for (size_t i = 0; i < page_count(); ++i) {
114 if (!commit(i))
115 return false;
116 }
117 return true;
118}
119
120bool Region::commit(size_t page_index)
121{
122 ASSERT(vmobject().is_anonymous() || vmobject().is_purgeable());
123 InterruptDisabler disabler;
124 auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index];
125 if (!vmobject_physical_page_entry.is_null() && !vmobject_physical_page_entry->is_shared_zero_page())
126 return true;
127 auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::Yes);
128 if (!physical_page) {
129 klog() << "MM: commit was unable to allocate a physical page";
130 return false;
131 }
132 vmobject_physical_page_entry = move(physical_page);
133 remap_page(page_index);
134 return true;
135}
136
137u32 Region::cow_pages() const
138{
139 if (!m_cow_map)
140 return 0;
141 u32 count = 0;
142 for (size_t i = 0; i < m_cow_map->size(); ++i)
143 count += m_cow_map->get(i);
144 return count;
145}
146
147size_t Region::amount_dirty() const
148{
149 if (!vmobject().is_inode())
150 return amount_resident();
151 return static_cast<const InodeVMObject&>(vmobject()).amount_dirty();
152}
153
154size_t Region::amount_resident() const
155{
156 size_t bytes = 0;
157 for (size_t i = 0; i < page_count(); ++i) {
158 auto& physical_page = m_vmobject->physical_pages()[first_page_index() + i];
159 if (physical_page && !physical_page->is_shared_zero_page())
160 bytes += PAGE_SIZE;
161 }
162 return bytes;
163}
164
165size_t Region::amount_shared() const
166{
167 size_t bytes = 0;
168 for (size_t i = 0; i < page_count(); ++i) {
169 auto& physical_page = m_vmobject->physical_pages()[first_page_index() + i];
170 if (physical_page && physical_page->ref_count() > 1 && !physical_page->is_shared_zero_page())
171 bytes += PAGE_SIZE;
172 }
173 return bytes;
174}
175
176NonnullOwnPtr<Region> Region::create_user_accessible(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const StringView& name, u8 access, bool cacheable)
177{
178 auto region = make<Region>(range, move(vmobject), offset_in_vmobject, name, access, cacheable);
179 region->m_user_accessible = true;
180 return region;
181}
182
183NonnullOwnPtr<Region> Region::create_kernel_only(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const StringView& name, u8 access, bool cacheable)
184{
185 auto region = make<Region>(range, move(vmobject), offset_in_vmobject, name, access, cacheable);
186 region->m_user_accessible = false;
187 return region;
188}
189
190bool Region::should_cow(size_t page_index) const
191{
192 auto& slot = vmobject().physical_pages()[page_index];
193 if (slot && slot->is_shared_zero_page())
194 return true;
195 if (m_shared)
196 return false;
197 return m_cow_map && m_cow_map->get(page_index);
198}
199
200void Region::set_should_cow(size_t page_index, bool cow)
201{
202 ASSERT(!m_shared);
203 ensure_cow_map().set(page_index, cow);
204}
205
206Bitmap& Region::ensure_cow_map() const
207{
208 if (!m_cow_map)
209 m_cow_map = make<Bitmap>(page_count(), true);
210 return *m_cow_map;
211}
212
// Write the page table entry for a single (region-relative) page into
// m_page_directory and flush the TLB for that virtual address.
void Region::map_individual_page_impl(size_t page_index)
{
    auto page_vaddr = vaddr().offset(page_index * PAGE_SIZE);
    auto& pte = MM.ensure_pte(*m_page_directory, page_vaddr);
    auto& physical_page = vmobject().physical_pages()[first_page_index() + page_index];
    if (!physical_page || (!is_readable() && !is_writable())) {
        // No backing page yet, or no access at all: leave the PTE cleared so
        // any access faults into handle_fault().
        pte.clear();
    } else {
        pte.set_cache_disabled(!m_cacheable);
        pte.set_physical_page_base(physical_page->paddr().get());
        pte.set_present(true);
        // CoW pages are mapped read-only so the first write traps.
        if (should_cow(page_index))
            pte.set_writable(false);
        else
            pte.set_writable(is_writable());
        // Only set NX bits on hardware that supports them.
        if (g_cpu_supports_nx)
            pte.set_execute_disabled(!is_executable());
        pte.set_user_allowed(is_user_accessible());
#ifdef MM_DEBUG
        dbg() << "MM: >> region map (PD=" << m_page_directory->cr3() << ", PTE=" << (void*)pte.raw() << "{" << &pte << "}) " << name() << " " << page_vaddr << " => " << physical_page->paddr() << " (@" << physical_page.ptr() << ")";
#endif
    }
    MM.flush_tlb(page_vaddr);
}
237
238void Region::remap_page(size_t page_index)
239{
240 ASSERT(m_page_directory);
241 InterruptDisabler disabler;
242 ASSERT(vmobject().physical_pages()[first_page_index() + page_index]);
243 map_individual_page_impl(page_index);
244}
245
// Clear every PTE covering this region (flushing the TLB per page), then
// optionally return the virtual range to the page directory's range
// allocator. Afterwards the region is no longer associated with any page
// directory.
void Region::unmap(ShouldDeallocateVirtualMemoryRange deallocate_range)
{
    InterruptDisabler disabler;
    ASSERT(m_page_directory);
    for (size_t i = 0; i < page_count(); ++i) {
        auto vaddr = this->vaddr().offset(i * PAGE_SIZE);
        auto& pte = MM.ensure_pte(*m_page_directory, vaddr);
        pte.clear();
        MM.flush_tlb(vaddr);
#ifdef MM_DEBUG
        auto& physical_page = vmobject().physical_pages()[first_page_index() + i];
        dbg() << "MM: >> Unmapped " << vaddr << " => P" << String::format("%p", physical_page ? physical_page->paddr().get() : 0) << " <<";
#endif
    }
    if (deallocate_range == ShouldDeallocateVirtualMemoryRange::Yes)
        m_page_directory->range_allocator().deallocate(range());
    m_page_directory = nullptr;
}
264
265void Region::set_page_directory(PageDirectory& page_directory)
266{
267 ASSERT(!m_page_directory || m_page_directory == &page_directory);
268 InterruptDisabler disabler;
269 m_page_directory = page_directory;
270}
// Map every page of this region into `page_directory` by writing one PTE per
// page via map_individual_page_impl().
void Region::map(PageDirectory& page_directory)
{
    set_page_directory(page_directory);
    InterruptDisabler disabler;
#ifdef MM_DEBUG
    dbg() << "MM: Region::map() will map VMO pages " << first_page_index() << " - " << last_page_index() << " (VMO page count: " << vmobject().page_count() << ")";
#endif
    for (size_t page_index = 0; page_index < page_count(); ++page_index)
        map_individual_page_impl(page_index);
}
281
282void Region::remap()
283{
284 ASSERT(m_page_directory);
285 map(*m_page_directory);
286}
287
// Central page-fault dispatcher for this region: routes the fault to
// zero-fill, copy-on-write or inode paging, or returns ShouldCrash for an
// illegal access.
PageFaultResponse Region::handle_fault(const PageFault& fault)
{
    auto page_index_in_region = page_index_from_address(fault.vaddr());
    if (fault.type() == PageFault::Type::PageNotPresent) {
        // Access-permission checks first: a fault on a page we'd never map
        // is fatal regardless of backing.
        if (fault.is_read() && !is_readable()) {
            dbg() << "NP(non-readable) fault in Region{" << this << "}[" << page_index_in_region << "]";
            return PageFaultResponse::ShouldCrash;
        }
        if (fault.is_write() && !is_writable()) {
            dbg() << "NP(non-writable) write fault in Region{" << this << "}[" << page_index_in_region << "] at " << fault.vaddr();
            return PageFaultResponse::ShouldCrash;
        }
        if (vmobject().is_inode()) {
#ifdef PAGE_FAULT_DEBUG
            dbg() << "NP(inode) fault in Region{" << this << "}[" << page_index_in_region << "]";
#endif
            return handle_inode_fault(page_index_in_region);
        }
#ifdef MAP_SHARED_ZERO_PAGE_LAZILY
        // Lazy mode: satisfy reads by installing the shared zero page; only
        // a write allocates a private zero-filled page.
        if (fault.is_read()) {
            vmobject().physical_pages()[first_page_index() + page_index_in_region] = MM.shared_zero_page();
            remap_page(page_index_in_region);
            return PageFaultResponse::Continue;
        }
        return handle_zero_fault(page_index_in_region);
#else
        // With eager shared-zero-page mapping, a not-present fault on
        // anonymous memory should be impossible.
        dbg() << "BUG! Unexpected NP fault at " << fault.vaddr();
        return PageFaultResponse::ShouldCrash;
#endif
    }
    ASSERT(fault.type() == PageFault::Type::ProtectionViolation);
    if (fault.access() == PageFault::Access::Write && is_writable() && should_cow(page_index_in_region)) {
#ifdef PAGE_FAULT_DEBUG
        dbg() << "PV(cow) fault in Region{" << this << "}[" << page_index_in_region << "]";
#endif
        // A write to the shared zero page is really a zero-fill fault, not a
        // copy of existing data.
        if (vmobject().physical_pages()[first_page_index() + page_index_in_region]->is_shared_zero_page()) {
#ifdef PAGE_FAULT_DEBUG
            dbg() << "NP(zero) fault in Region{" << this << "}[" << page_index_in_region << "]";
#endif
            return handle_zero_fault(page_index_in_region);
        }
        return handle_cow_fault(page_index_in_region);
    }
    dbg() << "PV(error) fault in Region{" << this << "}[" << page_index_in_region << "] at " << fault.vaddr();
    return PageFaultResponse::ShouldCrash;
}
334
// Satisfy a fault on anonymous memory by installing a freshly zero-filled
// physical page and remapping the PTE.
PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region)
{
    ASSERT_INTERRUPTS_DISABLED();
    ASSERT(vmobject().is_anonymous());

    // Re-enable interrupts while acquiring the paging lock (it may block),
    // then disable them again before touching page state.
    sti();
    LOCKER(vmobject().m_paging_lock);
    cli();

    auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region];

    if (!vmobject_physical_page_entry.is_null() && !vmobject_physical_page_entry->is_shared_zero_page()) {
        // Someone else paged this in while we waited for the lock; just
        // refresh our PTE.
#ifdef PAGE_FAULT_DEBUG
        dbg() << "MM: zero_page() but page already present. Fine with me!";
#endif
        remap_page(page_index_in_region);
        return PageFaultResponse::Continue;
    }

    // Account the zero fault against the current thread, if any.
    if (Thread::current)
        Thread::current->did_zero_fault();

    auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::Yes);
    if (physical_page.is_null()) {
        klog() << "MM: handle_zero_fault was unable to allocate a physical page";
        return PageFaultResponse::ShouldCrash;
    }

#ifdef PAGE_FAULT_DEBUG
    dbg() << " >> ZERO " << physical_page->paddr();
#endif
    vmobject_physical_page_entry = move(physical_page);
    remap_page(page_index_in_region);
    return PageFaultResponse::Continue;
}
370
// Resolve a write fault on a copy-on-write page: either reclaim the page
// outright if we're its sole owner, or copy it into a newly allocated page
// and remap read-write.
PageFaultResponse Region::handle_cow_fault(size_t page_index_in_region)
{
    ASSERT_INTERRUPTS_DISABLED();
    auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region];
    if (vmobject_physical_page_entry->ref_count() == 1) {
        // Nobody else holds a reference to this page anymore, so no copy is
        // needed — just drop the CoW bit and remap writable.
#ifdef PAGE_FAULT_DEBUG
        dbg() << " >> It's a COW page but nobody is sharing it anymore. Remap r/w";
#endif
        set_should_cow(page_index_in_region, false);
        remap_page(page_index_in_region);
        return PageFaultResponse::Continue;
    }

    // Account the CoW fault against the current thread, if any.
    if (Thread::current)
        Thread::current->did_cow_fault();

#ifdef PAGE_FAULT_DEBUG
    dbg() << " >> It's a COW page and it's time to COW!";
#endif
    // Take our reference out of the slot first so the copy source stays
    // alive while we replace the slot.
    auto physical_page_to_copy = move(vmobject_physical_page_entry);
    auto physical_page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No);
    if (physical_page.is_null()) {
        klog() << "MM: handle_cow_fault was unable to allocate a physical page";
        return PageFaultResponse::ShouldCrash;
    }
    // Copy via a temporary kernel mapping of the destination page; the
    // source is read through this region's still-present (read-only) mapping.
    u8* dest_ptr = MM.quickmap_page(*physical_page);
    const u8* src_ptr = vaddr().offset(page_index_in_region * PAGE_SIZE).as_ptr();
#ifdef PAGE_FAULT_DEBUG
    dbg() << " >> COW " << physical_page->paddr() << " <- " << physical_page_to_copy->paddr();
#endif
    copy_from_user(dest_ptr, src_ptr, PAGE_SIZE);
    vmobject_physical_page_entry = move(physical_page);
    MM.unquickmap_page();
    set_should_cow(page_index_in_region, false);
    remap_page(page_index_in_region);
    return PageFaultResponse::Continue;
}
408
// Resolve a not-present fault on inode-backed memory by reading the page's
// contents from the inode into a newly allocated physical page.
PageFaultResponse Region::handle_inode_fault(size_t page_index_in_region)
{
    ASSERT_INTERRUPTS_DISABLED();
    ASSERT(vmobject().is_inode());

    // Re-enable interrupts while acquiring the paging lock (it may block),
    // then disable them again before inspecting page state.
    sti();
    LOCKER(vmobject().m_paging_lock);
    cli();

    auto& inode_vmobject = static_cast<InodeVMObject&>(vmobject());
    auto& vmobject_physical_page_entry = inode_vmobject.physical_pages()[first_page_index() + page_index_in_region];

#ifdef PAGE_FAULT_DEBUG
    dbg() << "Inode fault in " << name() << " page index: " << page_index_in_region;
#endif

    if (!vmobject_physical_page_entry.is_null()) {
        // Someone else paged this in while we waited for the lock; just
        // refresh our PTE.
#ifdef PAGE_FAULT_DEBUG
        dbg() << ("MM: page_in_from_inode() but page already present. Fine with me!");
#endif
        remap_page(page_index_in_region);
        return PageFaultResponse::Continue;
    }

    // Account the inode fault against the current thread, if any.
    if (Thread::current)
        Thread::current->did_inode_fault();

#ifdef MM_DEBUG
    dbg() << "MM: page_in_from_inode ready to read from inode";
#endif
    // The inode read may block, so interrupts stay enabled for it; the file
    // offset is the page's position within the VMObject.
    sti();
    u8 page_buffer[PAGE_SIZE];
    auto& inode = inode_vmobject.inode();
    auto nread = inode.read_bytes((first_page_index() + page_index_in_region) * PAGE_SIZE, PAGE_SIZE, page_buffer, nullptr);
    if (nread < 0) {
        klog() << "MM: handle_inode_fault had error (" << nread << ") while reading!";
        return PageFaultResponse::ShouldCrash;
    }
    if (nread < PAGE_SIZE) {
        // If we read less than a page, zero out the rest to avoid leaking uninitialized data.
        memset(page_buffer + nread, 0, PAGE_SIZE - nread);
    }
    cli();
    vmobject_physical_page_entry = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No);
    if (vmobject_physical_page_entry.is_null()) {
        klog() << "MM: handle_inode_fault was unable to allocate a physical page";
        return PageFaultResponse::ShouldCrash;
    }

    // Copy the buffered file data into the new page via a temporary kernel
    // mapping, then expose it through this region's PTE.
    u8* dest_ptr = MM.quickmap_page(*vmobject_physical_page_entry);
    memcpy(dest_ptr, page_buffer, PAGE_SIZE);
    MM.unquickmap_page();

    remap_page(page_index_in_region);
    return PageFaultResponse::Continue;
}
465
466}