/*
 * Copyright (c) 2021-2022, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/API/MemoryLayout.h>
#include <Kernel/Arch/CPU.h>
#include <Kernel/Locking/Spinlock.h>
#include <Kernel/Memory/AddressSpace.h>
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/InodeVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Scheduler.h>

namespace Kernel::Memory {

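// Create a new address space. If a parent space is given, its total virtual range is reused;
// otherwise the userspace range starts at USER_RANGE_BASE plus a small page-aligned random
// offset (ASLR) and extends up to USER_RANGE_CEILING.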
ErrorOr<NonnullOwnPtr<AddressSpace>> AddressSpace::try_create(AddressSpace const* parent)
{
    auto page_directory = TRY(PageDirectory::try_create_for_userspace());

    VirtualRange total_range = [&]() -> VirtualRange {
        if (parent)
            return parent->m_region_tree.total_range();
        constexpr FlatPtr userspace_range_base = USER_RANGE_BASE;
        FlatPtr const userspace_range_ceiling = USER_RANGE_CEILING;
        size_t random_offset = (get_fast_random<u8>() % 2 * MiB) & PAGE_MASK;
        FlatPtr base = userspace_range_base + random_offset;
        return VirtualRange(VirtualAddress { base }, userspace_range_ceiling - base);
    }();

    auto space = TRY(adopt_nonnull_own_or_enomem(new (nothrow) AddressSpace(move(page_directory), total_range)));
    space->page_directory().set_space({}, *space);
    return space;
}

AddressSpace::AddressSpace(NonnullLockRefPtr<PageDirectory> page_directory, VirtualRange total_range)
    : m_page_directory(move(page_directory))
    , m_region_tree(total_range)
{
}

AddressSpace::~AddressSpace() = default;

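// Unmap an mmap'ed range. Three cases are handled below: the range matches a single region
// exactly, the range lies entirely inside one region (which is split around it), or the range
// intersects several regions. Non-mmap'ed and immutable regions are rejected.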
ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
{
    if (!size)
        return EINVAL;

    auto range_to_unmap = TRY(VirtualRange::expand_to_page_boundaries(addr.get(), size));

    if (!is_user_range(range_to_unmap))
        return EFAULT;

    if (auto* whole_region = find_region_from_range(range_to_unmap)) {
        if (!whole_region->is_mmap())
            return EPERM;
        if (whole_region->is_immutable())
            return EPERM;

        PerformanceManager::add_unmap_perf_event(Process::current(), whole_region->range());

        deallocate_region(*whole_region);
        return {};
    }

    if (auto* old_region = find_region_containing(range_to_unmap)) {
        if (!old_region->is_mmap())
            return EPERM;
        if (old_region->is_immutable())
            return EPERM;

        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address.
        auto region = take_region(*old_region);
        region->unmap();

        auto new_regions = TRY(try_split_region_around_range(*region, range_to_unmap));

        // And finally we map the new region(s) using our page directory (they were just allocated and don't have one).
        for (auto* new_region : new_regions) {
            // TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
            // leaves the caller in an undefined state.
            TRY(new_region->map(page_directory()));
        }

        PerformanceManager::add_unmap_perf_event(Process::current(), range_to_unmap);

        return {};
    }

    // Try again while checking multiple regions at a time.
    auto const& regions = TRY(find_regions_intersecting(range_to_unmap));
    if (regions.is_empty())
        return {};

    // Check that all intersecting regions are mmap'ed and mutable before unmapping anything,
    // so we don't error out with only half of the range unmapped.
    for (auto* region : regions) {
        if (!region->is_mmap())
            return EPERM;
        if (region->is_immutable())
            return EPERM;
    }

    Vector<Region*, 2> new_regions;

    for (auto* old_region : regions) {
        // If it's a full match we can remove the entire old region.
        if (old_region->range().intersect(range_to_unmap).size() == old_region->size()) {
            deallocate_region(*old_region);
            continue;
        }

        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address.
        auto region = take_region(*old_region);
        region->unmap();

        // Otherwise, split the regions and collect them for future mapping.
        auto split_regions = TRY(try_split_region_around_range(*region, range_to_unmap));
        TRY(new_regions.try_extend(split_regions));
    }

    // And finally map the new region(s) into our page directory.
    for (auto* new_region : new_regions) {
        // TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
        // leaves the caller in an undefined state.
        TRY(new_region->map(page_directory()));
    }

    PerformanceManager::add_unmap_perf_event(Process::current(), range_to_unmap);

    return {};
}

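// Create a new region covering `range`, backed by the same VMObject as `source_region`
// (starting at `offset_in_vmobject`) and copying its access flags, cacheability,
// mmap/stack/syscall state and per-page CoW bits, then place it at exactly `range`.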
ErrorOr<Region*> AddressSpace::try_allocate_split_region(Region const& source_region, VirtualRange const& range, size_t offset_in_vmobject)
{
    OwnPtr<KString> region_name;
    if (!source_region.name().is_null())
        region_name = TRY(KString::try_create(source_region.name()));

    auto new_region = TRY(Region::create_unplaced(
        source_region.vmobject(), offset_in_vmobject, move(region_name), source_region.access(), source_region.is_cacheable() ? Region::Cacheable::Yes : Region::Cacheable::No, source_region.is_shared()));
    new_region->set_syscall_region(source_region.is_syscall_region());
    new_region->set_mmap(source_region.is_mmap(), source_region.mmapped_from_readable(), source_region.mmapped_from_writable());
    new_region->set_stack(source_region.is_stack());
    size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
    for (size_t i = 0; i < new_region->page_count(); ++i) {
        if (source_region.should_cow(page_offset_in_source_region + i))
            TRY(new_region->set_should_cow(i, true));
    }
    TRY(m_region_tree.place_specifically(*new_region, range));
    return new_region.leak_ptr();
}

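// Allocate a fresh anonymous region. If `requested_address` is null the region is placed
// anywhere in the address space (optionally at a randomized address); otherwise it is
// placed at exactly the requested (page-aligned) address.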
ErrorOr<Region*> AddressSpace::allocate_region(RandomizeVirtualAddress randomize_virtual_address, VirtualAddress requested_address, size_t requested_size, size_t requested_alignment, StringView name, int prot, AllocationStrategy strategy)
{
    if (!requested_address.is_page_aligned())
        return EINVAL;
    auto size = TRY(Memory::page_round_up(requested_size));
    auto alignment = TRY(Memory::page_round_up(requested_alignment));
    OwnPtr<KString> region_name;
    if (!name.is_null())
        region_name = TRY(KString::try_create(name));
    auto vmobject = TRY(AnonymousVMObject::try_create_with_size(size, strategy));
    auto region = TRY(Region::create_unplaced(move(vmobject), 0, move(region_name), prot_to_region_access_flags(prot)));
    if (requested_address.is_null()) {
        TRY(m_region_tree.place_anywhere(*region, randomize_virtual_address, size, alignment));
    } else {
        TRY(m_region_tree.place_specifically(*region, VirtualRange { requested_address, size }));
    }
    TRY(region->map(page_directory(), ShouldFlushTLB::No));
    return region.leak_ptr();
}

ErrorOr<Region*> AddressSpace::allocate_region_with_vmobject(VirtualRange requested_range, NonnullLockRefPtr<VMObject> vmobject, size_t offset_in_vmobject, StringView name, int prot, bool shared)
{
    return allocate_region_with_vmobject(RandomizeVirtualAddress::Yes, requested_range.base(), requested_range.size(), PAGE_SIZE, move(vmobject), offset_in_vmobject, name, prot, shared);
}

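// Allocate a region backed by an existing VMObject. The requested offset and size are
// validated against the VMObject's size before the region is placed and mapped; PROT_NONE
// regions are attached to the page directory but get no page table mappings.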
ErrorOr<Region*> AddressSpace::allocate_region_with_vmobject(RandomizeVirtualAddress randomize_virtual_address, VirtualAddress requested_address, size_t requested_size, size_t requested_alignment, NonnullLockRefPtr<VMObject> vmobject, size_t offset_in_vmobject, StringView name, int prot, bool shared)
{
    if (!requested_address.is_page_aligned())
        return EINVAL;
    auto size = TRY(page_round_up(requested_size));
    auto alignment = TRY(page_round_up(requested_alignment));

    if (Checked<size_t>::addition_would_overflow(offset_in_vmobject, requested_size))
        return EOVERFLOW;

    size_t end_in_vmobject = offset_in_vmobject + requested_size;
    if (offset_in_vmobject >= vmobject->size()) {
        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.");
        return EINVAL;
    }
    if (end_in_vmobject > vmobject->size()) {
        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.");
        return EINVAL;
    }
    offset_in_vmobject &= PAGE_MASK;
    OwnPtr<KString> region_name;
    if (!name.is_null())
        region_name = TRY(KString::try_create(name));

    auto region = TRY(Region::create_unplaced(move(vmobject), offset_in_vmobject, move(region_name), prot_to_region_access_flags(prot), Region::Cacheable::Yes, shared));

    if (requested_address.is_null())
        TRY(m_region_tree.place_anywhere(*region, randomize_virtual_address, size, alignment));
    else
        TRY(m_region_tree.place_specifically(*region, VirtualRange { VirtualAddress { requested_address }, size }));

    ArmedScopeGuard remove_region_from_tree_on_failure = [&] {
        // At this point the region is already part of the Process region tree, so we have to make sure
        // we remove it from the tree before returning an error, or else the Region tree will contain
        // a dangling pointer to the freed Region instance.
        m_region_tree.remove(*region);
    };

    if (prot == PROT_NONE) {
        // For PROT_NONE mappings, we don't have to set up any page table mappings.
        // We do still need to attach the region to the page_directory though.
        region->set_page_directory(page_directory());
    } else {
        TRY(region->map(page_directory(), ShouldFlushTLB::No));
    }
    remove_region_from_tree_on_failure.disarm();
    return region.leak_ptr();
}

void AddressSpace::deallocate_region(Region& region)
{
    (void)take_region(region);
}

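// Remove the region from the region tree and transfer ownership of it to the caller.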
NonnullOwnPtr<Region> AddressSpace::take_region(Region& region)
{
    auto did_remove = m_region_tree.remove(region);
    VERIFY(did_remove);
    return NonnullOwnPtr { NonnullOwnPtr<Region>::Adopt, region };
}

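// Find the region that starts exactly at `range.base()` and whose size matches
// `range.size()` rounded up to a page boundary; returns nullptr if there is no such region.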
Region* AddressSpace::find_region_from_range(VirtualRange const& range)
{
    auto* found_region = m_region_tree.regions().find(range.base().get());
    if (!found_region)
        return nullptr;
    auto& region = *found_region;
    auto rounded_range_size = page_round_up(range.size());
    if (rounded_range_size.is_error() || region.size() != rounded_range_size.value())
        return nullptr;
    return &region;
}

Region* AddressSpace::find_region_containing(VirtualRange const& range)
{
    return m_region_tree.find_region_containing(range);
}

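// Collect every region that overlaps `range`, starting from the last region whose base is
// not above `range.base()` and walking forward through the region tree.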
ErrorOr<Vector<Region*, 4>> AddressSpace::find_regions_intersecting(VirtualRange const& range)
{
    Vector<Region*, 4> regions = {};
    size_t total_size_collected = 0;

    auto* found_region = m_region_tree.regions().find_largest_not_above(range.base().get());
    if (!found_region)
        return regions;
    for (auto iter = m_region_tree.regions().begin_from(*found_region); !iter.is_end(); ++iter) {
        auto const& iter_range = (*iter).range();
        if (iter_range.base() < range.end() && iter_range.end() > range.base()) {
            TRY(regions.try_append(&*iter));

            total_size_collected += (*iter).size() - iter_range.intersect(range).size();
            if (total_size_collected == range.size())
                break;
        }
    }

    return regions;
}

// Carve out a virtual address range from a region and return the remaining region(s) on either side.
ErrorOr<Vector<Region*, 2>> AddressSpace::try_split_region_around_range(Region const& source_region, VirtualRange const& desired_range)
{
    VirtualRange old_region_range = source_region.range();
    auto remaining_ranges_after_unmap = old_region_range.carve(desired_range);

    VERIFY(!remaining_ranges_after_unmap.is_empty());
    auto try_make_replacement_region = [&](VirtualRange const& new_range) -> ErrorOr<Region*> {
        VERIFY(old_region_range.contains(new_range));
        size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get());
        return try_allocate_split_region(source_region, new_range, new_range_offset_in_vmobject);
    };
    Vector<Region*, 2> new_regions;
    for (auto& new_range : remaining_ranges_after_unmap) {
        auto* new_region = TRY(try_make_replacement_region(new_range));
        new_regions.unchecked_append(new_region);
    }
    return new_regions;
}

void AddressSpace::dump_regions()
{
    dbgln("Process regions:");
    char const* addr_padding = " ";
    dbgln("BEGIN{} END{} SIZE{} ACCESS NAME",
        addr_padding, addr_padding, addr_padding);

    for (auto const& region : m_region_tree.regions()) {
        dbgln("{:p} -- {:p} {:p} {:c}{:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(),
            region.is_readable() ? 'R' : ' ',
            region.is_writable() ? 'W' : ' ',
            region.is_executable() ? 'X' : ' ',
            region.is_shared() ? 'S' : ' ',
            region.is_stack() ? 'T' : ' ',
            region.is_syscall_region() ? 'C' : ' ',
            region.name());
    }
    MM.dump_kernel_regions();
}

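// Tear down the entire address space: unmap every region while holding the page directory
// lock, then delete all regions. Only the finalizer thread may do this.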
void AddressSpace::remove_all_regions(Badge<Process>)
{
    VERIFY(Thread::current() == g_finalizer);
    {
        SpinlockLocker pd_locker(m_page_directory->get_lock());
        for (auto& region : m_region_tree.regions())
            region.unmap_with_locks_held(ShouldFlushTLB::No, pd_locker);
    }

    m_region_tree.delete_all_regions_assuming_they_are_unmapped();
}

size_t AddressSpace::amount_dirty_private() const
{
    // FIXME: This gets a bit more complicated for Regions sharing the same underlying VMObject.
    //        The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping.
    //        That's probably a situation that needs to be looked at in general.
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        if (!region.is_shared())
            amount += region.amount_dirty();
    }
    return amount;
}

ErrorOr<size_t> AddressSpace::amount_clean_inode() const
{
    HashTable<LockRefPtr<InodeVMObject>> vmobjects;
    for (auto const& region : m_region_tree.regions()) {
        if (region.vmobject().is_inode())
            TRY(vmobjects.try_set(&static_cast<InodeVMObject const&>(region.vmobject())));
    }
    size_t amount = 0;
    for (auto& vmobject : vmobjects)
        amount += vmobject->amount_clean();
    return amount;
}

size_t AddressSpace::amount_virtual() const
{
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        amount += region.size();
    }
    return amount;
}

size_t AddressSpace::amount_resident() const
{
    // FIXME: This will double count if multiple regions use the same physical page.
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        amount += region.amount_resident();
    }
    return amount;
}

size_t AddressSpace::amount_shared() const
{
    // FIXME: This will double count if multiple regions use the same physical page.
    // FIXME: It doesn't work at the moment, since it relies on PhysicalPage ref counts,
    //        and each PhysicalPage is only reffed by its VMObject. This needs to be refactored
    //        so that every Region contributes +1 ref to each of its PhysicalPages.
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        amount += region.amount_shared();
    }
    return amount;
}

size_t AddressSpace::amount_purgeable_volatile() const
{
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        if (!region.vmobject().is_anonymous())
            continue;
        auto const& vmobject = static_cast<AnonymousVMObject const&>(region.vmobject());
        if (vmobject.is_purgeable() && vmobject.is_volatile())
            amount += region.amount_resident();
    }
    return amount;
}

size_t AddressSpace::amount_purgeable_nonvolatile() const
{
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        if (!region.vmobject().is_anonymous())
            continue;
        auto const& vmobject = static_cast<AnonymousVMObject const&>(region.vmobject());
        if (vmobject.is_purgeable() && !vmobject.is_volatile())
            amount += region.amount_resident();
    }
    return amount;
}

}