Serenity Operating System
1/*
2 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/Assertions.h>
8#include <AK/Types.h>
9#include <Kernel/Arch/PageDirectory.h>
10#include <Kernel/Debug.h>
11#include <Kernel/Heap/Heap.h>
12#include <Kernel/Heap/kmalloc.h>
13#include <Kernel/KSyms.h>
14#include <Kernel/Locking/Spinlock.h>
15#include <Kernel/Memory/MemoryManager.h>
16#include <Kernel/Panic.h>
17#include <Kernel/PerformanceManager.h>
18#include <Kernel/Sections.h>
19#include <Kernel/StdLib.h>
20
21#if ARCH(X86_64) || ARCH(AARCH64)
22static constexpr size_t CHUNK_SIZE = 64;
23#else
24# error Unknown architecture
25#endif
26static_assert(is_power_of_two(CHUNK_SIZE));
27
28static constexpr size_t INITIAL_KMALLOC_MEMORY_SIZE = 2 * MiB;
29static constexpr size_t KMALLOC_DEFAULT_ALIGNMENT = 16;
30
31// Treat the heap as logically separate from .bss
32__attribute__((section(".heap"))) static u8 initial_kmalloc_memory[INITIAL_KMALLOC_MEMORY_SIZE];
33
34namespace std {
35const nothrow_t nothrow;
36}
37
38// FIXME: Figure out whether this can be MemoryManager.
39static RecursiveSpinlock<LockRank::None> s_lock {}; // needs to be recursive because of dump_backtrace()
40
41struct KmallocSubheap {
42 KmallocSubheap(u8* base, size_t size)
43 : allocator(base, size)
44 {
45 }
46
47 IntrusiveListNode<KmallocSubheap> list_node;
48 using List = IntrusiveList<&KmallocSubheap::list_node>;
49 Heap<CHUNK_SIZE, KMALLOC_SCRUB_BYTE, KFREE_SCRUB_BYTE> allocator;
50};
51
52class KmallocSlabBlock {
53public:
54 static constexpr size_t block_size = 64 * KiB;
55 static constexpr FlatPtr block_mask = ~(block_size - 1);
56
57 KmallocSlabBlock(size_t slab_size)
58 : m_slab_size(slab_size)
59 , m_slab_count((block_size - sizeof(KmallocSlabBlock)) / slab_size)
60 {
61 for (size_t i = 0; i < m_slab_count; ++i) {
62 auto* freelist_entry = (FreelistEntry*)(void*)(&m_data[i * slab_size]);
63 freelist_entry->next = m_freelist;
64 m_freelist = freelist_entry;
65 }
66 }
67
68 void* allocate()
69 {
70 VERIFY(m_freelist);
71 ++m_allocated_slabs;
72 return exchange(m_freelist, m_freelist->next);
73 }
74
75 void deallocate(void* ptr)
76 {
77 VERIFY(ptr >= &m_data && ptr < ((u8*)this + block_size));
78 --m_allocated_slabs;
79 auto* freelist_entry = (FreelistEntry*)ptr;
80 freelist_entry->next = m_freelist;
81 m_freelist = freelist_entry;
82 }
83
84 bool is_full() const
85 {
86 return m_freelist == nullptr;
87 }
88
89 size_t allocated_bytes() const
90 {
91 return m_allocated_slabs * m_slab_size;
92 }
93
94 size_t free_bytes() const
95 {
96 return (m_slab_count - m_allocated_slabs) * m_slab_size;
97 }
98
99 IntrusiveListNode<KmallocSlabBlock> list_node;
100 using List = IntrusiveList<&KmallocSlabBlock::list_node>;
101
102private:
103 struct FreelistEntry {
104 FreelistEntry* next;
105 };
106
107 FreelistEntry* m_freelist { nullptr };
108
109 size_t m_slab_size { 0 };
110 size_t m_slab_count { 0 };
111 size_t m_allocated_slabs { 0 };
112
113 [[gnu::aligned(16)]] u8 m_data[];
114};
115
116class KmallocSlabheap {
117public:
118 KmallocSlabheap(size_t slab_size)
119 : m_slab_size(slab_size)
120 {
121 }
122
123 size_t slab_size() const { return m_slab_size; }
124
125 void* allocate(CallerWillInitializeMemory caller_will_initialize_memory)
126 {
127 if (m_usable_blocks.is_empty()) {
128 // FIXME: This allocation wastes `block_size` bytes due to the implementation of kmalloc_aligned().
129 // Handle this with a custom VM+page allocator instead of using kmalloc_aligned().
130 auto* slot = kmalloc_aligned(KmallocSlabBlock::block_size, KmallocSlabBlock::block_size);
131 if (!slot) {
132 dbgln_if(KMALLOC_DEBUG, "OOM while growing slabheap ({})", m_slab_size);
133 return nullptr;
134 }
135 auto* block = new (slot) KmallocSlabBlock(m_slab_size);
136 m_usable_blocks.append(*block);
137 }
138 auto* block = m_usable_blocks.first();
139 auto* ptr = block->allocate();
140 if (block->is_full())
141 m_full_blocks.append(*block);
142
143 if (caller_will_initialize_memory == CallerWillInitializeMemory::No) {
144 memset(ptr, KMALLOC_SCRUB_BYTE, m_slab_size);
145 }
146 return ptr;
147 }
148
149 void deallocate(void* ptr)
150 {
151 memset(ptr, KFREE_SCRUB_BYTE, m_slab_size);
152
153 auto* block = (KmallocSlabBlock*)((FlatPtr)ptr & KmallocSlabBlock::block_mask);
154 bool block_was_full = block->is_full();
155 block->deallocate(ptr);
156 if (block_was_full)
157 m_usable_blocks.append(*block);
158 }
159
160 size_t allocated_bytes() const
161 {
162 size_t total = m_full_blocks.size_slow() * KmallocSlabBlock::block_size;
163 for (auto const& slab_block : m_usable_blocks)
164 total += slab_block.allocated_bytes();
165 return total;
166 }
167
168 size_t free_bytes() const
169 {
170 size_t total = 0;
171 for (auto const& slab_block : m_usable_blocks)
172 total += slab_block.free_bytes();
173 return total;
174 }
175
176 bool try_purge()
177 {
178 bool did_purge = false;
179
180 // Note: We cannot remove children from the list when using a structured loop,
181 // Because we need to advance the iterator before we delete the underlying
182 // value, so we have to iterate manually
183
184 auto block = m_usable_blocks.begin();
185 while (block != m_usable_blocks.end()) {
186 if (block->allocated_bytes() != 0) {
187 ++block;
188 continue;
189 }
190 auto& block_to_remove = *block;
191 ++block;
192 block_to_remove.list_node.remove();
193 block_to_remove.~KmallocSlabBlock();
194 kfree_sized(&block_to_remove, KmallocSlabBlock::block_size);
195
196 did_purge = true;
197 }
198 return did_purge;
199 }
200
201private:
202 size_t m_slab_size { 0 };
203
204 KmallocSlabBlock::List m_usable_blocks;
205 KmallocSlabBlock::List m_full_blocks;
206};
207
208struct KmallocGlobalData {
209 static constexpr size_t minimum_subheap_size = 1 * MiB;
210
211 KmallocGlobalData(u8* initial_heap, size_t initial_heap_size)
212 {
213 add_subheap(initial_heap, initial_heap_size);
214 }
215
216 void add_subheap(u8* storage, size_t storage_size)
217 {
218 dbgln_if(KMALLOC_DEBUG, "Adding kmalloc subheap @ {} with size {}", storage, storage_size);
219 static_assert(sizeof(KmallocSubheap) <= PAGE_SIZE);
220 auto* subheap = new (storage) KmallocSubheap(storage + PAGE_SIZE, storage_size - PAGE_SIZE);
221 subheaps.append(*subheap);
222 }
223
224 void* allocate(size_t size, size_t alignment, CallerWillInitializeMemory caller_will_initialize_memory)
225 {
226 VERIFY(!expansion_in_progress);
227
228 for (auto& slabheap : slabheaps) {
229 if (size <= slabheap.slab_size() && alignment <= slabheap.slab_size())
230 return slabheap.allocate(caller_will_initialize_memory);
231 }
232
233 for (auto& subheap : subheaps) {
234 if (auto* ptr = subheap.allocator.allocate(size, alignment, caller_will_initialize_memory))
235 return ptr;
236 }
237
238 // NOTE: This size calculation is a mirror of kmalloc_aligned(KmallocSlabBlock)
239 if (size <= KmallocSlabBlock::block_size * 2 + sizeof(ptrdiff_t) + sizeof(size_t)) {
240 // FIXME: We should propagate a freed pointer, to find the specific subheap it belonged to
241 // This would save us iterating over them in the next step and remove a recursion
242 bool did_purge = false;
243 for (auto& slabheap : slabheaps) {
244 if (slabheap.try_purge()) {
245 dbgln_if(KMALLOC_DEBUG, "Kmalloc purged block(s) from slabheap of size {} to avoid expansion", slabheap.slab_size());
246 did_purge = true;
247 break;
248 }
249 }
250 if (did_purge)
251 return allocate(size, alignment, caller_will_initialize_memory);
252 }
253
254 if (!try_expand(size)) {
255 dbgln_if(KMALLOC_DEBUG, "OOM when trying to expand kmalloc heap");
256 return nullptr;
257 }
258
259 return allocate(size, alignment, caller_will_initialize_memory);
260 }
261
262 void deallocate(void* ptr, size_t size)
263 {
264 VERIFY(!expansion_in_progress);
265 VERIFY(is_valid_kmalloc_address(VirtualAddress { ptr }));
266
267 for (auto& slabheap : slabheaps) {
268 if (size <= slabheap.slab_size())
269 return slabheap.deallocate(ptr);
270 }
271
272 for (auto& subheap : subheaps) {
273 if (subheap.allocator.contains(ptr)) {
274 subheap.allocator.deallocate(ptr);
275 return;
276 }
277 }
278
279 PANIC("Bogus pointer passed to kfree_sized({:p}, {})", ptr, size);
280 }
281
282 size_t allocated_bytes() const
283 {
284 size_t total = 0;
285 for (auto const& subheap : subheaps)
286 total += subheap.allocator.allocated_bytes();
287 for (auto const& slabheap : slabheaps)
288 total += slabheap.allocated_bytes();
289 return total;
290 }
291
292 size_t free_bytes() const
293 {
294 size_t total = 0;
295 for (auto const& subheap : subheaps)
296 total += subheap.allocator.free_bytes();
297 for (auto const& slabheap : slabheaps)
298 total += slabheap.free_bytes();
299 return total;
300 }
301
302 bool try_expand(size_t allocation_request)
303 {
304 VERIFY(!expansion_in_progress);
305 TemporaryChange change(expansion_in_progress, true);
306
307 auto new_subheap_base = expansion_data->next_virtual_address;
308 Checked<size_t> padded_allocation_request = allocation_request;
309 padded_allocation_request *= 2;
310 padded_allocation_request += PAGE_SIZE;
311 if (padded_allocation_request.has_overflow()) {
312 PANIC("Integer overflow during kmalloc heap expansion");
313 }
314 auto rounded_allocation_request = Memory::page_round_up(padded_allocation_request.value());
315 if (rounded_allocation_request.is_error()) {
316 PANIC("Integer overflow computing pages for kmalloc heap expansion");
317 }
318 size_t new_subheap_size = max(minimum_subheap_size, rounded_allocation_request.value());
319
320 dbgln_if(KMALLOC_DEBUG, "Unable to allocate {}, expanding kmalloc heap", allocation_request);
321
322 if (!expansion_data->virtual_range.contains(new_subheap_base, new_subheap_size)) {
323 dbgln_if(KMALLOC_DEBUG, "Out of address space when expanding kmalloc heap");
324 return false;
325 }
326
327 auto physical_pages_or_error = MM.commit_physical_pages(new_subheap_size / PAGE_SIZE);
328 if (physical_pages_or_error.is_error()) {
329 dbgln_if(KMALLOC_DEBUG, "Out of address space when expanding kmalloc heap");
330 return false;
331 }
332 auto physical_pages = physical_pages_or_error.release_value();
333
334 expansion_data->next_virtual_address = expansion_data->next_virtual_address.offset(new_subheap_size);
335
336 auto cpu_supports_nx = Processor::current().has_nx();
337
338 SpinlockLocker pd_locker(MM.kernel_page_directory().get_lock());
339
340 for (auto vaddr = new_subheap_base; !physical_pages.is_empty(); vaddr = vaddr.offset(PAGE_SIZE)) {
341 // FIXME: We currently leak physical memory when mapping it into the kmalloc heap.
342 auto& page = physical_pages.take_one().leak_ref();
343 auto* pte = MM.pte(MM.kernel_page_directory(), vaddr);
344 VERIFY(pte);
345 pte->set_physical_page_base(page.paddr().get());
346 pte->set_global(true);
347 pte->set_user_allowed(false);
348 pte->set_writable(true);
349 if (cpu_supports_nx)
350 pte->set_execute_disabled(true);
351 pte->set_present(true);
352 }
353
354 add_subheap(new_subheap_base.as_ptr(), new_subheap_size);
355 return true;
356 }
357
358 void enable_expansion()
359 {
360 // FIXME: This range can be much bigger on 64-bit, but we need to figure something out for 32-bit.
361 auto reserved_region = MUST(MM.allocate_unbacked_region_anywhere(64 * MiB, 1 * MiB));
362
363 expansion_data = KmallocGlobalData::ExpansionData {
364 .virtual_range = reserved_region->range(),
365 .next_virtual_address = reserved_region->range().base(),
366 };
367
368 // Make sure the entire kmalloc VM range is backed by page tables.
369 // This avoids having to deal with lazy page table allocation during heap expansion.
370 SpinlockLocker pd_locker(MM.kernel_page_directory().get_lock());
371 for (auto vaddr = reserved_region->range().base(); vaddr < reserved_region->range().end(); vaddr = vaddr.offset(PAGE_SIZE)) {
372 MM.ensure_pte(MM.kernel_page_directory(), vaddr);
373 }
374
375 (void)reserved_region.leak_ptr();
376 }
377
378 struct ExpansionData {
379 Memory::VirtualRange virtual_range;
380 VirtualAddress next_virtual_address;
381 };
382 Optional<ExpansionData> expansion_data;
383
384 bool is_valid_kmalloc_address(VirtualAddress vaddr) const
385 {
386 if (vaddr.as_ptr() >= initial_kmalloc_memory && vaddr.as_ptr() < (initial_kmalloc_memory + INITIAL_KMALLOC_MEMORY_SIZE))
387 return true;
388
389 if (!expansion_data.has_value())
390 return false;
391
392 return expansion_data->virtual_range.contains(vaddr);
393 }
394
395 KmallocSubheap::List subheaps;
396
397 KmallocSlabheap slabheaps[6] = { 16, 32, 64, 128, 256, 512 };
398
399 bool expansion_in_progress { false };
400};
401
402READONLY_AFTER_INIT static KmallocGlobalData* g_kmalloc_global;
403alignas(KmallocGlobalData) static u8 g_kmalloc_global_heap[sizeof(KmallocGlobalData)];
404
405static size_t g_kmalloc_call_count;
406static size_t g_kfree_call_count;
407static size_t g_nested_kfree_calls;
408bool g_dump_kmalloc_stacks;
409
410void kmalloc_enable_expand()
411{
412 g_kmalloc_global->enable_expansion();
413}
414
415UNMAP_AFTER_INIT void kmalloc_init()
416{
417 // Zero out heap since it's placed after end_of_kernel_bss.
418 memset(initial_kmalloc_memory, 0, sizeof(initial_kmalloc_memory));
419 g_kmalloc_global = new (g_kmalloc_global_heap) KmallocGlobalData(initial_kmalloc_memory, sizeof(initial_kmalloc_memory));
420
421 s_lock.initialize();
422}
423
424static void* kmalloc_impl(size_t size, size_t alignment, CallerWillInitializeMemory caller_will_initialize_memory)
425{
426 // Catch bad callers allocating under spinlock.
427 if constexpr (KMALLOC_VERIFY_NO_SPINLOCK_HELD) {
428 Processor::verify_no_spinlocks_held();
429 }
430
431 // Alignment must be a power of two.
432 VERIFY(is_power_of_two(alignment));
433
434 SpinlockLocker lock(s_lock);
435 ++g_kmalloc_call_count;
436
437 if (g_dump_kmalloc_stacks && Kernel::g_kernel_symbols_available) {
438 dbgln("kmalloc({})", size);
439 Kernel::dump_backtrace();
440 }
441
442 void* ptr = g_kmalloc_global->allocate(size, alignment, caller_will_initialize_memory);
443
444 Thread* current_thread = Thread::current();
445 if (!current_thread)
446 current_thread = Processor::idle_thread();
447 if (current_thread) {
448 // FIXME: By the time we check this, we have already allocated above.
449 // This means that in the case of an infinite recursion, we can't catch it this way.
450 VERIFY(current_thread->is_allocation_enabled());
451 PerformanceManager::add_kmalloc_perf_event(*current_thread, size, (FlatPtr)ptr);
452 }
453
454 return ptr;
455}
456
457void* kmalloc(size_t size)
458{
459 return kmalloc_impl(size, KMALLOC_DEFAULT_ALIGNMENT, CallerWillInitializeMemory::No);
460}
461
462void* kcalloc(size_t count, size_t size)
463{
464 if (Checked<size_t>::multiplication_would_overflow(count, size))
465 return nullptr;
466 size_t new_size = count * size;
467 auto* ptr = kmalloc_impl(new_size, KMALLOC_DEFAULT_ALIGNMENT, CallerWillInitializeMemory::Yes);
468 if (ptr)
469 memset(ptr, 0, new_size);
470 return ptr;
471}
472
473void kfree_sized(void* ptr, size_t size)
474{
475 if (!ptr)
476 return;
477
478 VERIFY(size > 0);
479
480 // Catch bad callers allocating under spinlock.
481 if constexpr (KMALLOC_VERIFY_NO_SPINLOCK_HELD) {
482 Processor::verify_no_spinlocks_held();
483 }
484
485 SpinlockLocker lock(s_lock);
486 ++g_kfree_call_count;
487 ++g_nested_kfree_calls;
488
489 if (g_nested_kfree_calls == 1) {
490 Thread* current_thread = Thread::current();
491 if (!current_thread)
492 current_thread = Processor::idle_thread();
493 if (current_thread) {
494 VERIFY(current_thread->is_allocation_enabled());
495 PerformanceManager::add_kfree_perf_event(*current_thread, 0, (FlatPtr)ptr);
496 }
497 }
498
499 g_kmalloc_global->deallocate(ptr, size);
500 --g_nested_kfree_calls;
501}
502
503size_t kmalloc_good_size(size_t size)
504{
505 VERIFY(size > 0);
506 // NOTE: There's no need to take the kmalloc lock, as the kmalloc slab-heaps (and their sizes) are constant
507 for (auto const& slabheap : g_kmalloc_global->slabheaps) {
508 if (size <= slabheap.slab_size())
509 return slabheap.slab_size();
510 }
511 return round_up_to_power_of_two(size + Heap<CHUNK_SIZE>::AllocationHeaderSize, CHUNK_SIZE) - Heap<CHUNK_SIZE>::AllocationHeaderSize;
512}
513
514void* kmalloc_aligned(size_t size, size_t alignment)
515{
516 return kmalloc_impl(size, alignment, CallerWillInitializeMemory::No);
517}
518
519void* operator new(size_t size)
520{
521 void* ptr = kmalloc(size);
522 VERIFY(ptr);
523 return ptr;
524}
525
526void* operator new(size_t size, std::nothrow_t const&) noexcept
527{
528 return kmalloc(size);
529}
530
531void* operator new(size_t size, std::align_val_t al)
532{
533 void* ptr = kmalloc_aligned(size, (size_t)al);
534 VERIFY(ptr);
535 return ptr;
536}
537
538void* operator new(size_t size, std::align_val_t al, std::nothrow_t const&) noexcept
539{
540 return kmalloc_aligned(size, (size_t)al);
541}
542
543void* operator new[](size_t size)
544{
545 void* ptr = kmalloc(size);
546 VERIFY(ptr);
547 return ptr;
548}
549
550void* operator new[](size_t size, std::nothrow_t const&) noexcept
551{
552 return kmalloc(size);
553}
554
555void operator delete(void*) noexcept
556{
557 // All deletes in kernel code should have a known size.
558 VERIFY_NOT_REACHED();
559}
560
561void operator delete(void* ptr, size_t size) noexcept
562{
563 return kfree_sized(ptr, size);
564}
565
566void operator delete(void* ptr, size_t size, std::align_val_t) noexcept
567{
568 return kfree_sized(ptr, size);
569}
570
571void operator delete[](void*) noexcept
572{
573 // All deletes in kernel code should have a known size.
574 VERIFY_NOT_REACHED();
575}
576
577void operator delete[](void* ptr, size_t size) noexcept
578{
579 return kfree_sized(ptr, size);
580}
581
582void get_kmalloc_stats(kmalloc_stats& stats)
583{
584 SpinlockLocker lock(s_lock);
585 stats.bytes_allocated = g_kmalloc_global->allocated_bytes();
586 stats.bytes_free = g_kmalloc_global->free_bytes();
587 stats.kmalloc_call_count = g_kmalloc_call_count;
588 stats.kfree_call_count = g_kfree_call_count;
589}