Serenity Operating System
at master 589 lines 19 kB view raw
1/* 2 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/Assertions.h> 8#include <AK/Types.h> 9#include <Kernel/Arch/PageDirectory.h> 10#include <Kernel/Debug.h> 11#include <Kernel/Heap/Heap.h> 12#include <Kernel/Heap/kmalloc.h> 13#include <Kernel/KSyms.h> 14#include <Kernel/Locking/Spinlock.h> 15#include <Kernel/Memory/MemoryManager.h> 16#include <Kernel/Panic.h> 17#include <Kernel/PerformanceManager.h> 18#include <Kernel/Sections.h> 19#include <Kernel/StdLib.h> 20 21#if ARCH(X86_64) || ARCH(AARCH64) 22static constexpr size_t CHUNK_SIZE = 64; 23#else 24# error Unknown architecture 25#endif 26static_assert(is_power_of_two(CHUNK_SIZE)); 27 28static constexpr size_t INITIAL_KMALLOC_MEMORY_SIZE = 2 * MiB; 29static constexpr size_t KMALLOC_DEFAULT_ALIGNMENT = 16; 30 31// Treat the heap as logically separate from .bss 32__attribute__((section(".heap"))) static u8 initial_kmalloc_memory[INITIAL_KMALLOC_MEMORY_SIZE]; 33 34namespace std { 35const nothrow_t nothrow; 36} 37 38// FIXME: Figure out whether this can be MemoryManager. 39static RecursiveSpinlock<LockRank::None> s_lock {}; // needs to be recursive because of dump_backtrace() 40 41struct KmallocSubheap { 42 KmallocSubheap(u8* base, size_t size) 43 : allocator(base, size) 44 { 45 } 46 47 IntrusiveListNode<KmallocSubheap> list_node; 48 using List = IntrusiveList<&KmallocSubheap::list_node>; 49 Heap<CHUNK_SIZE, KMALLOC_SCRUB_BYTE, KFREE_SCRUB_BYTE> allocator; 50}; 51 52class KmallocSlabBlock { 53public: 54 static constexpr size_t block_size = 64 * KiB; 55 static constexpr FlatPtr block_mask = ~(block_size - 1); 56 57 KmallocSlabBlock(size_t slab_size) 58 : m_slab_size(slab_size) 59 , m_slab_count((block_size - sizeof(KmallocSlabBlock)) / slab_size) 60 { 61 for (size_t i = 0; i < m_slab_count; ++i) { 62 auto* freelist_entry = (FreelistEntry*)(void*)(&m_data[i * slab_size]); 63 freelist_entry->next = m_freelist; 64 m_freelist = freelist_entry; 65 } 66 } 67 68 void* allocate() 69 { 70 VERIFY(m_freelist); 71 ++m_allocated_slabs; 72 return exchange(m_freelist, m_freelist->next); 73 } 74 75 void deallocate(void* ptr) 76 { 77 VERIFY(ptr >= &m_data && ptr < ((u8*)this + block_size)); 78 --m_allocated_slabs; 79 auto* freelist_entry = (FreelistEntry*)ptr; 80 freelist_entry->next = m_freelist; 81 m_freelist = freelist_entry; 82 } 83 84 bool is_full() const 85 { 86 return m_freelist == nullptr; 87 } 88 89 size_t allocated_bytes() const 90 { 91 return m_allocated_slabs * m_slab_size; 92 } 93 94 size_t free_bytes() const 95 { 96 return (m_slab_count - m_allocated_slabs) * m_slab_size; 97 } 98 99 IntrusiveListNode<KmallocSlabBlock> list_node; 100 using List = IntrusiveList<&KmallocSlabBlock::list_node>; 101 102private: 103 struct FreelistEntry { 104 FreelistEntry* next; 105 }; 106 107 FreelistEntry* m_freelist { nullptr }; 108 109 size_t m_slab_size { 0 }; 110 size_t m_slab_count { 0 }; 111 size_t m_allocated_slabs { 0 }; 112 113 [[gnu::aligned(16)]] u8 m_data[]; 114}; 115 116class KmallocSlabheap { 117public: 118 KmallocSlabheap(size_t slab_size) 119 : m_slab_size(slab_size) 120 { 121 } 122 123 size_t slab_size() const { return m_slab_size; } 124 125 void* allocate(CallerWillInitializeMemory caller_will_initialize_memory) 126 { 127 if (m_usable_blocks.is_empty()) { 128 // FIXME: This allocation wastes `block_size` bytes due to the implementation of kmalloc_aligned(). 129 // Handle this with a custom VM+page allocator instead of using kmalloc_aligned(). 130 auto* slot = kmalloc_aligned(KmallocSlabBlock::block_size, KmallocSlabBlock::block_size); 131 if (!slot) { 132 dbgln_if(KMALLOC_DEBUG, "OOM while growing slabheap ({})", m_slab_size); 133 return nullptr; 134 } 135 auto* block = new (slot) KmallocSlabBlock(m_slab_size); 136 m_usable_blocks.append(*block); 137 } 138 auto* block = m_usable_blocks.first(); 139 auto* ptr = block->allocate(); 140 if (block->is_full()) 141 m_full_blocks.append(*block); 142 143 if (caller_will_initialize_memory == CallerWillInitializeMemory::No) { 144 memset(ptr, KMALLOC_SCRUB_BYTE, m_slab_size); 145 } 146 return ptr; 147 } 148 149 void deallocate(void* ptr) 150 { 151 memset(ptr, KFREE_SCRUB_BYTE, m_slab_size); 152 153 auto* block = (KmallocSlabBlock*)((FlatPtr)ptr & KmallocSlabBlock::block_mask); 154 bool block_was_full = block->is_full(); 155 block->deallocate(ptr); 156 if (block_was_full) 157 m_usable_blocks.append(*block); 158 } 159 160 size_t allocated_bytes() const 161 { 162 size_t total = m_full_blocks.size_slow() * KmallocSlabBlock::block_size; 163 for (auto const& slab_block : m_usable_blocks) 164 total += slab_block.allocated_bytes(); 165 return total; 166 } 167 168 size_t free_bytes() const 169 { 170 size_t total = 0; 171 for (auto const& slab_block : m_usable_blocks) 172 total += slab_block.free_bytes(); 173 return total; 174 } 175 176 bool try_purge() 177 { 178 bool did_purge = false; 179 180 // Note: We cannot remove children from the list when using a structured loop, 181 // Because we need to advance the iterator before we delete the underlying 182 // value, so we have to iterate manually 183 184 auto block = m_usable_blocks.begin(); 185 while (block != m_usable_blocks.end()) { 186 if (block->allocated_bytes() != 0) { 187 ++block; 188 continue; 189 } 190 auto& block_to_remove = *block; 191 ++block; 192 block_to_remove.list_node.remove(); 193 block_to_remove.~KmallocSlabBlock(); 194 kfree_sized(&block_to_remove, KmallocSlabBlock::block_size); 195 196 did_purge = true; 197 } 198 return did_purge; 199 } 200 201private: 202 size_t m_slab_size { 0 }; 203 204 KmallocSlabBlock::List m_usable_blocks; 205 KmallocSlabBlock::List m_full_blocks; 206}; 207 208struct KmallocGlobalData { 209 static constexpr size_t minimum_subheap_size = 1 * MiB; 210 211 KmallocGlobalData(u8* initial_heap, size_t initial_heap_size) 212 { 213 add_subheap(initial_heap, initial_heap_size); 214 } 215 216 void add_subheap(u8* storage, size_t storage_size) 217 { 218 dbgln_if(KMALLOC_DEBUG, "Adding kmalloc subheap @ {} with size {}", storage, storage_size); 219 static_assert(sizeof(KmallocSubheap) <= PAGE_SIZE); 220 auto* subheap = new (storage) KmallocSubheap(storage + PAGE_SIZE, storage_size - PAGE_SIZE); 221 subheaps.append(*subheap); 222 } 223 224 void* allocate(size_t size, size_t alignment, CallerWillInitializeMemory caller_will_initialize_memory) 225 { 226 VERIFY(!expansion_in_progress); 227 228 for (auto& slabheap : slabheaps) { 229 if (size <= slabheap.slab_size() && alignment <= slabheap.slab_size()) 230 return slabheap.allocate(caller_will_initialize_memory); 231 } 232 233 for (auto& subheap : subheaps) { 234 if (auto* ptr = subheap.allocator.allocate(size, alignment, caller_will_initialize_memory)) 235 return ptr; 236 } 237 238 // NOTE: This size calculation is a mirror of kmalloc_aligned(KmallocSlabBlock) 239 if (size <= KmallocSlabBlock::block_size * 2 + sizeof(ptrdiff_t) + sizeof(size_t)) { 240 // FIXME: We should propagate a freed pointer, to find the specific subheap it belonged to 241 // This would save us iterating over them in the next step and remove a recursion 242 bool did_purge = false; 243 for (auto& slabheap : slabheaps) { 244 if (slabheap.try_purge()) { 245 dbgln_if(KMALLOC_DEBUG, "Kmalloc purged block(s) from slabheap of size {} to avoid expansion", slabheap.slab_size()); 246 did_purge = true; 247 break; 248 } 249 } 250 if (did_purge) 251 return allocate(size, alignment, caller_will_initialize_memory); 252 } 253 254 if (!try_expand(size)) { 255 dbgln_if(KMALLOC_DEBUG, "OOM when trying to expand kmalloc heap"); 256 return nullptr; 257 } 258 259 return allocate(size, alignment, caller_will_initialize_memory); 260 } 261 262 void deallocate(void* ptr, size_t size) 263 { 264 VERIFY(!expansion_in_progress); 265 VERIFY(is_valid_kmalloc_address(VirtualAddress { ptr })); 266 267 for (auto& slabheap : slabheaps) { 268 if (size <= slabheap.slab_size()) 269 return slabheap.deallocate(ptr); 270 } 271 272 for (auto& subheap : subheaps) { 273 if (subheap.allocator.contains(ptr)) { 274 subheap.allocator.deallocate(ptr); 275 return; 276 } 277 } 278 279 PANIC("Bogus pointer passed to kfree_sized({:p}, {})", ptr, size); 280 } 281 282 size_t allocated_bytes() const 283 { 284 size_t total = 0; 285 for (auto const& subheap : subheaps) 286 total += subheap.allocator.allocated_bytes(); 287 for (auto const& slabheap : slabheaps) 288 total += slabheap.allocated_bytes(); 289 return total; 290 } 291 292 size_t free_bytes() const 293 { 294 size_t total = 0; 295 for (auto const& subheap : subheaps) 296 total += subheap.allocator.free_bytes(); 297 for (auto const& slabheap : slabheaps) 298 total += slabheap.free_bytes(); 299 return total; 300 } 301 302 bool try_expand(size_t allocation_request) 303 { 304 VERIFY(!expansion_in_progress); 305 TemporaryChange change(expansion_in_progress, true); 306 307 auto new_subheap_base = expansion_data->next_virtual_address; 308 Checked<size_t> padded_allocation_request = allocation_request; 309 padded_allocation_request *= 2; 310 padded_allocation_request += PAGE_SIZE; 311 if (padded_allocation_request.has_overflow()) { 312 PANIC("Integer overflow during kmalloc heap expansion"); 313 } 314 auto rounded_allocation_request = Memory::page_round_up(padded_allocation_request.value()); 315 if (rounded_allocation_request.is_error()) { 316 PANIC("Integer overflow computing pages for kmalloc heap expansion"); 317 } 318 size_t new_subheap_size = max(minimum_subheap_size, rounded_allocation_request.value()); 319 320 dbgln_if(KMALLOC_DEBUG, "Unable to allocate {}, expanding kmalloc heap", allocation_request); 321 322 if (!expansion_data->virtual_range.contains(new_subheap_base, new_subheap_size)) { 323 dbgln_if(KMALLOC_DEBUG, "Out of address space when expanding kmalloc heap"); 324 return false; 325 } 326 327 auto physical_pages_or_error = MM.commit_physical_pages(new_subheap_size / PAGE_SIZE); 328 if (physical_pages_or_error.is_error()) { 329 dbgln_if(KMALLOC_DEBUG, "Out of address space when expanding kmalloc heap"); 330 return false; 331 } 332 auto physical_pages = physical_pages_or_error.release_value(); 333 334 expansion_data->next_virtual_address = expansion_data->next_virtual_address.offset(new_subheap_size); 335 336 auto cpu_supports_nx = Processor::current().has_nx(); 337 338 SpinlockLocker pd_locker(MM.kernel_page_directory().get_lock()); 339 340 for (auto vaddr = new_subheap_base; !physical_pages.is_empty(); vaddr = vaddr.offset(PAGE_SIZE)) { 341 // FIXME: We currently leak physical memory when mapping it into the kmalloc heap. 342 auto& page = physical_pages.take_one().leak_ref(); 343 auto* pte = MM.pte(MM.kernel_page_directory(), vaddr); 344 VERIFY(pte); 345 pte->set_physical_page_base(page.paddr().get()); 346 pte->set_global(true); 347 pte->set_user_allowed(false); 348 pte->set_writable(true); 349 if (cpu_supports_nx) 350 pte->set_execute_disabled(true); 351 pte->set_present(true); 352 } 353 354 add_subheap(new_subheap_base.as_ptr(), new_subheap_size); 355 return true; 356 } 357 358 void enable_expansion() 359 { 360 // FIXME: This range can be much bigger on 64-bit, but we need to figure something out for 32-bit. 361 auto reserved_region = MUST(MM.allocate_unbacked_region_anywhere(64 * MiB, 1 * MiB)); 362 363 expansion_data = KmallocGlobalData::ExpansionData { 364 .virtual_range = reserved_region->range(), 365 .next_virtual_address = reserved_region->range().base(), 366 }; 367 368 // Make sure the entire kmalloc VM range is backed by page tables. 369 // This avoids having to deal with lazy page table allocation during heap expansion. 370 SpinlockLocker pd_locker(MM.kernel_page_directory().get_lock()); 371 for (auto vaddr = reserved_region->range().base(); vaddr < reserved_region->range().end(); vaddr = vaddr.offset(PAGE_SIZE)) { 372 MM.ensure_pte(MM.kernel_page_directory(), vaddr); 373 } 374 375 (void)reserved_region.leak_ptr(); 376 } 377 378 struct ExpansionData { 379 Memory::VirtualRange virtual_range; 380 VirtualAddress next_virtual_address; 381 }; 382 Optional<ExpansionData> expansion_data; 383 384 bool is_valid_kmalloc_address(VirtualAddress vaddr) const 385 { 386 if (vaddr.as_ptr() >= initial_kmalloc_memory && vaddr.as_ptr() < (initial_kmalloc_memory + INITIAL_KMALLOC_MEMORY_SIZE)) 387 return true; 388 389 if (!expansion_data.has_value()) 390 return false; 391 392 return expansion_data->virtual_range.contains(vaddr); 393 } 394 395 KmallocSubheap::List subheaps; 396 397 KmallocSlabheap slabheaps[6] = { 16, 32, 64, 128, 256, 512 }; 398 399 bool expansion_in_progress { false }; 400}; 401 402READONLY_AFTER_INIT static KmallocGlobalData* g_kmalloc_global; 403alignas(KmallocGlobalData) static u8 g_kmalloc_global_heap[sizeof(KmallocGlobalData)]; 404 405static size_t g_kmalloc_call_count; 406static size_t g_kfree_call_count; 407static size_t g_nested_kfree_calls; 408bool g_dump_kmalloc_stacks; 409 410void kmalloc_enable_expand() 411{ 412 g_kmalloc_global->enable_expansion(); 413} 414 415UNMAP_AFTER_INIT void kmalloc_init() 416{ 417 // Zero out heap since it's placed after end_of_kernel_bss. 418 memset(initial_kmalloc_memory, 0, sizeof(initial_kmalloc_memory)); 419 g_kmalloc_global = new (g_kmalloc_global_heap) KmallocGlobalData(initial_kmalloc_memory, sizeof(initial_kmalloc_memory)); 420 421 s_lock.initialize(); 422} 423 424static void* kmalloc_impl(size_t size, size_t alignment, CallerWillInitializeMemory caller_will_initialize_memory) 425{ 426 // Catch bad callers allocating under spinlock. 427 if constexpr (KMALLOC_VERIFY_NO_SPINLOCK_HELD) { 428 Processor::verify_no_spinlocks_held(); 429 } 430 431 // Alignment must be a power of two. 432 VERIFY(is_power_of_two(alignment)); 433 434 SpinlockLocker lock(s_lock); 435 ++g_kmalloc_call_count; 436 437 if (g_dump_kmalloc_stacks && Kernel::g_kernel_symbols_available) { 438 dbgln("kmalloc({})", size); 439 Kernel::dump_backtrace(); 440 } 441 442 void* ptr = g_kmalloc_global->allocate(size, alignment, caller_will_initialize_memory); 443 444 Thread* current_thread = Thread::current(); 445 if (!current_thread) 446 current_thread = Processor::idle_thread(); 447 if (current_thread) { 448 // FIXME: By the time we check this, we have already allocated above. 449 // This means that in the case of an infinite recursion, we can't catch it this way. 450 VERIFY(current_thread->is_allocation_enabled()); 451 PerformanceManager::add_kmalloc_perf_event(*current_thread, size, (FlatPtr)ptr); 452 } 453 454 return ptr; 455} 456 457void* kmalloc(size_t size) 458{ 459 return kmalloc_impl(size, KMALLOC_DEFAULT_ALIGNMENT, CallerWillInitializeMemory::No); 460} 461 462void* kcalloc(size_t count, size_t size) 463{ 464 if (Checked<size_t>::multiplication_would_overflow(count, size)) 465 return nullptr; 466 size_t new_size = count * size; 467 auto* ptr = kmalloc_impl(new_size, KMALLOC_DEFAULT_ALIGNMENT, CallerWillInitializeMemory::Yes); 468 if (ptr) 469 memset(ptr, 0, new_size); 470 return ptr; 471} 472 473void kfree_sized(void* ptr, size_t size) 474{ 475 if (!ptr) 476 return; 477 478 VERIFY(size > 0); 479 480 // Catch bad callers allocating under spinlock. 481 if constexpr (KMALLOC_VERIFY_NO_SPINLOCK_HELD) { 482 Processor::verify_no_spinlocks_held(); 483 } 484 485 SpinlockLocker lock(s_lock); 486 ++g_kfree_call_count; 487 ++g_nested_kfree_calls; 488 489 if (g_nested_kfree_calls == 1) { 490 Thread* current_thread = Thread::current(); 491 if (!current_thread) 492 current_thread = Processor::idle_thread(); 493 if (current_thread) { 494 VERIFY(current_thread->is_allocation_enabled()); 495 PerformanceManager::add_kfree_perf_event(*current_thread, 0, (FlatPtr)ptr); 496 } 497 } 498 499 g_kmalloc_global->deallocate(ptr, size); 500 --g_nested_kfree_calls; 501} 502 503size_t kmalloc_good_size(size_t size) 504{ 505 VERIFY(size > 0); 506 // NOTE: There's no need to take the kmalloc lock, as the kmalloc slab-heaps (and their sizes) are constant 507 for (auto const& slabheap : g_kmalloc_global->slabheaps) { 508 if (size <= slabheap.slab_size()) 509 return slabheap.slab_size(); 510 } 511 return round_up_to_power_of_two(size + Heap<CHUNK_SIZE>::AllocationHeaderSize, CHUNK_SIZE) - Heap<CHUNK_SIZE>::AllocationHeaderSize; 512} 513 514void* kmalloc_aligned(size_t size, size_t alignment) 515{ 516 return kmalloc_impl(size, alignment, CallerWillInitializeMemory::No); 517} 518 519void* operator new(size_t size) 520{ 521 void* ptr = kmalloc(size); 522 VERIFY(ptr); 523 return ptr; 524} 525 526void* operator new(size_t size, std::nothrow_t const&) noexcept 527{ 528 return kmalloc(size); 529} 530 531void* operator new(size_t size, std::align_val_t al) 532{ 533 void* ptr = kmalloc_aligned(size, (size_t)al); 534 VERIFY(ptr); 535 return ptr; 536} 537 538void* operator new(size_t size, std::align_val_t al, std::nothrow_t const&) noexcept 539{ 540 return kmalloc_aligned(size, (size_t)al); 541} 542 543void* operator new[](size_t size) 544{ 545 void* ptr = kmalloc(size); 546 VERIFY(ptr); 547 return ptr; 548} 549 550void* operator new[](size_t size, std::nothrow_t const&) noexcept 551{ 552 return kmalloc(size); 553} 554 555void operator delete(void*) noexcept 556{ 557 // All deletes in kernel code should have a known size. 558 VERIFY_NOT_REACHED(); 559} 560 561void operator delete(void* ptr, size_t size) noexcept 562{ 563 return kfree_sized(ptr, size); 564} 565 566void operator delete(void* ptr, size_t size, std::align_val_t) noexcept 567{ 568 return kfree_sized(ptr, size); 569} 570 571void operator delete[](void*) noexcept 572{ 573 // All deletes in kernel code should have a known size. 574 VERIFY_NOT_REACHED(); 575} 576 577void operator delete[](void* ptr, size_t size) noexcept 578{ 579 return kfree_sized(ptr, size); 580} 581 582void get_kmalloc_stats(kmalloc_stats& stats) 583{ 584 SpinlockLocker lock(s_lock); 585 stats.bytes_allocated = g_kmalloc_global->allocated_bytes(); 586 stats.bytes_free = g_kmalloc_global->free_bytes(); 587 stats.kmalloc_call_count = g_kmalloc_call_count; 588 stats.kfree_call_count = g_kfree_call_count; 589}