The open source OpenXR runtime
at main 3518 lines 126 kB view raw
1#ifdef TRACY_ENABLE 2 3/* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias Jansson 4 * 5 * This library provides a cross-platform lock free thread caching malloc implementation in C11. 6 * The latest source code is always available at 7 * 8 * https://github.com/mjansson/rpmalloc 9 * 10 * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. 11 * 12 */ 13 14#include "tracy_rpmalloc.hpp" 15 16#define BUILD_DYNAMIC_LINK 1 17 18//////////// 19/// 20/// Build time configurable limits 21/// 22////// 23 24#if defined(__clang__) 25#pragma clang diagnostic ignored "-Wunused-macros" 26#pragma clang diagnostic ignored "-Wunused-function" 27#if __has_warning("-Wreserved-identifier") 28#pragma clang diagnostic ignored "-Wreserved-identifier" 29#endif 30#elif defined(__GNUC__) 31#pragma GCC diagnostic ignored "-Wunused-macros" 32#pragma GCC diagnostic ignored "-Wunused-function" 33#pragma GCC diagnostic ignored "-Warray-bounds" 34#endif 35 36#ifndef HEAP_ARRAY_SIZE 37//! Size of heap hashmap 38#define HEAP_ARRAY_SIZE 47 39#endif 40#ifndef ENABLE_THREAD_CACHE 41//! Enable per-thread cache 42#define ENABLE_THREAD_CACHE 1 43#endif 44#ifndef ENABLE_GLOBAL_CACHE 45//! Enable global cache shared between all threads, requires thread cache 46#define ENABLE_GLOBAL_CACHE 1 47#endif 48#ifndef ENABLE_VALIDATE_ARGS 49//! Enable validation of args to public entry points 50#define ENABLE_VALIDATE_ARGS 0 51#endif 52#ifndef ENABLE_STATISTICS 53//! Enable statistics collection 54#define ENABLE_STATISTICS 0 55#endif 56#ifndef ENABLE_ASSERTS 57//! Enable asserts 58#define ENABLE_ASSERTS 0 59#endif 60#ifndef ENABLE_OVERRIDE 61//! Override standard library malloc/free and new/delete entry points 62#define ENABLE_OVERRIDE 0 63#endif 64#ifndef ENABLE_PRELOAD 65//! Support preloading 66#define ENABLE_PRELOAD 0 67#endif 68#ifndef DISABLE_UNMAP 69//! Disable unmapping memory pages (also enables unlimited cache) 70#define DISABLE_UNMAP 0 71#endif 72#ifndef ENABLE_UNLIMITED_CACHE 73//! Enable unlimited global cache (no unmapping until finalization) 74#define ENABLE_UNLIMITED_CACHE 0 75#endif 76#ifndef ENABLE_ADAPTIVE_THREAD_CACHE 77//! Enable adaptive thread cache size based on use heuristics 78#define ENABLE_ADAPTIVE_THREAD_CACHE 0 79#endif 80#ifndef DEFAULT_SPAN_MAP_COUNT 81//! Default number of spans to map in call to map more virtual memory (default values yield 4MiB here) 82#define DEFAULT_SPAN_MAP_COUNT 64 83#endif 84#ifndef GLOBAL_CACHE_MULTIPLIER 85//! Multiplier for global cache 86#define GLOBAL_CACHE_MULTIPLIER 8 87#endif 88 89#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE 90#error Must use global cache if unmap is disabled 91#endif 92 93#if DISABLE_UNMAP 94#undef ENABLE_UNLIMITED_CACHE 95#define ENABLE_UNLIMITED_CACHE 1 96#endif 97 98#if !ENABLE_GLOBAL_CACHE 99#undef ENABLE_UNLIMITED_CACHE 100#define ENABLE_UNLIMITED_CACHE 0 101#endif 102 103#if !ENABLE_THREAD_CACHE 104#undef ENABLE_ADAPTIVE_THREAD_CACHE 105#define ENABLE_ADAPTIVE_THREAD_CACHE 0 106#endif 107 108#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) 109# define PLATFORM_WINDOWS 1 110# define PLATFORM_POSIX 0 111#else 112# define PLATFORM_WINDOWS 0 113# define PLATFORM_POSIX 1 114#endif 115 116/// Platform and arch specifics 117#if defined(_MSC_VER) && !defined(__clang__) 118# pragma warning (disable: 5105) 119# ifndef FORCEINLINE 120# define FORCEINLINE inline __forceinline 121# endif 122#else 123# ifndef FORCEINLINE 124# define FORCEINLINE inline __attribute__((__always_inline__)) 125# endif 126#endif 127#if PLATFORM_WINDOWS 128# ifndef WIN32_LEAN_AND_MEAN 129# define WIN32_LEAN_AND_MEAN 130# endif 131# include <windows.h> 132# if ENABLE_VALIDATE_ARGS 133# include <intsafe.h> 134# endif 135#else 136# include <unistd.h> 137# include <stdio.h> 138# include <stdlib.h> 139# include <time.h> 140# if defined(__linux__) || defined(__ANDROID__) 141# include <sys/prctl.h> 142# if !defined(PR_SET_VMA) 143# define PR_SET_VMA 0x53564d41 144# define PR_SET_VMA_ANON_NAME 0 145# endif 146# endif 147# if defined(__APPLE__) 148# include <TargetConditionals.h> 149# if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR 150# include <mach/mach_vm.h> 151# include <mach/vm_statistics.h> 152# endif 153# include <pthread.h> 154# endif 155# if defined(__HAIKU__) || defined(__TINYC__) 156# include <pthread.h> 157# endif 158#endif 159 160#include <stdint.h> 161#include <string.h> 162#include <errno.h> 163 164#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 165#include <fibersapi.h> 166static DWORD fls_key; 167#endif 168 169#if PLATFORM_POSIX 170# include <sys/mman.h> 171# include <sched.h> 172# ifdef __FreeBSD__ 173# include <sys/sysctl.h> 174# define MAP_HUGETLB MAP_ALIGNED_SUPER 175# ifndef PROT_MAX 176# define PROT_MAX(f) 0 177# endif 178# else 179# define PROT_MAX(f) 0 180# endif 181# ifdef __sun 182extern int madvise(caddr_t, size_t, int); 183# endif 184# ifndef MAP_UNINITIALIZED 185# define MAP_UNINITIALIZED 0 186# endif 187#endif 188#include <errno.h> 189 190#if ENABLE_ASSERTS 191# undef NDEBUG 192# if defined(_MSC_VER) && !defined(_DEBUG) 193# define _DEBUG 194# endif 195# include <assert.h> 196#define RPMALLOC_TOSTRING_M(x) #x 197#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x) 198#define rpmalloc_assert(truth, message) \ 199 do { \ 200 if (!(truth)) { \ 201 if (_memory_config.error_callback) { \ 202 _memory_config.error_callback( \ 203 message " (" RPMALLOC_TOSTRING(truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \ 204 } else { \ 205 assert((truth) && message); \ 206 } \ 207 } \ 208 } while (0) 209#else 210# define rpmalloc_assert(truth, message) do {} while(0) 211#endif 212#if ENABLE_STATISTICS 213# include <stdio.h> 214#endif 215 216////// 217/// 218/// Atomic access abstraction (since MSVC does not do C11 yet) 219/// 220////// 221 222#include <atomic> 223 224typedef std::atomic<int32_t> atomic32_t; 225typedef std::atomic<int64_t> atomic64_t; 226typedef std::atomic<void*> atomicptr_t; 227 228static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return std::atomic_load_explicit(src, std::memory_order_relaxed); } 229static FORCEINLINE void atomic_store32(atomic32_t* dst, int32_t val) { std::atomic_store_explicit(dst, val, std::memory_order_relaxed); } 230static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return std::atomic_fetch_add_explicit(val, 1, std::memory_order_relaxed) + 1; } 231static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return std::atomic_fetch_add_explicit(val, -1, std::memory_order_relaxed) - 1; } 232static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return std::atomic_fetch_add_explicit(val, add, std::memory_order_relaxed) + add; } 233static FORCEINLINE int atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return std::atomic_compare_exchange_weak_explicit(dst, &ref, val, std::memory_order_acquire, std::memory_order_relaxed); } 234static FORCEINLINE void atomic_store32_release(atomic32_t* dst, int32_t val) { std::atomic_store_explicit(dst, val, std::memory_order_release); } 235static FORCEINLINE int64_t atomic_load64(atomic64_t* val) { return std::atomic_load_explicit(val, std::memory_order_relaxed); } 236static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return std::atomic_fetch_add_explicit(val, add, std::memory_order_relaxed) + add; } 237static FORCEINLINE void* atomic_load_ptr(atomicptr_t* src) { return std::atomic_load_explicit(src, std::memory_order_relaxed); } 238static FORCEINLINE void atomic_store_ptr(atomicptr_t* dst, void* val) { std::atomic_store_explicit(dst, val, std::memory_order_relaxed); } 239static FORCEINLINE void atomic_store_ptr_release(atomicptr_t* dst, void* val) { std::atomic_store_explicit(dst, val, std::memory_order_release); } 240static FORCEINLINE void* atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return std::atomic_exchange_explicit(dst, val, std::memory_order_acquire); } 241static FORCEINLINE int atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return std::atomic_compare_exchange_weak_explicit(dst, &ref, val, std::memory_order_relaxed, std::memory_order_relaxed); } 242 243#if defined(_MSC_VER) && !defined(__clang__) 244 245#define EXPECTED(x) (x) 246#define UNEXPECTED(x) (x) 247 248#else 249 250#define EXPECTED(x) __builtin_expect((x), 1) 251#define UNEXPECTED(x) __builtin_expect((x), 0) 252 253#endif 254 255//////////// 256/// 257/// Statistics related functions (evaluate to nothing when statistics not enabled) 258/// 259////// 260 261#if ENABLE_STATISTICS 262# define _rpmalloc_stat_inc(counter) atomic_incr32(counter) 263# define _rpmalloc_stat_dec(counter) atomic_decr32(counter) 264# define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value)) 265# define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value)) 266# define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0) 267# define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value)) 268# define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \ 269 int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \ 270 if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \ 271 heap->size_class_use[class_idx].alloc_peak = alloc_current; \ 272 atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \ 273} while(0) 274# define _rpmalloc_stat_inc_free(heap, class_idx) do { \ 275 atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \ 276 atomic_incr32(&heap->size_class_use[class_idx].free_total); \ 277} while(0) 278#else 279# define _rpmalloc_stat_inc(counter) do {} while(0) 280# define _rpmalloc_stat_dec(counter) do {} while(0) 281# define _rpmalloc_stat_add(counter, value) do {} while(0) 282# define _rpmalloc_stat_add64(counter, value) do {} while(0) 283# define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0) 284# define _rpmalloc_stat_sub(counter, value) do {} while(0) 285# define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0) 286# define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0) 287#endif 288 289 290/// 291/// Preconfigured limits and sizes 292/// 293 294//! Granularity of a small allocation block (must be power of two) 295#define SMALL_GRANULARITY 16 296//! Small granularity shift count 297#define SMALL_GRANULARITY_SHIFT 4 298//! Number of small block size classes 299#define SMALL_CLASS_COUNT 65 300//! Maximum size of a small block 301#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1)) 302//! Granularity of a medium allocation block 303#define MEDIUM_GRANULARITY 512 304//! Medium granularity shift count 305#define MEDIUM_GRANULARITY_SHIFT 9 306//! Number of medium block size classes 307#define MEDIUM_CLASS_COUNT 61 308//! Total number of small + medium size classes 309#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT) 310//! Number of large block size classes 311#define LARGE_CLASS_COUNT 63 312//! Maximum size of a medium block 313#define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT)) 314//! Maximum size of a large block 315#define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE) 316//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two) 317#define SPAN_HEADER_SIZE 128 318//! Number of spans in thread cache 319#define MAX_THREAD_SPAN_CACHE 400 320//! Number of spans to transfer between thread and global cache 321#define THREAD_SPAN_CACHE_TRANSFER 64 322//! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2) 323#define MAX_THREAD_SPAN_LARGE_CACHE 100 324//! Number of spans to transfer between thread and global cache for large spans 325#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6 326 327static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two"); 328static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two"); 329 330#if ENABLE_VALIDATE_ARGS 331//! Maximum allocation size to avoid integer overflow 332#undef MAX_ALLOC_SIZE 333#define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size) 334#endif 335 336#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs)) 337#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second)) 338 339#define INVALID_POINTER ((void*)((uintptr_t)-1)) 340 341#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT 342#define SIZE_CLASS_HUGE ((uint32_t)-1) 343 344//////////// 345/// 346/// Data types 347/// 348////// 349 350namespace tracy 351{ 352 353//! A memory heap, per thread 354typedef struct heap_t heap_t; 355//! Span of memory pages 356typedef struct span_t span_t; 357//! Span list 358typedef struct span_list_t span_list_t; 359//! Span active data 360typedef struct span_active_t span_active_t; 361//! Size class definition 362typedef struct size_class_t size_class_t; 363//! Global cache 364typedef struct global_cache_t global_cache_t; 365 366//! Flag indicating span is the first (master) span of a split superspan 367#define SPAN_FLAG_MASTER 1U 368//! Flag indicating span is a secondary (sub) span of a split superspan 369#define SPAN_FLAG_SUBSPAN 2U 370//! Flag indicating span has blocks with increased alignment 371#define SPAN_FLAG_ALIGNED_BLOCKS 4U 372//! Flag indicating an unmapped master span 373#define SPAN_FLAG_UNMAPPED_MASTER 8U 374 375#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 376struct span_use_t { 377 //! Current number of spans used (actually used, not in cache) 378 atomic32_t current; 379 //! High water mark of spans used 380 atomic32_t high; 381#if ENABLE_STATISTICS 382 //! Number of spans in deferred list 383 atomic32_t spans_deferred; 384 //! Number of spans transitioned to global cache 385 atomic32_t spans_to_global; 386 //! Number of spans transitioned from global cache 387 atomic32_t spans_from_global; 388 //! Number of spans transitioned to thread cache 389 atomic32_t spans_to_cache; 390 //! Number of spans transitioned from thread cache 391 atomic32_t spans_from_cache; 392 //! Number of spans transitioned to reserved state 393 atomic32_t spans_to_reserved; 394 //! Number of spans transitioned from reserved state 395 atomic32_t spans_from_reserved; 396 //! Number of raw memory map calls 397 atomic32_t spans_map_calls; 398#endif 399}; 400typedef struct span_use_t span_use_t; 401#endif 402 403#if ENABLE_STATISTICS 404struct size_class_use_t { 405 //! Current number of allocations 406 atomic32_t alloc_current; 407 //! Peak number of allocations 408 int32_t alloc_peak; 409 //! Total number of allocations 410 atomic32_t alloc_total; 411 //! Total number of frees 412 atomic32_t free_total; 413 //! Number of spans in use 414 atomic32_t spans_current; 415 //! Number of spans transitioned to cache 416 int32_t spans_peak; 417 //! Number of spans transitioned to cache 418 atomic32_t spans_to_cache; 419 //! Number of spans transitioned from cache 420 atomic32_t spans_from_cache; 421 //! Number of spans transitioned from reserved state 422 atomic32_t spans_from_reserved; 423 //! Number of spans mapped 424 atomic32_t spans_map_calls; 425 int32_t unused; 426}; 427typedef struct size_class_use_t size_class_use_t; 428#endif 429 430// A span can either represent a single span of memory pages with size declared by span_map_count configuration variable, 431// or a set of spans in a continuous region, a super span. Any reference to the term "span" usually refers to both a single 432// span or a super span. A super span can further be divided into multiple spans (or this, super spans), where the first 433// (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans 434// that are still alive and mapped in virtual memory, and once all subspans and master have been unmapped the entire 435// superspan region is released and unmapped (on Windows for example, the entire superspan range has to be released 436// in the same call to release the virtual memory range, but individual subranges can be decommitted individually 437// to reduce physical memory use). 438struct span_t { 439 //! Free list 440 void* free_list; 441 //! Total block count of size class 442 uint32_t block_count; 443 //! Size class 444 uint32_t size_class; 445 //! Index of last block initialized in free list 446 uint32_t free_list_limit; 447 //! Number of used blocks remaining when in partial state 448 uint32_t used_count; 449 //! Deferred free list 450 atomicptr_t free_list_deferred; 451 //! Size of deferred free list, or list of spans when part of a cache list 452 uint32_t list_size; 453 //! Size of a block 454 uint32_t block_size; 455 //! Flags and counters 456 uint32_t flags; 457 //! Number of spans 458 uint32_t span_count; 459 //! Total span counter for master spans 460 uint32_t total_spans; 461 //! Offset from master span for subspans 462 uint32_t offset_from_master; 463 //! Remaining span counter, for master spans 464 atomic32_t remaining_spans; 465 //! Alignment offset 466 uint32_t align_offset; 467 //! Owning heap 468 heap_t* heap; 469 //! Next span 470 span_t* next; 471 //! Previous span 472 span_t* prev; 473}; 474static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch"); 475 476struct span_cache_t { 477 size_t count; 478 span_t* span[MAX_THREAD_SPAN_CACHE]; 479}; 480typedef struct span_cache_t span_cache_t; 481 482struct span_large_cache_t { 483 size_t count; 484 span_t* span[MAX_THREAD_SPAN_LARGE_CACHE]; 485}; 486typedef struct span_large_cache_t span_large_cache_t; 487 488struct heap_size_class_t { 489 //! Free list of active span 490 void* free_list; 491 //! Double linked list of partially used spans with free blocks. 492 // Previous span pointer in head points to tail span of list. 493 span_t* partial_span; 494 //! Early level cache of fully free spans 495 span_t* cache; 496}; 497typedef struct heap_size_class_t heap_size_class_t; 498 499// Control structure for a heap, either a thread heap or a first class heap if enabled 500struct heap_t { 501 //! Owning thread ID 502 uintptr_t owner_thread; 503 //! Free lists for each size class 504 heap_size_class_t size_class[SIZE_CLASS_COUNT]; 505#if ENABLE_THREAD_CACHE 506 //! Arrays of fully freed spans, single span 507 span_cache_t span_cache; 508#endif 509 //! List of deferred free spans (single linked list) 510 atomicptr_t span_free_deferred; 511 //! Number of full spans 512 size_t full_span_count; 513 //! Mapped but unused spans 514 span_t* span_reserve; 515 //! Master span for mapped but unused spans 516 span_t* span_reserve_master; 517 //! Number of mapped but unused spans 518 uint32_t spans_reserved; 519 //! Child count 520 atomic32_t child_count; 521 //! Next heap in id list 522 heap_t* next_heap; 523 //! Next heap in orphan list 524 heap_t* next_orphan; 525 //! Heap ID 526 int32_t id; 527 //! Finalization state flag 528 int finalize; 529 //! Master heap owning the memory pages 530 heap_t* master_heap; 531#if ENABLE_THREAD_CACHE 532 //! Arrays of fully freed spans, large spans with > 1 span count 533 span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1]; 534#endif 535#if RPMALLOC_FIRST_CLASS_HEAPS 536 //! Double linked list of fully utilized spans with free blocks for each size class. 537 // Previous span pointer in head points to tail span of list. 538 span_t* full_span[SIZE_CLASS_COUNT]; 539 //! Double linked list of large and huge spans allocated by this heap 540 span_t* large_huge_span; 541#endif 542#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 543 //! Current and high water mark of spans used per span count 544 span_use_t span_use[LARGE_CLASS_COUNT]; 545#endif 546#if ENABLE_STATISTICS 547 //! Allocation stats per size class 548 size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1]; 549 //! Number of bytes transitioned thread -> global 550 atomic64_t thread_to_global; 551 //! Number of bytes transitioned global -> thread 552 atomic64_t global_to_thread; 553#endif 554}; 555 556// Size class for defining a block size bucket 557struct size_class_t { 558 //! Size of blocks in this class 559 uint32_t block_size; 560 //! Number of blocks in each chunk 561 uint16_t block_count; 562 //! Class index this class is merged with 563 uint16_t class_idx; 564}; 565static_assert(sizeof(size_class_t) == 8, "Size class size mismatch"); 566 567struct global_cache_t { 568 //! Cache lock 569 atomic32_t lock; 570 //! Cache count 571 uint32_t count; 572#if ENABLE_STATISTICS 573 //! Insert count 574 size_t insert_count; 575 //! Extract count 576 size_t extract_count; 577#endif 578 //! Cached spans 579 span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE]; 580 //! Unlimited cache overflow 581 span_t* overflow; 582}; 583 584//////////// 585/// 586/// Global data 587/// 588////// 589 590//! Default span size (64KiB) 591#define _memory_default_span_size (64 * 1024) 592#define _memory_default_span_size_shift 16 593#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1))) 594 595//! Initialized flag 596static int _rpmalloc_initialized; 597//! Main thread ID 598static uintptr_t _rpmalloc_main_thread_id; 599//! Configuration 600static rpmalloc_config_t _memory_config; 601//! Memory page size 602static size_t _memory_page_size; 603//! Shift to divide by page size 604static size_t _memory_page_size_shift; 605//! Granularity at which memory pages are mapped by OS 606static size_t _memory_map_granularity; 607#if RPMALLOC_CONFIGURABLE 608//! Size of a span of memory pages 609static size_t _memory_span_size; 610//! Shift to divide by span size 611static size_t _memory_span_size_shift; 612//! Mask to get to start of a memory span 613static uintptr_t _memory_span_mask; 614#else 615//! Hardwired span size 616#define _memory_span_size _memory_default_span_size 617#define _memory_span_size_shift _memory_default_span_size_shift 618#define _memory_span_mask _memory_default_span_mask 619#endif 620//! Number of spans to map in each map call 621static size_t _memory_span_map_count; 622//! Number of spans to keep reserved in each heap 623static size_t _memory_heap_reserve_count; 624//! Global size classes 625static size_class_t _memory_size_class[SIZE_CLASS_COUNT]; 626//! Run-time size limit of medium blocks 627static size_t _memory_medium_size_limit; 628//! Heap ID counter 629static atomic32_t _memory_heap_id; 630//! Huge page support 631static int _memory_huge_pages; 632#if ENABLE_GLOBAL_CACHE 633//! Global span cache 634static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT]; 635#endif 636//! Global reserved spans 637static span_t* _memory_global_reserve; 638//! Global reserved count 639static size_t _memory_global_reserve_count; 640//! Global reserved master 641static span_t* _memory_global_reserve_master; 642//! All heaps 643static heap_t* _memory_heaps[HEAP_ARRAY_SIZE]; 644//! Used to restrict access to mapping memory for huge pages 645static atomic32_t _memory_global_lock; 646//! Orphaned heaps 647static heap_t* _memory_orphan_heaps; 648#if RPMALLOC_FIRST_CLASS_HEAPS 649//! Orphaned heaps (first class heaps) 650static heap_t* _memory_first_class_orphan_heaps; 651#endif 652#if ENABLE_STATISTICS 653//! Allocations counter 654static atomic64_t _allocation_counter; 655//! Deallocations counter 656static atomic64_t _deallocation_counter; 657//! Active heap count 658static atomic32_t _memory_active_heaps; 659//! Number of currently mapped memory pages 660static atomic32_t _mapped_pages; 661//! Peak number of concurrently mapped memory pages 662static int32_t _mapped_pages_peak; 663//! Number of mapped master spans 664static atomic32_t _master_spans; 665//! Number of unmapped dangling master spans 666static atomic32_t _unmapped_master_spans; 667//! Running counter of total number of mapped memory pages since start 668static atomic32_t _mapped_total; 669//! Running counter of total number of unmapped memory pages since start 670static atomic32_t _unmapped_total; 671//! Number of currently mapped memory pages in OS calls 672static atomic32_t _mapped_pages_os; 673//! Number of currently allocated pages in huge allocations 674static atomic32_t _huge_pages_current; 675//! Peak number of currently allocated pages in huge allocations 676static int32_t _huge_pages_peak; 677#endif 678 679//////////// 680/// 681/// Thread local heap and ID 682/// 683////// 684 685//! Current thread heap 686#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 687static pthread_key_t _memory_thread_heap; 688#else 689# ifdef _MSC_VER 690# define _Thread_local __declspec(thread) 691# define TLS_MODEL 692# else 693# ifndef __HAIKU__ 694# define TLS_MODEL __attribute__((tls_model("initial-exec"))) 695# else 696# define TLS_MODEL 697# endif 698# if !defined(__clang__) && defined(__GNUC__) 699# define _Thread_local __thread 700# endif 701# endif 702static thread_local heap_t* _memory_thread_heap TLS_MODEL; 703#endif 704 705static inline heap_t* 706get_thread_heap_raw(void) { 707#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 708 return pthread_getspecific(_memory_thread_heap); 709#else 710 return _memory_thread_heap; 711#endif 712} 713 714//! Get the current thread heap 715static inline heap_t* 716get_thread_heap(void) { 717 heap_t* heap = get_thread_heap_raw(); 718#if ENABLE_PRELOAD 719 if (EXPECTED(heap != 0)) 720 return heap; 721 rpmalloc_initialize(); 722 return get_thread_heap_raw(); 723#else 724 return heap; 725#endif 726} 727 728//! Fast thread ID 729static inline uintptr_t 730get_thread_id(void) { 731#if defined(_WIN32) 732 return (uintptr_t)((void*)NtCurrentTeb()); 733#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__) 734 uintptr_t tid; 735# if defined(__i386__) 736 __asm__("movl %%gs:0, %0" : "=r" (tid) : : ); 737# elif defined(__x86_64__) 738# if defined(__MACH__) 739 __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); 740# else 741 __asm__("movq %%fs:0, %0" : "=r" (tid) : : ); 742# endif 743# elif defined(__arm__) 744 __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid)); 745# elif defined(__aarch64__) 746# if defined(__MACH__) 747 // tpidr_el0 likely unused, always return 0 on iOS 748 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tid)); 749# else 750 __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tid)); 751# endif 752# else 753 tid = (uintptr_t)((void*)get_thread_heap_raw()); 754# endif 755 return tid; 756#else 757 return (uintptr_t)((void*)get_thread_heap_raw()); 758#endif 759} 760 761//! Set the current thread heap 762static void 763set_thread_heap(heap_t* heap) { 764#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 765 pthread_setspecific(_memory_thread_heap, heap); 766#else 767 _memory_thread_heap = heap; 768#endif 769 if (heap) 770 heap->owner_thread = get_thread_id(); 771} 772 773//! Set main thread ID 774extern void 775rpmalloc_set_main_thread(void); 776 777void 778rpmalloc_set_main_thread(void) { 779 _rpmalloc_main_thread_id = get_thread_id(); 780} 781 782static void 783_rpmalloc_spin(void) { 784#if defined(_MSC_VER) 785 _mm_pause(); 786#elif defined(__x86_64__) || defined(__i386__) 787 __asm__ volatile("pause" ::: "memory"); 788#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) 789 __asm__ volatile("yield" ::: "memory"); 790#elif defined(__powerpc__) || defined(__powerpc64__) 791 // No idea if ever been compiled in such archs but ... as precaution 792 __asm__ volatile("or 27,27,27"); 793#elif defined(__sparc__) 794 __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); 795#else 796 struct timespec ts = {0}; 797 nanosleep(&ts, 0); 798#endif 799} 800 801#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 802static void NTAPI 803_rpmalloc_thread_destructor(void* value) { 804#if ENABLE_OVERRIDE 805 // If this is called on main thread it means rpmalloc_finalize 806 // has not been called and shutdown is forced (through _exit) or unclean 807 if (get_thread_id() == _rpmalloc_main_thread_id) 808 return; 809#endif 810 if (value) 811 rpmalloc_thread_finalize(1); 812} 813#endif 814 815 816//////////// 817/// 818/// Low level memory map/unmap 819/// 820////// 821 822static void 823_rpmalloc_set_name(void* address, size_t size) { 824#if defined(__linux__) || defined(__ANDROID__) 825 const char *name = _memory_huge_pages ? _memory_config.huge_page_name : _memory_config.page_name; 826 if (address == MAP_FAILED || !name) 827 return; 828 // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails 829 // (e.g. invalid name) it is a no-op basically. 830 (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, (uintptr_t)name); 831#else 832 (void)sizeof(size); 833 (void)sizeof(address); 834#endif 835} 836 837 838//! Map more virtual memory 839// size is number of bytes to map 840// offset receives the offset in bytes from start of mapped region 841// returns address to start of mapped region to use 842static void* 843_rpmalloc_mmap(size_t size, size_t* offset) { 844 rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size"); 845 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); 846 void* address = _memory_config.memory_map(size, offset); 847 if (EXPECTED(address != 0)) { 848 _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak); 849 _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift)); 850 } 851 return address; 852} 853 854//! Unmap virtual memory 855// address is the memory address to unmap, as returned from _memory_map 856// size is the number of bytes to unmap, which might be less than full region for a partial unmap 857// offset is the offset in bytes to the actual mapped region, as set by _memory_map 858// release is set to 0 for partial unmap, or size of entire range for a full unmap 859static void 860_rpmalloc_unmap(void* address, size_t size, size_t offset, size_t release) { 861 rpmalloc_assert(!release || (release >= size), "Invalid unmap size"); 862 rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); 863 if (release) { 864 rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size"); 865 _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift)); 866 _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift)); 867 } 868 _memory_config.memory_unmap(address, size, offset, release); 869} 870 871//! Default implementation to map new pages to virtual memory 872static void* 873_rpmalloc_mmap_os(size_t size, size_t* offset) { 874 //Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity 875 size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0; 876 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); 877#if PLATFORM_WINDOWS 878 //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed" 879 void* ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); 880 if (!ptr) { 881 if (_memory_config.map_fail_callback) { 882 if (_memory_config.map_fail_callback(size + padding)) 883 return _rpmalloc_mmap_os(size, offset); 884 } else { 885 rpmalloc_assert(ptr, "Failed to map virtual memory block"); 886 } 887 return 0; 888 } 889#else 890 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED; 891# if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR 892 int fd = (int)VM_MAKE_TAG(240U); 893 if (_memory_huge_pages) 894 fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; 895 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0); 896# elif defined(MAP_HUGETLB) 897 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE), (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0); 898# if defined(MADV_HUGEPAGE) 899 // In some configurations, huge pages allocations might fail thus 900 // we fallback to normal allocations and promote the region as transparent huge page 901 if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) { 902 ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); 903 if (ptr && ptr != MAP_FAILED) { 904 int prm = madvise(ptr, size + padding, MADV_HUGEPAGE); 905 (void)prm; 906 rpmalloc_assert((prm == 0), "Failed to promote the page to THP"); 907 } 908 } 909# endif 910 _rpmalloc_set_name(ptr, size + padding); 911# elif defined(MAP_ALIGNED) 912 const size_t align = (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1)); 913 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0); 914# elif defined(MAP_ALIGN) 915 caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0); 916 void* ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0); 917# else 918 void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); 919# endif 920 if ((ptr == MAP_FAILED) || !ptr) { 921 if (_memory_config.map_fail_callback) { 922 if (_memory_config.map_fail_callback(size + padding)) 923 return _rpmalloc_mmap_os(size, offset); 924 } else if (errno != ENOMEM) { 925 rpmalloc_assert((ptr != MAP_FAILED) && ptr, "Failed to map virtual memory block"); 926 } 927 return 0; 928 } 929#endif 930 _rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift)); 931 if (padding) { 932 size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask); 933 rpmalloc_assert(final_padding <= _memory_span_size, "Internal failure in padding"); 934 rpmalloc_assert(final_padding <= padding, "Internal failure in padding"); 935 rpmalloc_assert(!(final_padding % 8), "Internal failure in padding"); 936 ptr = pointer_offset(ptr, final_padding); 937 *offset = final_padding >> 3; 938 } 939 rpmalloc_assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask), "Internal failure in padding"); 940 return ptr; 941} 942 943//! Default implementation to unmap pages from virtual memory 944static void 945_rpmalloc_unmap_os(void* address, size_t size, size_t offset, size_t release) { 946 rpmalloc_assert(release || (offset == 0), "Invalid unmap size"); 947 rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size"); 948 rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size"); 949 if (release && offset) { 950 offset <<= 3; 951 address = pointer_offset(address, -(int32_t)offset); 952 if ((release >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) { 953 //Padding is always one span size 954 release += _memory_span_size; 955 } 956 } 957#if !DISABLE_UNMAP 958#if PLATFORM_WINDOWS 959 if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) { 960 rpmalloc_assert(0, "Failed to unmap virtual memory block"); 961 } 962#else 963 if (release) { 964 if (munmap(address, release)) { 965 rpmalloc_assert(0, "Failed to unmap virtual memory block"); 966 } 967 } else { 968#if defined(MADV_FREE_REUSABLE) 969 int ret; 970 while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN)) 971 errno = 0; 972 if ((ret == -1) && (errno != 0)) { 973#elif defined(MADV_DONTNEED) 974 if (madvise(address, size, MADV_DONTNEED)) { 975#elif defined(MADV_PAGEOUT) 976 if (madvise(address, size, MADV_PAGEOUT)) { 977#elif defined(MADV_FREE) 978 if (madvise(address, size, MADV_FREE)) { 979#else 980 if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { 981#endif 982 rpmalloc_assert(0, "Failed to madvise virtual memory block as free"); 983 } 984 } 985#endif 986#endif 987 if (release) 988 _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift); 989} 990 991static void 992_rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count); 993 994//! Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller) 995static span_t* 996_rpmalloc_global_get_reserved_spans(size_t span_count) { 997 span_t* span = _memory_global_reserve; 998 _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, span, span_count); 999 _memory_global_reserve_count -= span_count; 1000 if (_memory_global_reserve_count) 1001 _memory_global_reserve = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); 1002 else 1003 _memory_global_reserve = 0; 1004 return span; 1005} 1006 1007//! Store the given spans as global reserve (must only be called from within new heap allocation, not thread safe) 1008static void 1009_rpmalloc_global_set_reserved_spans(span_t* master, span_t* reserve, size_t reserve_span_count) { 1010 _memory_global_reserve_master = master; 1011 _memory_global_reserve_count = reserve_span_count; 1012 _memory_global_reserve = reserve; 1013} 1014 1015 1016//////////// 1017/// 1018/// Span linked list management 1019/// 1020////// 1021 1022//! Add a span to double linked list at the head 1023static void 1024_rpmalloc_span_double_link_list_add(span_t** head, span_t* span) { 1025 if (*head) 1026 (*head)->prev = span; 1027 span->next = *head; 1028 *head = span; 1029} 1030 1031//! Pop head span from double linked list 1032static void 1033_rpmalloc_span_double_link_list_pop_head(span_t** head, span_t* span) { 1034 rpmalloc_assert(*head == span, "Linked list corrupted"); 1035 span = *head; 1036 *head = span->next; 1037} 1038 1039//! Remove a span from double linked list 1040static void 1041_rpmalloc_span_double_link_list_remove(span_t** head, span_t* span) { 1042 rpmalloc_assert(*head, "Linked list corrupted"); 1043 if (*head == span) { 1044 *head = span->next; 1045 } else { 1046 span_t* next_span = span->next; 1047 span_t* prev_span = span->prev; 1048 prev_span->next = next_span; 1049 if (EXPECTED(next_span != 0)) 1050 next_span->prev = prev_span; 1051 } 1052} 1053 1054 1055//////////// 1056/// 1057/// Span control 1058/// 1059////// 1060 1061static void 1062_rpmalloc_heap_cache_insert(heap_t* heap, span_t* span); 1063 1064static void 1065_rpmalloc_heap_finalize(heap_t* heap); 1066 1067static void 1068_rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count); 1069 1070//! Declare the span to be a subspan and store distance from master span and span count 1071static void 1072_rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count) { 1073 rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER), "Span master pointer and/or flag mismatch"); 1074 if (subspan != master) { 1075 subspan->flags = SPAN_FLAG_SUBSPAN; 1076 subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift); 1077 subspan->align_offset = 0; 1078 } 1079 subspan->span_count = (uint32_t)span_count; 1080} 1081 1082//! Use reserved spans to fulfill a memory map request (reserve size must be checked by caller) 1083static span_t* 1084_rpmalloc_span_map_from_reserve(heap_t* heap, size_t span_count) { 1085 //Update the heap span reserve 1086 span_t* span = heap->span_reserve; 1087 heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size); 1088 heap->spans_reserved -= (uint32_t)span_count; 1089 1090 _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count); 1091 if (span_count <= LARGE_CLASS_COUNT) 1092 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved); 1093 1094 return span; 1095} 1096 1097//! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size 1098static size_t 1099_rpmalloc_span_align_count(size_t span_count) { 1100 size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count; 1101 if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size)) 1102 request_count += _memory_span_map_count - (request_count % _memory_span_map_count); 1103 return request_count; 1104} 1105 1106//! Setup a newly mapped span 1107static void 1108_rpmalloc_span_initialize(span_t* span, size_t total_span_count, size_t span_count, size_t align_offset) { 1109 span->total_spans = (uint32_t)total_span_count; 1110 span->span_count = (uint32_t)span_count; 1111 span->align_offset = (uint32_t)align_offset; 1112 span->flags = SPAN_FLAG_MASTER; 1113 atomic_store32(&span->remaining_spans, (int32_t)total_span_count); 1114} 1115 1116static void 1117_rpmalloc_span_unmap(span_t* span); 1118 1119//! Map an aligned set of spans, taking configured mapping granularity and the page size into account 1120static span_t* 1121_rpmalloc_span_map_aligned_count(heap_t* heap, size_t span_count) { 1122 //If we already have some, but not enough, reserved spans, release those to heap cache and map a new 1123 //full set of spans. Otherwise we would waste memory if page size > span size (huge pages) 1124 size_t aligned_span_count = _rpmalloc_span_align_count(span_count); 1125 size_t align_offset = 0; 1126 span_t* span = (span_t*)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset); 1127 if (!span) 1128 return 0; 1129 _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset); 1130 _rpmalloc_stat_inc(&_master_spans); 1131 if (span_count <= LARGE_CLASS_COUNT) 1132 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls); 1133 if (aligned_span_count > span_count) { 1134 span_t* reserved_spans = (span_t*)pointer_offset(span, span_count * _memory_span_size); 1135 size_t reserved_count = aligned_span_count - span_count; 1136 if (heap->spans_reserved) { 1137 _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved); 1138 _rpmalloc_heap_cache_insert(heap, heap->span_reserve); 1139 } 1140 if (reserved_count > _memory_heap_reserve_count) { 1141 // If huge pages or eager spam map count, the global reserve spin lock is held by caller, _rpmalloc_span_map 1142 rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1, "Global spin lock not held as expected"); 1143 size_t remain_count = reserved_count - _memory_heap_reserve_count; 1144 reserved_count = _memory_heap_reserve_count; 1145 span_t* remain_span = (span_t*)pointer_offset(reserved_spans, reserved_count * _memory_span_size); 1146 if (_memory_global_reserve) { 1147 _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, _memory_global_reserve, _memory_global_reserve_count); 1148 _rpmalloc_span_unmap(_memory_global_reserve); 1149 } 1150 _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count); 1151 } 1152 _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count); 1153 } 1154 return span; 1155} 1156 1157//! Map in memory pages for the given number of spans (or use previously reserved pages) 1158static span_t* 1159_rpmalloc_span_map(heap_t* heap, size_t span_count) { 1160 if (span_count <= heap->spans_reserved) 1161 return _rpmalloc_span_map_from_reserve(heap, span_count); 1162 span_t* span = 0; 1163 int use_global_reserve = (_memory_page_size > _memory_span_size) || (_memory_span_map_count > _memory_heap_reserve_count); 1164 if (use_global_reserve) { 1165 // If huge pages, make sure only one thread maps more memory to avoid bloat 1166 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 1167 _rpmalloc_spin(); 1168 if (_memory_global_reserve_count >= span_count) { 1169 size_t reserve_count = (!heap->spans_reserved ? _memory_heap_reserve_count : span_count); 1170 if (_memory_global_reserve_count < reserve_count) 1171 reserve_count = _memory_global_reserve_count; 1172 span = _rpmalloc_global_get_reserved_spans(reserve_count); 1173 if (span) { 1174 if (reserve_count > span_count) { 1175 span_t* reserved_span = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift); 1176 _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master, reserved_span, reserve_count - span_count); 1177 } 1178 // Already marked as subspan in _rpmalloc_global_get_reserved_spans 1179 span->span_count = (uint32_t)span_count; 1180 } 1181 } 1182 } 1183 if (!span) 1184 span = _rpmalloc_span_map_aligned_count(heap, span_count); 1185 if (use_global_reserve) 1186 atomic_store32_release(&_memory_global_lock, 0); 1187 return span; 1188} 1189 1190//! Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings) 1191static void 1192_rpmalloc_span_unmap(span_t* span) { 1193 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1194 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1195 1196 int is_master = !!(span->flags & SPAN_FLAG_MASTER); 1197 span_t* master = is_master ? span : ((span_t*)pointer_offset(span, -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size))); 1198 rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1199 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); 1200 1201 size_t span_count = span->span_count; 1202 if (!is_master) { 1203 //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master) 1204 rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted"); 1205 if (_memory_span_size >= _memory_page_size) 1206 _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0); 1207 } else { 1208 //Special double flag to denote an unmapped master 1209 //It must be kept in memory since span header must be used 1210 span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER; 1211 _rpmalloc_stat_add(&_unmapped_master_spans, 1); 1212 } 1213 1214 if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) { 1215 //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span 1216 rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 1217 size_t unmap_count = master->span_count; 1218 if (_memory_span_size < _memory_page_size) 1219 unmap_count = master->total_spans; 1220 _rpmalloc_stat_sub(&_master_spans, 1); 1221 _rpmalloc_stat_sub(&_unmapped_master_spans, 1); 1222 _rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size); 1223 } 1224} 1225 1226//! Move the span (used for small or medium allocations) to the heap thread cache 1227static void 1228_rpmalloc_span_release_to_cache(heap_t* heap, span_t* span) { 1229 rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted"); 1230 rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, "Invalid span size class"); 1231 rpmalloc_assert(span->span_count == 1, "Invalid span count"); 1232#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 1233 atomic_decr32(&heap->span_use[0].current); 1234#endif 1235 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); 1236 if (!heap->finalize) { 1237 _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache); 1238 _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache); 1239 if (heap->size_class[span->size_class].cache) 1240 _rpmalloc_heap_cache_insert(heap, heap->size_class[span->size_class].cache); 1241 heap->size_class[span->size_class].cache = span; 1242 } else { 1243 _rpmalloc_span_unmap(span); 1244 } 1245} 1246 1247//! Initialize a (partial) free list up to next system memory page, while reserving the first block 1248//! as allocated, returning number of blocks in list 1249static uint32_t 1250free_list_partial_init(void** list, void** first_block, void* page_start, void* block_start, uint32_t block_count, uint32_t block_size) { 1251 rpmalloc_assert(block_count, "Internal failure"); 1252 *first_block = block_start; 1253 if (block_count > 1) { 1254 void* free_block = pointer_offset(block_start, block_size); 1255 void* block_end = pointer_offset(block_start, (size_t)block_size * block_count); 1256 //If block size is less than half a memory page, bound init to next memory page boundary 1257 if (block_size < (_memory_page_size >> 1)) { 1258 void* page_end = pointer_offset(page_start, _memory_page_size); 1259 if (page_end < block_end) 1260 block_end = page_end; 1261 } 1262 *list = free_block; 1263 block_count = 2; 1264 void* next_block = pointer_offset(free_block, block_size); 1265 while (next_block < block_end) { 1266 *((void**)free_block) = next_block; 1267 free_block = next_block; 1268 ++block_count; 1269 next_block = pointer_offset(next_block, block_size); 1270 } 1271 *((void**)free_block) = 0; 1272 } else { 1273 *list = 0; 1274 } 1275 return block_count; 1276} 1277 1278//! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list 1279static void* 1280_rpmalloc_span_initialize_new(heap_t* heap, heap_size_class_t* heap_size_class, span_t* span, uint32_t class_idx) { 1281 rpmalloc_assert(span->span_count == 1, "Internal failure"); 1282 size_class_t* size_class = _memory_size_class + class_idx; 1283 span->size_class = class_idx; 1284 span->heap = heap; 1285 span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS; 1286 span->block_size = size_class->block_size; 1287 span->block_count = size_class->block_count; 1288 span->free_list = 0; 1289 span->list_size = 0; 1290 atomic_store_ptr_release(&span->free_list_deferred, 0); 1291 1292 //Setup free list. Only initialize one system page worth of free blocks in list 1293 void* block; 1294 span->free_list_limit = free_list_partial_init(&heap_size_class->free_list, &block, 1295 span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size); 1296 //Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized 1297 if (span->free_list_limit < span->block_count) { 1298 _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span); 1299 span->used_count = span->free_list_limit; 1300 } else { 1301#if RPMALLOC_FIRST_CLASS_HEAPS 1302 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); 1303#endif 1304 ++heap->full_span_count; 1305 span->used_count = span->block_count; 1306 } 1307 return block; 1308} 1309 1310static void 1311_rpmalloc_span_extract_free_list_deferred(span_t* span) { 1312 // We need acquire semantics on the CAS operation since we are interested in the list size 1313 // Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency 1314 do { 1315 span->free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 1316 } while (span->free_list == INVALID_POINTER); 1317 span->used_count -= span->list_size; 1318 span->list_size = 0; 1319 atomic_store_ptr_release(&span->free_list_deferred, 0); 1320} 1321 1322static int 1323_rpmalloc_span_is_fully_utilized(span_t* span) { 1324 rpmalloc_assert(span->free_list_limit <= span->block_count, "Span free list corrupted"); 1325 return !span->free_list && (span->free_list_limit >= span->block_count); 1326} 1327 1328static int 1329_rpmalloc_span_finalize(heap_t* heap, size_t iclass, span_t* span, span_t** list_head) { 1330 void* free_list = heap->size_class[iclass].free_list; 1331 span_t* class_span = (span_t*)((uintptr_t)free_list & _memory_span_mask); 1332 if (span == class_span) { 1333 // Adopt the heap class free list back into the span free list 1334 void* block = span->free_list; 1335 void* last_block = 0; 1336 while (block) { 1337 last_block = block; 1338 block = *((void**)block); 1339 } 1340 uint32_t free_count = 0; 1341 block = free_list; 1342 while (block) { 1343 ++free_count; 1344 block = *((void**)block); 1345 } 1346 if (last_block) { 1347 *((void**)last_block) = free_list; 1348 } else { 1349 span->free_list = free_list; 1350 } 1351 heap->size_class[iclass].free_list = 0; 1352 span->used_count -= free_count; 1353 } 1354 //If this assert triggers you have memory leaks 1355 rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected"); 1356 if (span->list_size == span->used_count) { 1357 _rpmalloc_stat_dec(&heap->span_use[0].current); 1358 _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current); 1359 // This function only used for spans in double linked lists 1360 if (list_head) 1361 _rpmalloc_span_double_link_list_remove(list_head, span); 1362 _rpmalloc_span_unmap(span); 1363 return 1; 1364 } 1365 return 0; 1366} 1367 1368 1369//////////// 1370/// 1371/// Global cache 1372/// 1373////// 1374 1375#if ENABLE_GLOBAL_CACHE 1376 1377//! Finalize a global cache 1378static void 1379_rpmalloc_global_cache_finalize(global_cache_t* cache) { 1380 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1381 _rpmalloc_spin(); 1382 1383 for (size_t ispan = 0; ispan < cache->count; ++ispan) 1384 _rpmalloc_span_unmap(cache->span[ispan]); 1385 cache->count = 0; 1386 1387 while (cache->overflow) { 1388 span_t* span = cache->overflow; 1389 cache->overflow = span->next; 1390 _rpmalloc_span_unmap(span); 1391 } 1392 1393 atomic_store32_release(&cache->lock, 0); 1394} 1395 1396static void 1397_rpmalloc_global_cache_insert_spans(span_t** span, size_t span_count, size_t count) { 1398 const size_t cache_limit = (span_count == 1) ? 1399 GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE : 1400 GLOBAL_CACHE_MULTIPLIER * (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1401 1402 global_cache_t* cache = &_memory_span_cache[span_count - 1]; 1403 1404 size_t insert_count = count; 1405 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1406 _rpmalloc_spin(); 1407 1408#if ENABLE_STATISTICS 1409 cache->insert_count += count; 1410#endif 1411 if ((cache->count + insert_count) > cache_limit) 1412 insert_count = cache_limit - cache->count; 1413 1414 memcpy(cache->span + cache->count, span, sizeof(span_t*) * insert_count); 1415 cache->count += (uint32_t)insert_count; 1416 1417#if ENABLE_UNLIMITED_CACHE 1418 while (insert_count < count) { 1419#else 1420 // Enable unlimited cache if huge pages, or we will leak since it is unlikely that an entire huge page 1421 // will be unmapped, and we're unable to partially decommit a huge page 1422 while ((_memory_page_size > _memory_span_size) && (insert_count < count)) { 1423#endif 1424 span_t* current_span = span[insert_count++]; 1425 current_span->next = cache->overflow; 1426 cache->overflow = current_span; 1427 } 1428 atomic_store32_release(&cache->lock, 0); 1429 1430 span_t* keep = 0; 1431 for (size_t ispan = insert_count; ispan < count; ++ispan) { 1432 span_t* current_span = span[ispan]; 1433 // Keep master spans that has remaining subspans to avoid dangling them 1434 if ((current_span->flags & SPAN_FLAG_MASTER) && 1435 (atomic_load32(&current_span->remaining_spans) > (int32_t)current_span->span_count)) { 1436 current_span->next = keep; 1437 keep = current_span; 1438 } else { 1439 _rpmalloc_span_unmap(current_span); 1440 } 1441 } 1442 1443 if (keep) { 1444 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1445 _rpmalloc_spin(); 1446 1447 size_t islot = 0; 1448 while (keep) { 1449 for (; islot < cache->count; ++islot) { 1450 span_t* current_span = cache->span[islot]; 1451 if (!(current_span->flags & SPAN_FLAG_MASTER) || ((current_span->flags & SPAN_FLAG_MASTER) && 1452 (atomic_load32(&current_span->remaining_spans) <= (int32_t)current_span->span_count))) { 1453 _rpmalloc_span_unmap(current_span); 1454 cache->span[islot] = keep; 1455 break; 1456 } 1457 } 1458 if (islot == cache->count) 1459 break; 1460 keep = keep->next; 1461 } 1462 1463 if (keep) { 1464 span_t* tail = keep; 1465 while (tail->next) 1466 tail = tail->next; 1467 tail->next = cache->overflow; 1468 cache->overflow = keep; 1469 } 1470 1471 atomic_store32_release(&cache->lock, 0); 1472 } 1473} 1474 1475static size_t 1476_rpmalloc_global_cache_extract_spans(span_t** span, size_t span_count, size_t count) { 1477 global_cache_t* cache = &_memory_span_cache[span_count - 1]; 1478 1479 size_t extract_count = 0; 1480 while (!atomic_cas32_acquire(&cache->lock, 1, 0)) 1481 _rpmalloc_spin(); 1482 1483#if ENABLE_STATISTICS 1484 cache->extract_count += count; 1485#endif 1486 size_t want = count - extract_count; 1487 if (want > cache->count) 1488 want = cache->count; 1489 1490 memcpy(span + extract_count, cache->span + (cache->count - want), sizeof(span_t*) * want); 1491 cache->count -= (uint32_t)want; 1492 extract_count += want; 1493 1494 while ((extract_count < count) && cache->overflow) { 1495 span_t* current_span = cache->overflow; 1496 span[extract_count++] = current_span; 1497 cache->overflow = current_span->next; 1498 } 1499 1500#if ENABLE_ASSERTS 1501 for (size_t ispan = 0; ispan < extract_count; ++ispan) { 1502 assert(span[ispan]->span_count == span_count); 1503 } 1504#endif 1505 1506 atomic_store32_release(&cache->lock, 0); 1507 1508 return extract_count; 1509} 1510 1511#endif 1512 1513//////////// 1514/// 1515/// Heap control 1516/// 1517////// 1518 1519static void _rpmalloc_deallocate_huge(span_t*); 1520 1521//! Store the given spans as reserve in the given heap 1522static void 1523_rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count) { 1524 heap->span_reserve_master = master; 1525 heap->span_reserve = reserve; 1526 heap->spans_reserved = (uint32_t)reserve_span_count; 1527} 1528 1529//! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use 1530static void 1531_rpmalloc_heap_cache_adopt_deferred(heap_t* heap, span_t** single_span) { 1532 span_t* span = (span_t*)((void*)atomic_exchange_ptr_acquire(&heap->span_free_deferred, 0)); 1533 while (span) { 1534 span_t* next_span = (span_t*)span->free_list; 1535 rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted"); 1536 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { 1537 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); 1538 --heap->full_span_count; 1539 _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred); 1540#if RPMALLOC_FIRST_CLASS_HEAPS 1541 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); 1542#endif 1543 _rpmalloc_stat_dec(&heap->span_use[0].current); 1544 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); 1545 if (single_span && !*single_span) 1546 *single_span = span; 1547 else 1548 _rpmalloc_heap_cache_insert(heap, span); 1549 } else { 1550 if (span->size_class == SIZE_CLASS_HUGE) { 1551 _rpmalloc_deallocate_huge(span); 1552 } else { 1553 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Span size class invalid"); 1554 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); 1555 --heap->full_span_count; 1556#if RPMALLOC_FIRST_CLASS_HEAPS 1557 _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span); 1558#endif 1559 uint32_t idx = span->span_count - 1; 1560 _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred); 1561 _rpmalloc_stat_dec(&heap->span_use[idx].current); 1562 if (!idx && single_span && !*single_span) 1563 *single_span = span; 1564 else 1565 _rpmalloc_heap_cache_insert(heap, span); 1566 } 1567 } 1568 span = next_span; 1569 } 1570} 1571 1572static void 1573_rpmalloc_heap_unmap(heap_t* heap) { 1574 if (!heap->master_heap) { 1575 if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) { 1576 span_t* span = (span_t*)((uintptr_t)heap & _memory_span_mask); 1577 _rpmalloc_span_unmap(span); 1578 } 1579 } else { 1580 if (atomic_decr32(&heap->master_heap->child_count) == 0) { 1581 _rpmalloc_heap_unmap(heap->master_heap); 1582 } 1583 } 1584} 1585 1586static void 1587_rpmalloc_heap_global_finalize(heap_t* heap) { 1588 if (heap->finalize++ > 1) { 1589 --heap->finalize; 1590 return; 1591 } 1592 1593 _rpmalloc_heap_finalize(heap); 1594 1595#if ENABLE_THREAD_CACHE 1596 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 1597 span_cache_t* span_cache; 1598 if (!iclass) 1599 span_cache = &heap->span_cache; 1600 else 1601 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 1602 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1603 _rpmalloc_span_unmap(span_cache->span[ispan]); 1604 span_cache->count = 0; 1605 } 1606#endif 1607 1608 if (heap->full_span_count) { 1609 --heap->finalize; 1610 return; 1611 } 1612 1613 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 1614 if (heap->size_class[iclass].free_list || heap->size_class[iclass].partial_span) { 1615 --heap->finalize; 1616 return; 1617 } 1618 } 1619 //Heap is now completely free, unmap and remove from heap list 1620 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; 1621 heap_t* list_heap = _memory_heaps[list_idx]; 1622 if (list_heap == heap) { 1623 _memory_heaps[list_idx] = heap->next_heap; 1624 } else { 1625 while (list_heap->next_heap != heap) 1626 list_heap = list_heap->next_heap; 1627 list_heap->next_heap = heap->next_heap; 1628 } 1629 1630 _rpmalloc_heap_unmap(heap); 1631} 1632 1633//! Insert a single span into thread heap cache, releasing to global cache if overflow 1634static void 1635_rpmalloc_heap_cache_insert(heap_t* heap, span_t* span) { 1636 if (UNEXPECTED(heap->finalize != 0)) { 1637 _rpmalloc_span_unmap(span); 1638 _rpmalloc_heap_global_finalize(heap); 1639 return; 1640 } 1641#if ENABLE_THREAD_CACHE 1642 size_t span_count = span->span_count; 1643 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache); 1644 if (span_count == 1) { 1645 span_cache_t* span_cache = &heap->span_cache; 1646 span_cache->span[span_cache->count++] = span; 1647 if (span_cache->count == MAX_THREAD_SPAN_CACHE) { 1648 const size_t remain_count = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER; 1649#if ENABLE_GLOBAL_CACHE 1650 _rpmalloc_stat_add64(&heap->thread_to_global, THREAD_SPAN_CACHE_TRANSFER * _memory_span_size); 1651 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, THREAD_SPAN_CACHE_TRANSFER); 1652 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, THREAD_SPAN_CACHE_TRANSFER); 1653#else 1654 for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan) 1655 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); 1656#endif 1657 span_cache->count = remain_count; 1658 } 1659 } else { 1660 size_t cache_idx = span_count - 2; 1661 span_large_cache_t* span_cache = heap->span_large_cache + cache_idx; 1662 span_cache->span[span_cache->count++] = span; 1663 const size_t cache_limit = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1664 if (span_cache->count == cache_limit) { 1665 const size_t transfer_limit = 2 + (cache_limit >> 2); 1666 const size_t transfer_count = (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit ? THREAD_SPAN_LARGE_CACHE_TRANSFER : transfer_limit); 1667 const size_t remain_count = cache_limit - transfer_count; 1668#if ENABLE_GLOBAL_CACHE 1669 _rpmalloc_stat_add64(&heap->thread_to_global, transfer_count * span_count * _memory_span_size); 1670 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, transfer_count); 1671 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, transfer_count); 1672#else 1673 for (size_t ispan = 0; ispan < transfer_count; ++ispan) 1674 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); 1675#endif 1676 span_cache->count = remain_count; 1677 } 1678 } 1679#else 1680 (void)sizeof(heap); 1681 _rpmalloc_span_unmap(span); 1682#endif 1683} 1684 1685//! Extract the given number of spans from the different cache levels 1686static span_t* 1687_rpmalloc_heap_thread_cache_extract(heap_t* heap, size_t span_count) { 1688 span_t* span = 0; 1689#if ENABLE_THREAD_CACHE 1690 span_cache_t* span_cache; 1691 if (span_count == 1) 1692 span_cache = &heap->span_cache; 1693 else 1694 span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); 1695 if (span_cache->count) { 1696 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache); 1697 return span_cache->span[--span_cache->count]; 1698 } 1699#endif 1700 return span; 1701} 1702 1703static span_t* 1704_rpmalloc_heap_thread_cache_deferred_extract(heap_t* heap, size_t span_count) { 1705 span_t* span = 0; 1706 if (span_count == 1) { 1707 _rpmalloc_heap_cache_adopt_deferred(heap, &span); 1708 } else { 1709 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1710 span = _rpmalloc_heap_thread_cache_extract(heap, span_count); 1711 } 1712 return span; 1713} 1714 1715static span_t* 1716_rpmalloc_heap_reserved_extract(heap_t* heap, size_t span_count) { 1717 if (heap->spans_reserved >= span_count) 1718 return _rpmalloc_span_map(heap, span_count); 1719 return 0; 1720} 1721 1722//! Extract a span from the global cache 1723static span_t* 1724_rpmalloc_heap_global_cache_extract(heap_t* heap, size_t span_count) { 1725#if ENABLE_GLOBAL_CACHE 1726#if ENABLE_THREAD_CACHE 1727 span_cache_t* span_cache; 1728 size_t wanted_count; 1729 if (span_count == 1) { 1730 span_cache = &heap->span_cache; 1731 wanted_count = THREAD_SPAN_CACHE_TRANSFER; 1732 } else { 1733 span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2)); 1734 wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER; 1735 } 1736 span_cache->count = _rpmalloc_global_cache_extract_spans(span_cache->span, span_count, wanted_count); 1737 if (span_cache->count) { 1738 _rpmalloc_stat_add64(&heap->global_to_thread, span_count * span_cache->count * _memory_span_size); 1739 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, span_cache->count); 1740 return span_cache->span[--span_cache->count]; 1741 } 1742#else 1743 span_t* span = 0; 1744 size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1); 1745 if (count) { 1746 _rpmalloc_stat_add64(&heap->global_to_thread, span_count * count * _memory_span_size); 1747 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, count); 1748 return span; 1749 } 1750#endif 1751#endif 1752 (void)sizeof(heap); 1753 (void)sizeof(span_count); 1754 return 0; 1755} 1756 1757static void 1758_rpmalloc_inc_span_statistics(heap_t* heap, size_t span_count, uint32_t class_idx) { 1759 (void)sizeof(heap); 1760 (void)sizeof(span_count); 1761 (void)sizeof(class_idx); 1762#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 1763 uint32_t idx = (uint32_t)span_count - 1; 1764 uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current); 1765 if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high)) 1766 atomic_store32(&heap->span_use[idx].high, (int32_t)current_count); 1767 _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak); 1768#endif 1769} 1770 1771//! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory 1772static span_t* 1773_rpmalloc_heap_extract_new_span(heap_t* heap, heap_size_class_t* heap_size_class, size_t span_count, uint32_t class_idx) { 1774 span_t* span; 1775#if ENABLE_THREAD_CACHE 1776 if (heap_size_class && heap_size_class->cache) { 1777 span = heap_size_class->cache; 1778 heap_size_class->cache = (heap->span_cache.count ? heap->span_cache.span[--heap->span_cache.count] : 0); 1779 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1780 return span; 1781 } 1782#endif 1783 (void)sizeof(class_idx); 1784 // Allow 50% overhead to increase cache hits 1785 size_t base_span_count = span_count; 1786 size_t limit_span_count = (span_count > 2) ? (span_count + (span_count >> 1)) : span_count; 1787 if (limit_span_count > LARGE_CLASS_COUNT) 1788 limit_span_count = LARGE_CLASS_COUNT; 1789 do { 1790 span = _rpmalloc_heap_thread_cache_extract(heap, span_count); 1791 if (EXPECTED(span != 0)) { 1792 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1793 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1794 return span; 1795 } 1796 span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count); 1797 if (EXPECTED(span != 0)) { 1798 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1799 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1800 return span; 1801 } 1802 span = _rpmalloc_heap_reserved_extract(heap, span_count); 1803 if (EXPECTED(span != 0)) { 1804 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved); 1805 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1806 return span; 1807 } 1808 span = _rpmalloc_heap_global_cache_extract(heap, span_count); 1809 if (EXPECTED(span != 0)) { 1810 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); 1811 _rpmalloc_inc_span_statistics(heap, span_count, class_idx); 1812 return span; 1813 } 1814 ++span_count; 1815 } while (span_count <= limit_span_count); 1816 //Final fallback, map in more virtual memory 1817 span = _rpmalloc_span_map(heap, base_span_count); 1818 _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx); 1819 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls); 1820 return span; 1821} 1822 1823static void 1824_rpmalloc_heap_initialize(heap_t* heap) { 1825 memset((void*)heap, 0, sizeof(heap_t)); 1826 //Get a new heap ID 1827 heap->id = 1 + atomic_incr32(&_memory_heap_id); 1828 1829 //Link in heap in heap ID map 1830 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; 1831 heap->next_heap = _memory_heaps[list_idx]; 1832 _memory_heaps[list_idx] = heap; 1833} 1834 1835static void 1836_rpmalloc_heap_orphan(heap_t* heap, int first_class) { 1837 heap->owner_thread = (uintptr_t)-1; 1838#if RPMALLOC_FIRST_CLASS_HEAPS 1839 heap_t** heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps); 1840#else 1841 (void)sizeof(first_class); 1842 heap_t** heap_list = &_memory_orphan_heaps; 1843#endif 1844 heap->next_orphan = *heap_list; 1845 *heap_list = heap; 1846} 1847 1848//! Allocate a new heap from newly mapped memory pages 1849static heap_t* 1850_rpmalloc_heap_allocate_new(void) { 1851 // Map in pages for a 16 heaps. If page size is greater than required size for this, map a page and 1852 // use first part for heaps and remaining part for spans for allocations. Adds a lot of complexity, 1853 // but saves a lot of memory on systems where page size > 64 spans (4MiB) 1854 size_t heap_size = sizeof(heap_t); 1855 size_t aligned_heap_size = 16 * ((heap_size + 15) / 16); 1856 size_t request_heap_count = 16; 1857 size_t heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; 1858 size_t block_size = _memory_span_size * heap_span_count; 1859 size_t span_count = heap_span_count; 1860 span_t* span = 0; 1861 // If there are global reserved spans, use these first 1862 if (_memory_global_reserve_count >= heap_span_count) { 1863 span = _rpmalloc_global_get_reserved_spans(heap_span_count); 1864 } 1865 if (!span) { 1866 if (_memory_page_size > block_size) { 1867 span_count = _memory_page_size / _memory_span_size; 1868 block_size = _memory_page_size; 1869 // If using huge pages, make sure to grab enough heaps to avoid reallocating a huge page just to serve new heaps 1870 size_t possible_heap_count = (block_size - sizeof(span_t)) / aligned_heap_size; 1871 if (possible_heap_count >= (request_heap_count * 16)) 1872 request_heap_count *= 16; 1873 else if (possible_heap_count < request_heap_count) 1874 request_heap_count = possible_heap_count; 1875 heap_span_count = ((aligned_heap_size * request_heap_count) + sizeof(span_t) + _memory_span_size - 1) / _memory_span_size; 1876 } 1877 1878 size_t align_offset = 0; 1879 span = (span_t*)_rpmalloc_mmap(block_size, &align_offset); 1880 if (!span) 1881 return 0; 1882 1883 // Master span will contain the heaps 1884 _rpmalloc_stat_inc(&_master_spans); 1885 _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset); 1886 } 1887 1888 size_t remain_size = _memory_span_size - sizeof(span_t); 1889 heap_t* heap = (heap_t*)pointer_offset(span, sizeof(span_t)); 1890 _rpmalloc_heap_initialize(heap); 1891 1892 // Put extra heaps as orphans 1893 size_t num_heaps = remain_size / aligned_heap_size; 1894 if (num_heaps < request_heap_count) 1895 num_heaps = request_heap_count; 1896 atomic_store32(&heap->child_count, (int32_t)num_heaps - 1); 1897 heap_t* extra_heap = (heap_t*)pointer_offset(heap, aligned_heap_size); 1898 while (num_heaps > 1) { 1899 _rpmalloc_heap_initialize(extra_heap); 1900 extra_heap->master_heap = heap; 1901 _rpmalloc_heap_orphan(extra_heap, 1); 1902 extra_heap = (heap_t*)pointer_offset(extra_heap, aligned_heap_size); 1903 --num_heaps; 1904 } 1905 1906 if (span_count > heap_span_count) { 1907 // Cap reserved spans 1908 size_t remain_count = span_count - heap_span_count; 1909 size_t reserve_count = (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count : remain_count); 1910 span_t* remain_span = (span_t*)pointer_offset(span, heap_span_count * _memory_span_size); 1911 _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count); 1912 1913 if (remain_count > reserve_count) { 1914 // Set to global reserved spans 1915 remain_span = (span_t*)pointer_offset(remain_span, reserve_count * _memory_span_size); 1916 reserve_count = remain_count - reserve_count; 1917 _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count); 1918 } 1919 } 1920 1921 return heap; 1922} 1923 1924static heap_t* 1925_rpmalloc_heap_extract_orphan(heap_t** heap_list) { 1926 heap_t* heap = *heap_list; 1927 *heap_list = (heap ? heap->next_orphan : 0); 1928 return heap; 1929} 1930 1931//! Allocate a new heap, potentially reusing a previously orphaned heap 1932static heap_t* 1933_rpmalloc_heap_allocate(int first_class) { 1934 heap_t* heap = 0; 1935 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 1936 _rpmalloc_spin(); 1937 if (first_class == 0) 1938 heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps); 1939#if RPMALLOC_FIRST_CLASS_HEAPS 1940 if (!heap) 1941 heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps); 1942#endif 1943 if (!heap) 1944 heap = _rpmalloc_heap_allocate_new(); 1945 atomic_store32_release(&_memory_global_lock, 0); 1946 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1947 return heap; 1948} 1949 1950extern thread_local bool RpThreadShutdown; 1951 1952static void 1953_rpmalloc_heap_release(void* heapptr, int first_class, int release_cache) { 1954 heap_t* heap = (heap_t*)heapptr; 1955 if (!heap) 1956 return; 1957 RpThreadShutdown = true; 1958 //Release thread cache spans back to global cache 1959 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 1960 if (release_cache || heap->finalize) { 1961#if ENABLE_THREAD_CACHE 1962 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 1963 span_cache_t* span_cache; 1964 if (!iclass) 1965 span_cache = &heap->span_cache; 1966 else 1967 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 1968 if (!span_cache->count) 1969 continue; 1970#if ENABLE_GLOBAL_CACHE 1971 if (heap->finalize) { 1972 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1973 _rpmalloc_span_unmap(span_cache->span[ispan]); 1974 } else { 1975 _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size); 1976 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count); 1977 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count); 1978 } 1979#else 1980 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 1981 _rpmalloc_span_unmap(span_cache->span[ispan]); 1982#endif 1983 span_cache->count = 0; 1984 } 1985#endif 1986 } 1987 1988 if (get_thread_heap_raw() == heap) 1989 set_thread_heap(0); 1990 1991#if ENABLE_STATISTICS 1992 atomic_decr32(&_memory_active_heaps); 1993 rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0, "Still active heaps during finalization"); 1994#endif 1995 1996 // If we are forcibly terminating with _exit the state of the 1997 // lock atomic is unknown and it's best to just go ahead and exit 1998 if (get_thread_id() != _rpmalloc_main_thread_id) { 1999 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) 2000 _rpmalloc_spin(); 2001 } 2002 _rpmalloc_heap_orphan(heap, first_class); 2003 atomic_store32_release(&_memory_global_lock, 0); 2004} 2005 2006static void 2007_rpmalloc_heap_release_raw(void* heapptr, int release_cache) { 2008 _rpmalloc_heap_release(heapptr, 0, release_cache); 2009} 2010 2011static void 2012_rpmalloc_heap_release_raw_fc(void* heapptr) { 2013 _rpmalloc_heap_release_raw(heapptr, 1); 2014} 2015 2016static void 2017_rpmalloc_heap_finalize(heap_t* heap) { 2018 if (heap->spans_reserved) { 2019 span_t* span = _rpmalloc_span_map(heap, heap->spans_reserved); 2020 _rpmalloc_span_unmap(span); 2021 heap->spans_reserved = 0; 2022 } 2023 2024 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2025 2026 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 2027 if (heap->size_class[iclass].cache) 2028 _rpmalloc_span_unmap(heap->size_class[iclass].cache); 2029 heap->size_class[iclass].cache = 0; 2030 span_t* span = heap->size_class[iclass].partial_span; 2031 while (span) { 2032 span_t* next = span->next; 2033 _rpmalloc_span_finalize(heap, iclass, span, &heap->size_class[iclass].partial_span); 2034 span = next; 2035 } 2036 // If class still has a free list it must be a full span 2037 if (heap->size_class[iclass].free_list) { 2038 span_t* class_span = (span_t*)((uintptr_t)heap->size_class[iclass].free_list & _memory_span_mask); 2039 span_t** list = 0; 2040#if RPMALLOC_FIRST_CLASS_HEAPS 2041 list = &heap->full_span[iclass]; 2042#endif 2043 --heap->full_span_count; 2044 if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) { 2045 if (list) 2046 _rpmalloc_span_double_link_list_remove(list, class_span); 2047 _rpmalloc_span_double_link_list_add(&heap->size_class[iclass].partial_span, class_span); 2048 } 2049 } 2050 } 2051 2052#if ENABLE_THREAD_CACHE 2053 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 2054 span_cache_t* span_cache; 2055 if (!iclass) 2056 span_cache = &heap->span_cache; 2057 else 2058 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 2059 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 2060 _rpmalloc_span_unmap(span_cache->span[ispan]); 2061 span_cache->count = 0; 2062 } 2063#endif 2064 rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred), "Heaps still active during finalization"); 2065} 2066 2067 2068//////////// 2069/// 2070/// Allocation entry points 2071/// 2072////// 2073 2074//! Pop first block from a free list 2075static void* 2076free_list_pop(void** list) { 2077 void* block = *list; 2078 *list = *((void**)block); 2079 return block; 2080} 2081 2082//! Allocate a small/medium sized memory block from the given heap 2083static void* 2084_rpmalloc_allocate_from_heap_fallback(heap_t* heap, heap_size_class_t* heap_size_class, uint32_t class_idx) { 2085 span_t* span = heap_size_class->partial_span; 2086 if (EXPECTED(span != 0)) { 2087 rpmalloc_assert(span->block_count == _memory_size_class[span->size_class].block_count, "Span block count corrupted"); 2088 rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span), "Internal failure"); 2089 void* block; 2090 if (span->free_list) { 2091 //Span local free list is not empty, swap to size class free list 2092 block = free_list_pop(&span->free_list); 2093 heap_size_class->free_list = span->free_list; 2094 span->free_list = 0; 2095 } else { 2096 //If the span did not fully initialize free list, link up another page worth of blocks 2097 void* block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size)); 2098 span->free_list_limit += free_list_partial_init(&heap_size_class->free_list, &block, 2099 (void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start, 2100 span->block_count - span->free_list_limit, span->block_size); 2101 } 2102 rpmalloc_assert(span->free_list_limit <= span->block_count, "Span block count corrupted"); 2103 span->used_count = span->free_list_limit; 2104 2105 //Swap in deferred free list if present 2106 if (atomic_load_ptr(&span->free_list_deferred)) 2107 _rpmalloc_span_extract_free_list_deferred(span); 2108 2109 //If span is still not fully utilized keep it in partial list and early return block 2110 if (!_rpmalloc_span_is_fully_utilized(span)) 2111 return block; 2112 2113 //The span is fully utilized, unlink from partial list and add to fully utilized list 2114 _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span, span); 2115#if RPMALLOC_FIRST_CLASS_HEAPS 2116 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); 2117#endif 2118 ++heap->full_span_count; 2119 return block; 2120 } 2121 2122 //Find a span in one of the cache levels 2123 span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx); 2124 if (EXPECTED(span != 0)) { 2125 //Mark span as owned by this heap and set base data, return first block 2126 return _rpmalloc_span_initialize_new(heap, heap_size_class, span, class_idx); 2127 } 2128 2129 return 0; 2130} 2131 2132//! Allocate a small sized memory block from the given heap 2133static void* 2134_rpmalloc_allocate_small(heap_t* heap, size_t size) { 2135 rpmalloc_assert(heap, "No thread heap"); 2136 //Small sizes have unique size classes 2137 const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT); 2138 heap_size_class_t* heap_size_class = heap->size_class + class_idx; 2139 _rpmalloc_stat_inc_alloc(heap, class_idx); 2140 if (EXPECTED(heap_size_class->free_list != 0)) 2141 return free_list_pop(&heap_size_class->free_list); 2142 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); 2143} 2144 2145//! Allocate a medium sized memory block from the given heap 2146static void* 2147_rpmalloc_allocate_medium(heap_t* heap, size_t size) { 2148 rpmalloc_assert(heap, "No thread heap"); 2149 //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes) 2150 const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT)); 2151 const uint32_t class_idx = _memory_size_class[base_idx].class_idx; 2152 heap_size_class_t* heap_size_class = heap->size_class + class_idx; 2153 _rpmalloc_stat_inc_alloc(heap, class_idx); 2154 if (EXPECTED(heap_size_class->free_list != 0)) 2155 return free_list_pop(&heap_size_class->free_list); 2156 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, class_idx); 2157} 2158 2159//! Allocate a large sized memory block from the given heap 2160static void* 2161_rpmalloc_allocate_large(heap_t* heap, size_t size) { 2162 rpmalloc_assert(heap, "No thread heap"); 2163 //Calculate number of needed max sized spans (including header) 2164 //Since this function is never called if size > LARGE_SIZE_LIMIT 2165 //the span_count is guaranteed to be <= LARGE_CLASS_COUNT 2166 size += SPAN_HEADER_SIZE; 2167 size_t span_count = size >> _memory_span_size_shift; 2168 if (size & (_memory_span_size - 1)) 2169 ++span_count; 2170 2171 //Find a span in one of the cache levels 2172 span_t* span = _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE); 2173 if (!span) 2174 return span; 2175 2176 //Mark span as owned by this heap and set base data 2177 rpmalloc_assert(span->span_count >= span_count, "Internal failure"); 2178 span->size_class = SIZE_CLASS_LARGE; 2179 span->heap = heap; 2180 2181#if RPMALLOC_FIRST_CLASS_HEAPS 2182 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2183#endif 2184 ++heap->full_span_count; 2185 2186 return pointer_offset(span, SPAN_HEADER_SIZE); 2187} 2188 2189//! Allocate a huge block by mapping memory pages directly 2190static void* 2191_rpmalloc_allocate_huge(heap_t* heap, size_t size) { 2192 rpmalloc_assert(heap, "No thread heap"); 2193 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 2194 size += SPAN_HEADER_SIZE; 2195 size_t num_pages = size >> _memory_page_size_shift; 2196 if (size & (_memory_page_size - 1)) 2197 ++num_pages; 2198 size_t align_offset = 0; 2199 span_t* span = (span_t*)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset); 2200 if (!span) 2201 return span; 2202 2203 //Store page count in span_count 2204 span->size_class = SIZE_CLASS_HUGE; 2205 span->span_count = (uint32_t)num_pages; 2206 span->align_offset = (uint32_t)align_offset; 2207 span->heap = heap; 2208 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); 2209 2210#if RPMALLOC_FIRST_CLASS_HEAPS 2211 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2212#endif 2213 ++heap->full_span_count; 2214 2215 return pointer_offset(span, SPAN_HEADER_SIZE); 2216} 2217 2218//! Allocate a block of the given size 2219static void* 2220_rpmalloc_allocate(heap_t* heap, size_t size) { 2221 _rpmalloc_stat_add64(&_allocation_counter, 1); 2222 if (EXPECTED(size <= SMALL_SIZE_LIMIT)) 2223 return _rpmalloc_allocate_small(heap, size); 2224 else if (size <= _memory_medium_size_limit) 2225 return _rpmalloc_allocate_medium(heap, size); 2226 else if (size <= LARGE_SIZE_LIMIT) 2227 return _rpmalloc_allocate_large(heap, size); 2228 return _rpmalloc_allocate_huge(heap, size); 2229} 2230 2231static void* 2232_rpmalloc_aligned_allocate(heap_t* heap, size_t alignment, size_t size) { 2233 if (alignment <= SMALL_GRANULARITY) 2234 return _rpmalloc_allocate(heap, size); 2235 2236#if ENABLE_VALIDATE_ARGS 2237 if ((size + alignment) < size) { 2238 errno = EINVAL; 2239 return 0; 2240 } 2241 if (alignment & (alignment - 1)) { 2242 errno = EINVAL; 2243 return 0; 2244 } 2245#endif 2246 2247 if ((alignment <= SPAN_HEADER_SIZE) && (size < _memory_medium_size_limit)) { 2248 // If alignment is less or equal to span header size (which is power of two), 2249 // and size aligned to span header size multiples is less than size + alignment, 2250 // then use natural alignment of blocks to provide alignment 2251 size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE; 2252 rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE), "Failed alignment calculation"); 2253 if (multiple_size <= (size + alignment)) 2254 return _rpmalloc_allocate(heap, multiple_size); 2255 } 2256 2257 void* ptr = 0; 2258 size_t align_mask = alignment - 1; 2259 if (alignment <= _memory_page_size) { 2260 ptr = _rpmalloc_allocate(heap, size + alignment); 2261 if ((uintptr_t)ptr & align_mask) { 2262 ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); 2263 //Mark as having aligned blocks 2264 span_t* span = (span_t*)((uintptr_t)ptr & _memory_span_mask); 2265 span->flags |= SPAN_FLAG_ALIGNED_BLOCKS; 2266 } 2267 return ptr; 2268 } 2269 2270 // Fallback to mapping new pages for this request. Since pointers passed 2271 // to rpfree must be able to reach the start of the span by bitmasking of 2272 // the address with the span size, the returned aligned pointer from this 2273 // function must be with a span size of the start of the mapped area. 2274 // In worst case this requires us to loop and map pages until we get a 2275 // suitable memory address. It also means we can never align to span size 2276 // or greater, since the span header will push alignment more than one 2277 // span size away from span start (thus causing pointer mask to give us 2278 // an invalid span start on free) 2279 if (alignment & align_mask) { 2280 errno = EINVAL; 2281 return 0; 2282 } 2283 if (alignment >= _memory_span_size) { 2284 errno = EINVAL; 2285 return 0; 2286 } 2287 2288 size_t extra_pages = alignment / _memory_page_size; 2289 2290 // Since each span has a header, we will at least need one extra memory page 2291 size_t num_pages = 1 + (size / _memory_page_size); 2292 if (size & (_memory_page_size - 1)) 2293 ++num_pages; 2294 2295 if (extra_pages > num_pages) 2296 num_pages = 1 + extra_pages; 2297 2298 size_t original_pages = num_pages; 2299 size_t limit_pages = (_memory_span_size / _memory_page_size) * 2; 2300 if (limit_pages < (original_pages * 2)) 2301 limit_pages = original_pages * 2; 2302 2303 size_t mapped_size, align_offset; 2304 span_t* span; 2305 2306retry: 2307 align_offset = 0; 2308 mapped_size = num_pages * _memory_page_size; 2309 2310 span = (span_t*)_rpmalloc_mmap(mapped_size, &align_offset); 2311 if (!span) { 2312 errno = ENOMEM; 2313 return 0; 2314 } 2315 ptr = pointer_offset(span, SPAN_HEADER_SIZE); 2316 2317 if ((uintptr_t)ptr & align_mask) 2318 ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); 2319 2320 if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) || 2321 (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) || 2322 (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) { 2323 _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size); 2324 ++num_pages; 2325 if (num_pages > limit_pages) { 2326 errno = EINVAL; 2327 return 0; 2328 } 2329 goto retry; 2330 } 2331 2332 //Store page count in span_count 2333 span->size_class = SIZE_CLASS_HUGE; 2334 span->span_count = (uint32_t)num_pages; 2335 span->align_offset = (uint32_t)align_offset; 2336 span->heap = heap; 2337 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); 2338 2339#if RPMALLOC_FIRST_CLASS_HEAPS 2340 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); 2341#endif 2342 ++heap->full_span_count; 2343 2344 _rpmalloc_stat_add64(&_allocation_counter, 1); 2345 2346 return ptr; 2347} 2348 2349 2350//////////// 2351/// 2352/// Deallocation entry points 2353/// 2354////// 2355 2356//! Deallocate the given small/medium memory block in the current thread local heap 2357static void 2358_rpmalloc_deallocate_direct_small_or_medium(span_t* span, void* block) { 2359 heap_t* heap = span->heap; 2360 rpmalloc_assert(heap->owner_thread == get_thread_id() || !heap->owner_thread || heap->finalize, "Internal failure"); 2361 //Add block to free list 2362 if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) { 2363 span->used_count = span->block_count; 2364#if RPMALLOC_FIRST_CLASS_HEAPS 2365 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span); 2366#endif 2367 _rpmalloc_span_double_link_list_add(&heap->size_class[span->size_class].partial_span, span); 2368 --heap->full_span_count; 2369 } 2370 *((void**)block) = span->free_list; 2371 --span->used_count; 2372 span->free_list = block; 2373 if (UNEXPECTED(span->used_count == span->list_size)) { 2374 // If there are no used blocks it is guaranteed that no other external thread is accessing the span 2375 if (span->used_count) { 2376 // Make sure we have synchronized the deferred list and list size by using acquire semantics 2377 // and guarantee that no external thread is accessing span concurrently 2378 void* free_list; 2379 do { 2380 free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 2381 } while (free_list == INVALID_POINTER); 2382 atomic_store_ptr_release(&span->free_list_deferred, free_list); 2383 } 2384 _rpmalloc_span_double_link_list_remove(&heap->size_class[span->size_class].partial_span, span); 2385 _rpmalloc_span_release_to_cache(heap, span); 2386 } 2387} 2388 2389static void 2390_rpmalloc_deallocate_defer_free_span(heap_t* heap, span_t* span) { 2391 if (span->size_class != SIZE_CLASS_HUGE) 2392 _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred); 2393 //This list does not need ABA protection, no mutable side state 2394 do { 2395 span->free_list = (void*)atomic_load_ptr(&heap->span_free_deferred); 2396 } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list)); 2397} 2398 2399//! Put the block in the deferred free list of the owning span 2400static void 2401_rpmalloc_deallocate_defer_small_or_medium(span_t* span, void* block) { 2402 // The memory ordering here is a bit tricky, to avoid having to ABA protect 2403 // the deferred free list to avoid desynchronization of list and list size 2404 // we need to have acquire semantics on successful CAS of the pointer to 2405 // guarantee the list_size variable validity + release semantics on pointer store 2406 void* free_list; 2407 do { 2408 free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); 2409 } while (free_list == INVALID_POINTER); 2410 *((void**)block) = free_list; 2411 uint32_t free_count = ++span->list_size; 2412 int all_deferred_free = (free_count == span->block_count); 2413 atomic_store_ptr_release(&span->free_list_deferred, block); 2414 if (all_deferred_free) { 2415 // Span was completely freed by this block. Due to the INVALID_POINTER spin lock 2416 // no other thread can reach this state simultaneously on this span. 2417 // Safe to move to owner heap deferred cache 2418 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2419 } 2420} 2421 2422static void 2423_rpmalloc_deallocate_small_or_medium(span_t* span, void* p) { 2424 _rpmalloc_stat_inc_free(span->heap, span->size_class); 2425 if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) { 2426 //Realign pointer to block start 2427 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2428 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); 2429 p = pointer_offset(p, -(int32_t)(block_offset % span->block_size)); 2430 } 2431 //Check if block belongs to this heap or if deallocation should be deferred 2432#if RPMALLOC_FIRST_CLASS_HEAPS 2433 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2434#else 2435 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2436#endif 2437 if (!defer) 2438 _rpmalloc_deallocate_direct_small_or_medium(span, p); 2439 else 2440 _rpmalloc_deallocate_defer_small_or_medium(span, p); 2441} 2442 2443//! Deallocate the given large memory block to the current heap 2444static void 2445_rpmalloc_deallocate_large(span_t* span) { 2446 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class"); 2447 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 2448 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted"); 2449 //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap 2450#if RPMALLOC_FIRST_CLASS_HEAPS 2451 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2452#else 2453 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2454#endif 2455 if (defer) { 2456 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2457 return; 2458 } 2459 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); 2460 --span->heap->full_span_count; 2461#if RPMALLOC_FIRST_CLASS_HEAPS 2462 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); 2463#endif 2464#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS 2465 //Decrease counter 2466 size_t idx = span->span_count - 1; 2467 atomic_decr32(&span->heap->span_use[idx].current); 2468#endif 2469 heap_t* heap = span->heap; 2470 rpmalloc_assert(heap, "No thread heap"); 2471#if ENABLE_THREAD_CACHE 2472 const int set_as_reserved = ((span->span_count > 1) && (heap->span_cache.count == 0) && !heap->finalize && !heap->spans_reserved); 2473#else 2474 const int set_as_reserved = ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved); 2475#endif 2476 if (set_as_reserved) { 2477 heap->span_reserve = span; 2478 heap->spans_reserved = span->span_count; 2479 if (span->flags & SPAN_FLAG_MASTER) { 2480 heap->span_reserve_master = span; 2481 } else { //SPAN_FLAG_SUBSPAN 2482 span_t* master = (span_t*)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size)); 2483 heap->span_reserve_master = master; 2484 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); 2485 rpmalloc_assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count, "Master span count corrupted"); 2486 } 2487 _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved); 2488 } else { 2489 //Insert into cache list 2490 _rpmalloc_heap_cache_insert(heap, span); 2491 } 2492} 2493 2494//! Deallocate the given huge span 2495static void 2496_rpmalloc_deallocate_huge(span_t* span) { 2497 rpmalloc_assert(span->heap, "No span heap"); 2498#if RPMALLOC_FIRST_CLASS_HEAPS 2499 int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2500#else 2501 int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); 2502#endif 2503 if (defer) { 2504 _rpmalloc_deallocate_defer_free_span(span->heap, span); 2505 return; 2506 } 2507 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); 2508 --span->heap->full_span_count; 2509#if RPMALLOC_FIRST_CLASS_HEAPS 2510 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); 2511#endif 2512 2513 //Oversized allocation, page count is stored in span_count 2514 size_t num_pages = span->span_count; 2515 _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size); 2516 _rpmalloc_stat_sub(&_huge_pages_current, num_pages); 2517} 2518 2519//! Deallocate the given block 2520static void 2521_rpmalloc_deallocate(void* p) { 2522 _rpmalloc_stat_add64(&_deallocation_counter, 1); 2523 //Grab the span (always at start of span, using span alignment) 2524 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2525 if (UNEXPECTED(!span)) 2526 return; 2527 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) 2528 _rpmalloc_deallocate_small_or_medium(span, p); 2529 else if (span->size_class == SIZE_CLASS_LARGE) 2530 _rpmalloc_deallocate_large(span); 2531 else 2532 _rpmalloc_deallocate_huge(span); 2533} 2534 2535//////////// 2536/// 2537/// Reallocation entry points 2538/// 2539////// 2540 2541static size_t 2542_rpmalloc_usable_size(void* p); 2543 2544//! Reallocate the given block to the given size 2545static void* 2546_rpmalloc_reallocate(heap_t* heap, void* p, size_t size, size_t oldsize, unsigned int flags) { 2547 if (p) { 2548 //Grab the span using guaranteed span alignment 2549 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2550 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { 2551 //Small/medium sized block 2552 rpmalloc_assert(span->span_count == 1, "Span counter corrupted"); 2553 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2554 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); 2555 uint32_t block_idx = block_offset / span->block_size; 2556 void* block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size); 2557 if (!oldsize) 2558 oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block)); 2559 if ((size_t)span->block_size >= size) { 2560 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2561 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2562 memmove(block, p, oldsize); 2563 return block; 2564 } 2565 } else if (span->size_class == SIZE_CLASS_LARGE) { 2566 //Large block 2567 size_t total_size = size + SPAN_HEADER_SIZE; 2568 size_t num_spans = total_size >> _memory_span_size_shift; 2569 if (total_size & (_memory_span_mask - 1)) 2570 ++num_spans; 2571 size_t current_spans = span->span_count; 2572 void* block = pointer_offset(span, SPAN_HEADER_SIZE); 2573 if (!oldsize) 2574 oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; 2575 if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) { 2576 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2577 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2578 memmove(block, p, oldsize); 2579 return block; 2580 } 2581 } else { 2582 //Oversized block 2583 size_t total_size = size + SPAN_HEADER_SIZE; 2584 size_t num_pages = total_size >> _memory_page_size_shift; 2585 if (total_size & (_memory_page_size - 1)) 2586 ++num_pages; 2587 //Page count is stored in span_count 2588 size_t current_pages = span->span_count; 2589 void* block = pointer_offset(span, SPAN_HEADER_SIZE); 2590 if (!oldsize) 2591 oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; 2592 if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) { 2593 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed 2594 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) 2595 memmove(block, p, oldsize); 2596 return block; 2597 } 2598 } 2599 } else { 2600 oldsize = 0; 2601 } 2602 2603 if (!!(flags & RPMALLOC_GROW_OR_FAIL)) 2604 return 0; 2605 2606 //Size is greater than block size, need to allocate a new block and deallocate the old 2607 //Avoid hysteresis by overallocating if increase is small (below 37%) 2608 size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3); 2609 size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size); 2610 void* block = _rpmalloc_allocate(heap, new_size); 2611 if (p && block) { 2612 if (!(flags & RPMALLOC_NO_PRESERVE)) 2613 memcpy(block, p, oldsize < new_size ? oldsize : new_size); 2614 _rpmalloc_deallocate(p); 2615 } 2616 2617 return block; 2618} 2619 2620static void* 2621_rpmalloc_aligned_reallocate(heap_t* heap, void* ptr, size_t alignment, size_t size, size_t oldsize, 2622 unsigned int flags) { 2623 if (alignment <= SMALL_GRANULARITY) 2624 return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags); 2625 2626 int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL); 2627 size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0); 2628 if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) { 2629 if (no_alloc || (size >= (usablesize / 2))) 2630 return ptr; 2631 } 2632 // Aligned alloc marks span as having aligned blocks 2633 void* block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0); 2634 if (EXPECTED(block != 0)) { 2635 if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) { 2636 if (!oldsize) 2637 oldsize = usablesize; 2638 memcpy(block, ptr, oldsize < size ? oldsize : size); 2639 } 2640 _rpmalloc_deallocate(ptr); 2641 } 2642 return block; 2643} 2644 2645 2646//////////// 2647/// 2648/// Initialization, finalization and utility 2649/// 2650////// 2651 2652//! Get the usable size of the given block 2653static size_t 2654_rpmalloc_usable_size(void* p) { 2655 //Grab the span using guaranteed span alignment 2656 span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask); 2657 if (span->size_class < SIZE_CLASS_COUNT) { 2658 //Small/medium block 2659 void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); 2660 return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size); 2661 } 2662 if (span->size_class == SIZE_CLASS_LARGE) { 2663 //Large block 2664 size_t current_spans = span->span_count; 2665 return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span); 2666 } 2667 //Oversized block, page count is stored in span_count 2668 size_t current_pages = span->span_count; 2669 return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span); 2670} 2671 2672//! Adjust and optimize the size class properties for the given class 2673static void 2674_rpmalloc_adjust_size_class(size_t iclass) { 2675 size_t block_size = _memory_size_class[iclass].block_size; 2676 size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size; 2677 2678 _memory_size_class[iclass].block_count = (uint16_t)block_count; 2679 _memory_size_class[iclass].class_idx = (uint16_t)iclass; 2680 2681 //Check if previous size classes can be merged 2682 if (iclass >= SMALL_CLASS_COUNT) { 2683 size_t prevclass = iclass; 2684 while (prevclass > 0) { 2685 --prevclass; 2686 //A class can be merged if number of pages and number of blocks are equal 2687 if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count) 2688 memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass])); 2689 else 2690 break; 2691 } 2692 } 2693} 2694 2695//! Initialize the allocator and setup global data 2696TRACY_API int 2697rpmalloc_initialize(void) { 2698 if (_rpmalloc_initialized) { 2699 rpmalloc_thread_initialize(); 2700 return 0; 2701 } 2702 return rpmalloc_initialize_config(0); 2703} 2704 2705int 2706rpmalloc_initialize_config(const rpmalloc_config_t* config) { 2707 if (_rpmalloc_initialized) { 2708 rpmalloc_thread_initialize(); 2709 return 0; 2710 } 2711 _rpmalloc_initialized = 1; 2712 2713 if (config) 2714 memcpy(&_memory_config, config, sizeof(rpmalloc_config_t)); 2715 else 2716 memset(&_memory_config, 0, sizeof(rpmalloc_config_t)); 2717 2718 if (!_memory_config.memory_map || !_memory_config.memory_unmap) { 2719 _memory_config.memory_map = _rpmalloc_mmap_os; 2720 _memory_config.memory_unmap = _rpmalloc_unmap_os; 2721 } 2722 2723#if PLATFORM_WINDOWS 2724 SYSTEM_INFO system_info; 2725 memset(&system_info, 0, sizeof(system_info)); 2726 GetSystemInfo(&system_info); 2727 _memory_map_granularity = system_info.dwAllocationGranularity; 2728#else 2729 _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE); 2730#endif 2731 2732#if RPMALLOC_CONFIGURABLE 2733 _memory_page_size = _memory_config.page_size; 2734#else 2735 _memory_page_size = 0; 2736#endif 2737 _memory_huge_pages = 0; 2738 if (!_memory_page_size) { 2739#if PLATFORM_WINDOWS 2740 _memory_page_size = system_info.dwPageSize; 2741#else 2742 _memory_page_size = _memory_map_granularity; 2743 if (_memory_config.enable_huge_pages) { 2744#if defined(__linux__) 2745 size_t huge_page_size = 0; 2746 FILE* meminfo = fopen("/proc/meminfo", "r"); 2747 if (meminfo) { 2748 char line[128]; 2749 while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) { 2750 line[sizeof(line) - 1] = 0; 2751 if (strstr(line, "Hugepagesize:")) 2752 huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024; 2753 } 2754 fclose(meminfo); 2755 } 2756 if (huge_page_size) { 2757 _memory_huge_pages = 1; 2758 _memory_page_size = huge_page_size; 2759 _memory_map_granularity = huge_page_size; 2760 } 2761#elif defined(__FreeBSD__) 2762 int rc; 2763 size_t sz = sizeof(rc); 2764 2765 if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) { 2766 _memory_huge_pages = 1; 2767 _memory_page_size = 2 * 1024 * 1024; 2768 _memory_map_granularity = _memory_page_size; 2769 } 2770#elif defined(__APPLE__) || defined(__NetBSD__) 2771 _memory_huge_pages = 1; 2772 _memory_page_size = 2 * 1024 * 1024; 2773 _memory_map_granularity = _memory_page_size; 2774#endif 2775 } 2776#endif 2777 } else { 2778 if (_memory_config.enable_huge_pages) 2779 _memory_huge_pages = 1; 2780 } 2781 2782#if PLATFORM_WINDOWS 2783 if (_memory_config.enable_huge_pages) { 2784 HANDLE token = 0; 2785 size_t large_page_minimum = GetLargePageMinimum(); 2786 if (large_page_minimum) 2787 OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); 2788 if (token) { 2789 LUID luid; 2790 if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) { 2791 TOKEN_PRIVILEGES token_privileges; 2792 memset(&token_privileges, 0, sizeof(token_privileges)); 2793 token_privileges.PrivilegeCount = 1; 2794 token_privileges.Privileges[0].Luid = luid; 2795 token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; 2796 if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) { 2797 if (GetLastError() == ERROR_SUCCESS) 2798 _memory_huge_pages = 1; 2799 } 2800 } 2801 CloseHandle(token); 2802 } 2803 if (_memory_huge_pages) { 2804 if (large_page_minimum > _memory_page_size) 2805 _memory_page_size = large_page_minimum; 2806 if (large_page_minimum > _memory_map_granularity) 2807 _memory_map_granularity = large_page_minimum; 2808 } 2809 } 2810#endif 2811 2812 size_t min_span_size = 256; 2813 size_t max_page_size; 2814#if UINTPTR_MAX > 0xFFFFFFFF 2815 max_page_size = 4096ULL * 1024ULL * 1024ULL; 2816#else 2817 max_page_size = 4 * 1024 * 1024; 2818#endif 2819 if (_memory_page_size < min_span_size) 2820 _memory_page_size = min_span_size; 2821 if (_memory_page_size > max_page_size) 2822 _memory_page_size = max_page_size; 2823 _memory_page_size_shift = 0; 2824 size_t page_size_bit = _memory_page_size; 2825 while (page_size_bit != 1) { 2826 ++_memory_page_size_shift; 2827 page_size_bit >>= 1; 2828 } 2829 _memory_page_size = ((size_t)1 << _memory_page_size_shift); 2830 2831#if RPMALLOC_CONFIGURABLE 2832 if (!_memory_config.span_size) { 2833 _memory_span_size = _memory_default_span_size; 2834 _memory_span_size_shift = _memory_default_span_size_shift; 2835 _memory_span_mask = _memory_default_span_mask; 2836 } else { 2837 size_t span_size = _memory_config.span_size; 2838 if (span_size > (256 * 1024)) 2839 span_size = (256 * 1024); 2840 _memory_span_size = 4096; 2841 _memory_span_size_shift = 12; 2842 while (_memory_span_size < span_size) { 2843 _memory_span_size <<= 1; 2844 ++_memory_span_size_shift; 2845 } 2846 _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1); 2847 } 2848#endif 2849 2850 _memory_span_map_count = ( _memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT); 2851 if ((_memory_span_size * _memory_span_map_count) < _memory_page_size) 2852 _memory_span_map_count = (_memory_page_size / _memory_span_size); 2853 if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size)) 2854 _memory_span_map_count = (_memory_page_size / _memory_span_size); 2855 _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT) ? DEFAULT_SPAN_MAP_COUNT : _memory_span_map_count; 2856 2857 _memory_config.page_size = _memory_page_size; 2858 _memory_config.span_size = _memory_span_size; 2859 _memory_config.span_map_count = _memory_span_map_count; 2860 _memory_config.enable_huge_pages = _memory_huge_pages; 2861 2862#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || defined(__TINYC__) 2863 if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc)) 2864 return -1; 2865#endif 2866#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2867 fls_key = FlsAlloc(&_rpmalloc_thread_destructor); 2868#endif 2869 2870 //Setup all small and medium size classes 2871 size_t iclass = 0; 2872 _memory_size_class[iclass].block_size = SMALL_GRANULARITY; 2873 _rpmalloc_adjust_size_class(iclass); 2874 for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) { 2875 size_t size = iclass * SMALL_GRANULARITY; 2876 _memory_size_class[iclass].block_size = (uint32_t)size; 2877 _rpmalloc_adjust_size_class(iclass); 2878 } 2879 //At least two blocks per span, then fall back to large allocations 2880 _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1; 2881 if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT) 2882 _memory_medium_size_limit = MEDIUM_SIZE_LIMIT; 2883 for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) { 2884 size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY); 2885 if (size > _memory_medium_size_limit) 2886 break; 2887 _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size; 2888 _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass); 2889 } 2890 2891 _memory_orphan_heaps = 0; 2892#if RPMALLOC_FIRST_CLASS_HEAPS 2893 _memory_first_class_orphan_heaps = 0; 2894#endif 2895#if ENABLE_STATISTICS 2896 atomic_store32(&_memory_active_heaps, 0); 2897 atomic_store32(&_mapped_pages, 0); 2898 _mapped_pages_peak = 0; 2899 atomic_store32(&_master_spans, 0); 2900 atomic_store32(&_mapped_total, 0); 2901 atomic_store32(&_unmapped_total, 0); 2902 atomic_store32(&_mapped_pages_os, 0); 2903 atomic_store32(&_huge_pages_current, 0); 2904 _huge_pages_peak = 0; 2905#endif 2906 memset(_memory_heaps, 0, sizeof(_memory_heaps)); 2907 atomic_store32_release(&_memory_global_lock, 0); 2908 2909 //Initialize this thread 2910 rpmalloc_thread_initialize(); 2911 return 0; 2912} 2913 2914//! Finalize the allocator 2915TRACY_API void 2916rpmalloc_finalize(void) { 2917 rpmalloc_thread_finalize(1); 2918 //rpmalloc_dump_statistics(stdout); 2919 2920 if (_memory_global_reserve) { 2921 atomic_add32(&_memory_global_reserve_master->remaining_spans, -(int32_t)_memory_global_reserve_count); 2922 _memory_global_reserve_master = 0; 2923 _memory_global_reserve_count = 0; 2924 _memory_global_reserve = 0; 2925 } 2926 atomic_store32_release(&_memory_global_lock, 0); 2927 2928 //Free all thread caches and fully free spans 2929 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { 2930 heap_t* heap = _memory_heaps[list_idx]; 2931 while (heap) { 2932 heap_t* next_heap = heap->next_heap; 2933 heap->finalize = 1; 2934 _rpmalloc_heap_global_finalize(heap); 2935 heap = next_heap; 2936 } 2937 } 2938 2939#if ENABLE_GLOBAL_CACHE 2940 //Free global caches 2941 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) 2942 _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]); 2943#endif 2944 2945#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD 2946 pthread_key_delete(_memory_thread_heap); 2947#endif 2948#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2949 FlsFree(fls_key); 2950 fls_key = 0; 2951#endif 2952#if ENABLE_STATISTICS 2953 //If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code 2954 rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected"); 2955 rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0, "Memory leak detected"); 2956#endif 2957 2958 _rpmalloc_initialized = 0; 2959} 2960 2961//! Initialize thread, assign heap 2962TRACY_API void 2963rpmalloc_thread_initialize(void) { 2964 if (!get_thread_heap_raw()) { 2965 heap_t* heap = _rpmalloc_heap_allocate(0); 2966 if (heap) { 2967 _rpmalloc_stat_inc(&_memory_active_heaps); 2968 set_thread_heap(heap); 2969#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2970 FlsSetValue(fls_key, heap); 2971#endif 2972 } 2973 } 2974} 2975 2976//! Finalize thread, orphan heap 2977TRACY_API void 2978rpmalloc_thread_finalize(int release_caches) { 2979 heap_t* heap = get_thread_heap_raw(); 2980 if (heap) 2981 _rpmalloc_heap_release_raw(heap, release_caches); 2982 set_thread_heap(0); 2983#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) 2984 FlsSetValue(fls_key, 0); 2985#endif 2986} 2987 2988int 2989rpmalloc_is_thread_initialized(void) { 2990 return (get_thread_heap_raw() != 0) ? 1 : 0; 2991} 2992 2993const rpmalloc_config_t* 2994rpmalloc_config(void) { 2995 return &_memory_config; 2996} 2997 2998// Extern interface 2999 3000TRACY_API RPMALLOC_ALLOCATOR void* 3001rpmalloc(size_t size) { 3002#if ENABLE_VALIDATE_ARGS 3003 if (size >= MAX_ALLOC_SIZE) { 3004 errno = EINVAL; 3005 return 0; 3006 } 3007#endif 3008 heap_t* heap = get_thread_heap(); 3009 return _rpmalloc_allocate(heap, size); 3010} 3011 3012TRACY_API void 3013rpfree(void* ptr) { 3014 _rpmalloc_deallocate(ptr); 3015} 3016 3017extern inline RPMALLOC_ALLOCATOR void* 3018rpcalloc(size_t num, size_t size) { 3019 size_t total; 3020#if ENABLE_VALIDATE_ARGS 3021#if PLATFORM_WINDOWS 3022 int err = SizeTMult(num, size, &total); 3023 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { 3024 errno = EINVAL; 3025 return 0; 3026 } 3027#else 3028 int err = __builtin_umull_overflow(num, size, &total); 3029 if (err || (total >= MAX_ALLOC_SIZE)) { 3030 errno = EINVAL; 3031 return 0; 3032 } 3033#endif 3034#else 3035 total = num * size; 3036#endif 3037 heap_t* heap = get_thread_heap(); 3038 void* block = _rpmalloc_allocate(heap, total); 3039 if (block) 3040 memset(block, 0, total); 3041 return block; 3042} 3043 3044TRACY_API RPMALLOC_ALLOCATOR void* 3045rprealloc(void* ptr, size_t size) { 3046#if ENABLE_VALIDATE_ARGS 3047 if (size >= MAX_ALLOC_SIZE) { 3048 errno = EINVAL; 3049 return ptr; 3050 } 3051#endif 3052 heap_t* heap = get_thread_heap(); 3053 return _rpmalloc_reallocate(heap, ptr, size, 0, 0); 3054} 3055 3056extern RPMALLOC_ALLOCATOR void* 3057rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, 3058 unsigned int flags) { 3059#if ENABLE_VALIDATE_ARGS 3060 if ((size + alignment < size) || (alignment > _memory_page_size)) { 3061 errno = EINVAL; 3062 return 0; 3063 } 3064#endif 3065 heap_t* heap = get_thread_heap(); 3066 return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags); 3067} 3068 3069extern RPMALLOC_ALLOCATOR void* 3070rpaligned_alloc(size_t alignment, size_t size) { 3071 heap_t* heap = get_thread_heap(); 3072 return _rpmalloc_aligned_allocate(heap, alignment, size); 3073} 3074 3075extern inline RPMALLOC_ALLOCATOR void* 3076rpaligned_calloc(size_t alignment, size_t num, size_t size) { 3077 size_t total; 3078#if ENABLE_VALIDATE_ARGS 3079#if PLATFORM_WINDOWS 3080 int err = SizeTMult(num, size, &total); 3081 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { 3082 errno = EINVAL; 3083 return 0; 3084 } 3085#else 3086 int err = __builtin_umull_overflow(num, size, &total); 3087 if (err || (total >= MAX_ALLOC_SIZE)) { 3088 errno = EINVAL; 3089 return 0; 3090 } 3091#endif 3092#else 3093 total = num * size; 3094#endif 3095 void* block = rpaligned_alloc(alignment, total); 3096 if (block) 3097 memset(block, 0, total); 3098 return block; 3099} 3100 3101extern inline RPMALLOC_ALLOCATOR void* 3102rpmemalign(size_t alignment, size_t size) { 3103 return rpaligned_alloc(alignment, size); 3104} 3105 3106extern inline int 3107rpposix_memalign(void **memptr, size_t alignment, size_t size) { 3108 if (memptr) 3109 *memptr = rpaligned_alloc(alignment, size); 3110 else 3111 return EINVAL; 3112 return *memptr ? 0 : ENOMEM; 3113} 3114 3115extern inline size_t 3116rpmalloc_usable_size(void* ptr) { 3117 return (ptr ? _rpmalloc_usable_size(ptr) : 0); 3118} 3119 3120extern inline void 3121rpmalloc_thread_collect(void) { 3122} 3123 3124void 3125rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats) { 3126 memset(stats, 0, sizeof(rpmalloc_thread_statistics_t)); 3127 heap_t* heap = get_thread_heap_raw(); 3128 if (!heap) 3129 return; 3130 3131 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 3132 size_class_t* size_class = _memory_size_class + iclass; 3133 span_t* span = heap->size_class[iclass].partial_span; 3134 while (span) { 3135 size_t free_count = span->list_size; 3136 size_t block_count = size_class->block_count; 3137 if (span->free_list_limit < block_count) 3138 block_count = span->free_list_limit; 3139 free_count += (block_count - span->used_count); 3140 stats->sizecache = free_count * size_class->block_size; 3141 span = span->next; 3142 } 3143 } 3144 3145#if ENABLE_THREAD_CACHE 3146 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 3147 span_cache_t* span_cache; 3148 if (!iclass) 3149 span_cache = &heap->span_cache; 3150 else 3151 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 3152 stats->spancache = span_cache->count * (iclass + 1) * _memory_span_size; 3153 } 3154#endif 3155 3156 span_t* deferred = (span_t*)atomic_load_ptr(&heap->span_free_deferred); 3157 while (deferred) { 3158 if (deferred->size_class != SIZE_CLASS_HUGE) 3159 stats->spancache = (size_t)deferred->span_count * _memory_span_size; 3160 deferred = (span_t*)deferred->free_list; 3161 } 3162 3163#if ENABLE_STATISTICS 3164 stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global); 3165 stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread); 3166 3167 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 3168 stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current); 3169 stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high); 3170 stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global); 3171 stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global); 3172 stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache); 3173 stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache); 3174 stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved); 3175 stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved); 3176 stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls); 3177 } 3178 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 3179 stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current); 3180 stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak; 3181 stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total); 3182 stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total); 3183 stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache); 3184 stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache); 3185 stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved); 3186 stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls); 3187 } 3188#endif 3189} 3190 3191void 3192rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) { 3193 memset(stats, 0, sizeof(rpmalloc_global_statistics_t)); 3194#if ENABLE_STATISTICS 3195 stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size; 3196 stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size; 3197 stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size; 3198 stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size; 3199 stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size; 3200 stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size; 3201#endif 3202#if ENABLE_GLOBAL_CACHE 3203 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) 3204 stats->cached += _memory_span_cache[iclass].count * (iclass + 1) * _memory_span_size; 3205#endif 3206} 3207 3208#if ENABLE_STATISTICS 3209 3210static void 3211_memory_heap_dump_statistics(heap_t* heap, void* file) { 3212 fprintf(file, "Heap %d stats:\n", heap->id); 3213 fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n"); 3214 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 3215 if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) 3216 continue; 3217 fprintf(file, "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass, 3218 atomic_load32(&heap->size_class_use[iclass].alloc_current), 3219 heap->size_class_use[iclass].alloc_peak, 3220 atomic_load32(&heap->size_class_use[iclass].alloc_total), 3221 atomic_load32(&heap->size_class_use[iclass].free_total), 3222 _memory_size_class[iclass].block_size, 3223 _memory_size_class[iclass].block_count, 3224 atomic_load32(&heap->size_class_use[iclass].spans_current), 3225 heap->size_class_use[iclass].spans_peak, 3226 ((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024), 3227 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024), 3228 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024), 3229 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024), 3230 atomic_load32(&heap->size_class_use[iclass].spans_map_calls)); 3231 } 3232 fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB MmapCalls\n"); 3233 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 3234 if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls)) 3235 continue; 3236 fprintf(file, "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1), 3237 atomic_load32(&heap->span_use[iclass].current), 3238 atomic_load32(&heap->span_use[iclass].high), 3239 atomic_load32(&heap->span_use[iclass].spans_deferred), 3240 ((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024), 3241#if ENABLE_THREAD_CACHE 3242 (unsigned int)(!iclass ? heap->span_cache.count : heap->span_large_cache[iclass - 1].count), 3243 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024), 3244 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024), 3245#else 3246 0, (size_t)0, (size_t)0, 3247#endif 3248 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024), 3249 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024), 3250 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024), 3251 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024), 3252 atomic_load32(&heap->span_use[iclass].spans_map_calls)); 3253 } 3254 fprintf(file, "Full spans: %zu\n", heap->full_span_count); 3255 fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n"); 3256 fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024), (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024)); 3257} 3258 3259#endif 3260 3261void 3262rpmalloc_dump_statistics(void* file) { 3263#if ENABLE_STATISTICS 3264 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { 3265 heap_t* heap = _memory_heaps[list_idx]; 3266 while (heap) { 3267 int need_dump = 0; 3268 for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) { 3269 if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) { 3270 rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].free_total), "Heap statistics counter mismatch"); 3271 rpmalloc_assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls), "Heap statistics counter mismatch"); 3272 continue; 3273 } 3274 need_dump = 1; 3275 } 3276 for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) { 3277 if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls)) 3278 continue; 3279 need_dump = 1; 3280 } 3281 if (need_dump) 3282 _memory_heap_dump_statistics(heap, file); 3283 heap = heap->next_heap; 3284 } 3285 } 3286 fprintf(file, "Global stats:\n"); 3287 size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size; 3288 size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size; 3289 fprintf(file, "HugeCurrentMiB HugePeakMiB\n"); 3290 fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024)); 3291 3292 fprintf(file, "GlobalCacheMiB\n"); 3293 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 3294 global_cache_t* cache = _memory_span_cache + iclass; 3295 size_t global_cache = (size_t)cache->count * iclass * _memory_span_size; 3296 3297 size_t global_overflow_cache = 0; 3298 span_t* span = cache->overflow; 3299 while (span) { 3300 global_overflow_cache += iclass * _memory_span_size; 3301 span = span->next; 3302 } 3303 if (global_cache || global_overflow_cache || cache->insert_count || cache->extract_count) 3304 fprintf(file, "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n", iclass + 1, global_cache / (size_t)(1024 * 1024), global_overflow_cache / (size_t)(1024 * 1024), cache->insert_count, cache->extract_count); 3305 } 3306 3307 size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size; 3308 size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size; 3309 size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size; 3310 size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size; 3311 size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size; 3312 fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n"); 3313 fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n", 3314 mapped / (size_t)(1024 * 1024), 3315 mapped_os / (size_t)(1024 * 1024), 3316 mapped_peak / (size_t)(1024 * 1024), 3317 mapped_total / (size_t)(1024 * 1024), 3318 unmapped_total / (size_t)(1024 * 1024)); 3319 3320 fprintf(file, "\n"); 3321#if 0 3322 int64_t allocated = atomic_load64(&_allocation_counter); 3323 int64_t deallocated = atomic_load64(&_deallocation_counter); 3324 fprintf(file, "Allocation count: %lli\n", allocated); 3325 fprintf(file, "Deallocation count: %lli\n", deallocated); 3326 fprintf(file, "Current allocations: %lli\n", (allocated - deallocated)); 3327 fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans)); 3328 fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans)); 3329#endif 3330#endif 3331 (void)sizeof(file); 3332} 3333 3334#if RPMALLOC_FIRST_CLASS_HEAPS 3335 3336extern inline rpmalloc_heap_t* 3337rpmalloc_heap_acquire(void) { 3338 // Must be a pristine heap from newly mapped memory pages, or else memory blocks 3339 // could already be allocated from the heap which would (wrongly) be released when 3340 // heap is cleared with rpmalloc_heap_free_all(). Also heaps guaranteed to be 3341 // pristine from the dedicated orphan list can be used. 3342 heap_t* heap = _rpmalloc_heap_allocate(1); 3343 heap->owner_thread = 0; 3344 _rpmalloc_stat_inc(&_memory_active_heaps); 3345 return heap; 3346} 3347 3348extern inline void 3349rpmalloc_heap_release(rpmalloc_heap_t* heap) { 3350 if (heap) 3351 _rpmalloc_heap_release(heap, 1, 1); 3352} 3353 3354extern inline RPMALLOC_ALLOCATOR void* 3355rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) { 3356#if ENABLE_VALIDATE_ARGS 3357 if (size >= MAX_ALLOC_SIZE) { 3358 errno = EINVAL; 3359 return 0; 3360 } 3361#endif 3362 return _rpmalloc_allocate(heap, size); 3363} 3364 3365extern inline RPMALLOC_ALLOCATOR void* 3366rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) { 3367#if ENABLE_VALIDATE_ARGS 3368 if (size >= MAX_ALLOC_SIZE) { 3369 errno = EINVAL; 3370 return 0; 3371 } 3372#endif 3373 return _rpmalloc_aligned_allocate(heap, alignment, size); 3374} 3375 3376extern inline RPMALLOC_ALLOCATOR void* 3377rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) { 3378 return rpmalloc_heap_aligned_calloc(heap, 0, num, size); 3379} 3380 3381extern inline RPMALLOC_ALLOCATOR void* 3382rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) { 3383 size_t total; 3384#if ENABLE_VALIDATE_ARGS 3385#if PLATFORM_WINDOWS 3386 int err = SizeTMult(num, size, &total); 3387 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { 3388 errno = EINVAL; 3389 return 0; 3390 } 3391#else 3392 int err = __builtin_umull_overflow(num, size, &total); 3393 if (err || (total >= MAX_ALLOC_SIZE)) { 3394 errno = EINVAL; 3395 return 0; 3396 } 3397#endif 3398#else 3399 total = num * size; 3400#endif 3401 void* block = _rpmalloc_aligned_allocate(heap, alignment, total); 3402 if (block) 3403 memset(block, 0, total); 3404 return block; 3405} 3406 3407extern inline RPMALLOC_ALLOCATOR void* 3408rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) { 3409#if ENABLE_VALIDATE_ARGS 3410 if (size >= MAX_ALLOC_SIZE) { 3411 errno = EINVAL; 3412 return ptr; 3413 } 3414#endif 3415 return _rpmalloc_reallocate(heap, ptr, size, 0, flags); 3416} 3417 3418extern inline RPMALLOC_ALLOCATOR void* 3419rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) { 3420#if ENABLE_VALIDATE_ARGS 3421 if ((size + alignment < size) || (alignment > _memory_page_size)) { 3422 errno = EINVAL; 3423 return 0; 3424 } 3425#endif 3426 return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags); 3427} 3428 3429extern inline void 3430rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) { 3431 (void)sizeof(heap); 3432 _rpmalloc_deallocate(ptr); 3433} 3434 3435extern inline void 3436rpmalloc_heap_free_all(rpmalloc_heap_t* heap) { 3437 span_t* span; 3438 span_t* next_span; 3439 3440 _rpmalloc_heap_cache_adopt_deferred(heap, 0); 3441 3442 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 3443 span = heap->size_class[iclass].partial_span; 3444 while (span) { 3445 next_span = span->next; 3446 _rpmalloc_heap_cache_insert(heap, span); 3447 span = next_span; 3448 } 3449 heap->size_class[iclass].partial_span = 0; 3450 span = heap->full_span[iclass]; 3451 while (span) { 3452 next_span = span->next; 3453 _rpmalloc_heap_cache_insert(heap, span); 3454 span = next_span; 3455 } 3456 } 3457 memset(heap->size_class, 0, sizeof(heap->size_class)); 3458 memset(heap->full_span, 0, sizeof(heap->full_span)); 3459 3460 span = heap->large_huge_span; 3461 while (span) { 3462 next_span = span->next; 3463 if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE)) 3464 _rpmalloc_deallocate_huge(span); 3465 else 3466 _rpmalloc_heap_cache_insert(heap, span); 3467 span = next_span; 3468 } 3469 heap->large_huge_span = 0; 3470 heap->full_span_count = 0; 3471 3472#if ENABLE_THREAD_CACHE 3473 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 3474 span_cache_t* span_cache; 3475 if (!iclass) 3476 span_cache = &heap->span_cache; 3477 else 3478 span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1)); 3479 if (!span_cache->count) 3480 continue; 3481#if ENABLE_GLOBAL_CACHE 3482 _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size); 3483 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count); 3484 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count); 3485#else 3486 for (size_t ispan = 0; ispan < span_cache->count; ++ispan) 3487 _rpmalloc_span_unmap(span_cache->span[ispan]); 3488#endif 3489 span_cache->count = 0; 3490 } 3491#endif 3492 3493#if ENABLE_STATISTICS 3494 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { 3495 atomic_store32(&heap->size_class_use[iclass].alloc_current, 0); 3496 atomic_store32(&heap->size_class_use[iclass].spans_current, 0); 3497 } 3498 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { 3499 atomic_store32(&heap->span_use[iclass].current, 0); 3500 } 3501#endif 3502} 3503 3504extern inline void 3505rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) { 3506 heap_t* prev_heap = get_thread_heap_raw(); 3507 if (prev_heap != heap) { 3508 set_thread_heap(heap); 3509 if (prev_heap) 3510 rpmalloc_heap_release(prev_heap); 3511 } 3512} 3513 3514#endif 3515 3516} 3517 3518#endif