Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Tags: kernel, os, linux

lib/stackdepot: replace CONFIG_STACK_HASH_ORDER with automatic sizing

As Linus explained [1], setting the stackdepot hash table size as a config
option is suboptimal, especially as stackdepot becomes a dependency of
less "expert" subsystems than initially (e.g. DRM, networking,
SLUB_DEBUG):

: (a) it introduces a new compile-time question that isn't sane to ask
: a regular user, but is now exposed to regular users.

: (b) this by default uses 1MB of memory for a feature that didn't in
: the past, so now if you have small machines you need to make sure you
: make a special kernel config for them.

Ideally we would employ rhashtable for fully automatic resizing, which
should be feasible for many of the new users, but problematic for the
original users with restricted context that call __stack_depot_save() with
can_alloc == false, i.e. KASAN.

However we can easily remove the config option and scale the hash table
automatically with system memory. The STACK_HASH_MASK constant becomes
stack_hash_mask variable and is used only in one mask operation, so the
overhead should be negligible to none. For early allocation we can employ
the existing alloc_large_system_hash() function and perform similar
scaling for the late allocation.

The existing limits of the config option (between 4k and 1M buckets) are
preserved, and scaling factor is set to one bucket per 16kB memory so on
64bit the max 1M buckets (8MB memory) is achieved with 16GB system, while
a 1GB system will use 512kB.

Because KASAN is reported to need the maximum number of buckets even with
smaller amounts of memory [2], set it as such when kasan_enabled().

If needed, the automatic scaling could be complemented with a boot-time
kernel parameter, but it feels pointless to add it without a specific use
case.

[1] https://lore.kernel.org/all/CAHk-=wjC5nS+fnf6EzRD9yQRJApAhxx7gRB87ZV+pAWo9oVrTg@mail.gmail.com/
[2] https://lore.kernel.org/all/CACT4Y+Y4GZfXOru2z5tFPzFdaSUd+GFc6KVL=bsa0+1m197cQQ@mail.gmail.com/

Link: https://lkml.kernel.org/r/20220620150249.16814-1-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Vlastimil Babka and committed by akpm.
Commit: f9987921 (parent: 62df90b5)

Total: +49 -19
lib/Kconfig: -9
··· 685 685 bool 686 686 select STACKDEPOT 687 687 688 - config STACK_HASH_ORDER 689 - int "stack depot hash size (12 => 4KB, 20 => 1024KB)" 690 - range 12 20 691 - default 20 692 - depends on STACKDEPOT 693 - help 694 - Select the hash size as a power of 2 for the stackdepot hash table. 695 - Choose a lower value to reduce the memory impact. 696 - 697 688 config REF_TRACKER 698 689 bool 699 690 depends on STACKTRACE_SUPPORT
+49 -10
lib/stackdepot.c
··· 32 32 #include <linux/string.h> 33 33 #include <linux/types.h> 34 34 #include <linux/memblock.h> 35 + #include <linux/kasan-enabled.h> 35 36 36 37 #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) 37 38 ··· 146 145 return stack; 147 146 } 148 147 149 - #define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER) 150 - #define STACK_HASH_MASK (STACK_HASH_SIZE - 1) 148 + /* one hash table bucket entry per 16kB of memory */ 149 + #define STACK_HASH_SCALE 14 150 + /* limited between 4k and 1M buckets */ 151 + #define STACK_HASH_ORDER_MIN 12 152 + #define STACK_HASH_ORDER_MAX 20 151 153 #define STACK_HASH_SEED 0x9747b28c 154 + 155 + static unsigned int stack_hash_order; 156 + static unsigned int stack_hash_mask; 152 157 153 158 static bool stack_depot_disable; 154 159 static struct stack_record **stack_table; ··· 182 175 183 176 int __init stack_depot_early_init(void) 184 177 { 185 - size_t size; 178 + unsigned long entries = 0; 186 179 187 180 /* This is supposed to be called only once, from mm_init() */ 188 181 if (WARN_ON(__stack_depot_early_init_passed)) ··· 190 183 191 184 __stack_depot_early_init_passed = true; 192 185 186 + if (kasan_enabled() && !stack_hash_order) 187 + stack_hash_order = STACK_HASH_ORDER_MAX; 188 + 193 189 if (!__stack_depot_want_early_init || stack_depot_disable) 194 190 return 0; 195 191 196 - size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); 197 - pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n", 198 - size); 199 - stack_table = memblock_alloc(size, SMP_CACHE_BYTES); 192 + if (stack_hash_order) 193 + entries = 1UL << stack_hash_order; 194 + stack_table = alloc_large_system_hash("stackdepot", 195 + sizeof(struct stack_record *), 196 + entries, 197 + STACK_HASH_SCALE, 198 + HASH_EARLY | HASH_ZERO, 199 + NULL, 200 + &stack_hash_mask, 201 + 1UL << STACK_HASH_ORDER_MIN, 202 + 1UL << STACK_HASH_ORDER_MAX); 200 203 201 204 if (!stack_table) { 202 205 pr_err("Stack Depot hash table allocation 
failed, disabling\n"); ··· 224 207 225 208 mutex_lock(&stack_depot_init_mutex); 226 209 if (!stack_depot_disable && !stack_table) { 227 - pr_info("Stack Depot allocating hash table with kvcalloc\n"); 228 - stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL); 210 + unsigned long entries; 211 + int scale = STACK_HASH_SCALE; 212 + 213 + if (stack_hash_order) { 214 + entries = 1UL << stack_hash_order; 215 + } else { 216 + entries = nr_free_buffer_pages(); 217 + entries = roundup_pow_of_two(entries); 218 + 219 + if (scale > PAGE_SHIFT) 220 + entries >>= (scale - PAGE_SHIFT); 221 + else 222 + entries <<= (PAGE_SHIFT - scale); 223 + } 224 + 225 + if (entries < 1UL << STACK_HASH_ORDER_MIN) 226 + entries = 1UL << STACK_HASH_ORDER_MIN; 227 + if (entries > 1UL << STACK_HASH_ORDER_MAX) 228 + entries = 1UL << STACK_HASH_ORDER_MAX; 229 + 230 + pr_info("Stack Depot allocating hash table of %lu entries with kvcalloc\n", 231 + entries); 232 + stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); 229 233 if (!stack_table) { 230 234 pr_err("Stack Depot hash table allocation failed, disabling\n"); 231 235 stack_depot_disable = true; 232 236 ret = -ENOMEM; 233 237 } 238 + stack_hash_mask = entries - 1; 234 239 } 235 240 mutex_unlock(&stack_depot_init_mutex); 236 241 return ret; ··· 425 386 goto fast_exit; 426 387 427 388 hash = hash_stack(entries, nr_entries); 428 - bucket = &stack_table[hash & STACK_HASH_MASK]; 389 + bucket = &stack_table[hash & stack_hash_mask]; 429 390 430 391 /* 431 392 * Fast path: look the stack trace up without locking.