Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

modules: Use a better scheme for refcounting

Current refcounting for modules (done if CONFIG_MODULE_UNLOAD=y) uses
a lot of memory.

Each 'struct module' contains an [NR_CPUS] array of full cache lines.

This patch uses existing infrastructure (percpu_modalloc() &
percpu_modfree()) to allocate percpu space for the refcount storage.

Instead of wasting NR_CPUS*128 bytes (on i386), we now use
nr_cpu_ids*sizeof(local_t) bytes.

On a typical distro, where NR_CPUS=8, shipping 2000 modules, we reduce
the size of module files by about 2 Mbytes (about 1 KB per module).

Instead of having all refcounters in the same memory node - with TLB misses
because of vmalloc() - this new implementation allows for better
NUMA properties, since each CPU will use storage on its preferred node,
thanks to percpu storage.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Eric Dumazet and committed by
Linus Torvalds
720eba31 27421e21

+41 -19
+16 -9
include/linux/module.h
··· 219 219 220 220 #endif 221 221 222 - struct module_ref 223 - { 224 - local_t count; 225 - } ____cacheline_aligned; 226 - 227 222 enum module_state 228 223 { 229 224 MODULE_STATE_LIVE, ··· 339 344 /* Destruction function. */ 340 345 void (*exit)(void); 341 346 342 - /* Reference counts */ 343 - struct module_ref ref[NR_CPUS]; 347 + #ifdef CONFIG_SMP 348 + char *refptr; 349 + #else 350 + local_t ref; 351 + #endif 344 352 #endif 345 353 }; 346 354 #ifndef MODULE_ARCH_INIT ··· 393 395 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) 394 396 void symbol_put_addr(void *addr); 395 397 398 + static inline local_t *__module_ref_addr(struct module *mod, int cpu) 399 + { 400 + #ifdef CONFIG_SMP 401 + return (local_t *) (mod->refptr + per_cpu_offset(cpu)); 402 + #else 403 + return &mod->ref; 404 + #endif 405 + } 406 + 396 407 /* Sometimes we know we already have a refcount, and it's easier not 397 408 to handle the error case (which only happens with rmmod --wait). */ 398 409 static inline void __module_get(struct module *module) 399 410 { 400 411 if (module) { 401 412 BUG_ON(module_refcount(module) == 0); 402 - local_inc(&module->ref[get_cpu()].count); 413 + local_inc(__module_ref_addr(module, get_cpu())); 403 414 put_cpu(); 404 415 } 405 416 } ··· 420 413 if (module) { 421 414 unsigned int cpu = get_cpu(); 422 415 if (likely(module_is_live(module))) 423 - local_inc(&module->ref[cpu].count); 416 + local_inc(__module_ref_addr(module, cpu)); 424 417 else 425 418 ret = 0; 426 419 put_cpu();
+25 -10
kernel/module.c
··· 573 573 /* Init the unload section of the module. */ 574 574 static void module_unload_init(struct module *mod) 575 575 { 576 - unsigned int i; 576 + int cpu; 577 577 578 578 INIT_LIST_HEAD(&mod->modules_which_use_me); 579 - for (i = 0; i < NR_CPUS; i++) 580 - local_set(&mod->ref[i].count, 0); 579 + for_each_possible_cpu(cpu) 580 + local_set(__module_ref_addr(mod, cpu), 0); 581 581 /* Hold reference count during initialization. */ 582 - local_set(&mod->ref[raw_smp_processor_id()].count, 1); 582 + local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); 583 583 /* Backwards compatibility macros put refcount during init. */ 584 584 mod->waiter = current; 585 585 } ··· 717 717 718 718 unsigned int module_refcount(struct module *mod) 719 719 { 720 - unsigned int i, total = 0; 720 + unsigned int total = 0; 721 + int cpu; 721 722 722 - for (i = 0; i < NR_CPUS; i++) 723 - total += local_read(&mod->ref[i].count); 723 + for_each_possible_cpu(cpu) 724 + total += local_read(__module_ref_addr(mod, cpu)); 724 725 return total; 725 726 } 726 727 EXPORT_SYMBOL(module_refcount); ··· 895 894 { 896 895 if (module) { 897 896 unsigned int cpu = get_cpu(); 898 - local_dec(&module->ref[cpu].count); 897 + local_dec(__module_ref_addr(module, cpu)); 899 898 /* Maybe they're waiting for us to drop reference? 
*/ 900 899 if (unlikely(!module_is_live(module))) 901 900 wake_up_process(module->waiter); ··· 1465 1464 kfree(mod->args); 1466 1465 if (mod->percpu) 1467 1466 percpu_modfree(mod->percpu); 1468 - 1467 + #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) 1468 + if (mod->refptr) 1469 + percpu_modfree(mod->refptr); 1470 + #endif 1469 1471 /* Free lock-classes: */ 1470 1472 lockdep_free_key_range(mod->module_core, mod->core_size); 1471 1473 ··· 2015 2011 if (err < 0) 2016 2012 goto free_mod; 2017 2013 2014 + #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) 2015 + mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), 2016 + mod->name); 2017 + if (!mod->refptr) { 2018 + err = -ENOMEM; 2019 + goto free_mod; 2020 + } 2021 + #endif 2018 2022 if (pcpuindex) { 2019 2023 /* We have a special allocation for this section. */ 2020 2024 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, ··· 2030 2018 mod->name); 2031 2019 if (!percpu) { 2032 2020 err = -ENOMEM; 2033 - goto free_mod; 2021 + goto free_percpu; 2034 2022 } 2035 2023 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 2036 2024 mod->percpu = percpu; ··· 2294 2282 free_percpu: 2295 2283 if (percpu) 2296 2284 percpu_modfree(percpu); 2285 + #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) 2286 + percpu_modfree(mod->refptr); 2287 + #endif 2297 2288 free_mod: 2298 2289 kfree(args); 2299 2290 free_hdr: