Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu

Pull percpu updates from Dennis Zhou:
"Percpu had a cleanup come in that makes use of the cpu bitmask helpers
instead of the current iterative approach.

This cleanup then had an adverse interaction when clang's inlining
sensitivity was changed such that not all sites were inlined,
resulting in modpost being upset with a section mismatch due to the
percpu setup being marked __init.

That was fixed by introducing __flatten to compiler_attributes.h"

* 'for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu:
percpu: fix clang modpost section mismatch
percpu: reduce the number of cpu distance comparisons

+27 -15
+6
include/linux/compiler_attributes.h
··· 211 211 #endif 212 212 213 213 /* 214 + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes 215 + * clang: https://clang.llvm.org/docs/AttributeReference.html#flatten 216 + */ 217 + # define __flatten __attribute__((flatten)) 218 + 219 + /* 214 220 * Note the missing underscores. 215 221 * 216 222 * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute
+21 -15
mm/percpu.c
··· 69 69 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 70 70 71 71 #include <linux/bitmap.h> 72 + #include <linux/cpumask.h> 72 73 #include <linux/memblock.h> 73 74 #include <linux/err.h> 74 75 #include <linux/lcm.h> ··· 2663 2662 * On success, pointer to the new allocation_info is returned. On 2664 2663 * failure, ERR_PTR value is returned. 2665 2664 */ 2666 - static struct pcpu_alloc_info * __init pcpu_build_alloc_info( 2665 + static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info( 2667 2666 size_t reserved_size, size_t dyn_size, 2668 2667 size_t atom_size, 2669 2668 pcpu_fc_cpu_distance_fn_t cpu_distance_fn) 2670 2669 { 2671 2670 static int group_map[NR_CPUS] __initdata; 2672 2671 static int group_cnt[NR_CPUS] __initdata; 2672 + static struct cpumask mask __initdata; 2673 2673 const size_t static_size = __per_cpu_end - __per_cpu_start; 2674 2674 int nr_groups = 1, nr_units = 0; 2675 2675 size_t size_sum, min_unit_size, alloc_size; ··· 2683 2681 /* this function may be called multiple times */ 2684 2682 memset(group_map, 0, sizeof(group_map)); 2685 2683 memset(group_cnt, 0, sizeof(group_cnt)); 2684 + cpumask_clear(&mask); 2686 2685 2687 2686 /* calculate size_sum and ensure dyn_size is enough for early alloc */ 2688 2687 size_sum = PFN_ALIGN(static_size + reserved_size + ··· 2705 2702 upa--; 2706 2703 max_upa = upa; 2707 2704 2705 + cpumask_copy(&mask, cpu_possible_mask); 2706 + 2708 2707 /* group cpus according to their proximity */ 2709 - for_each_possible_cpu(cpu) { 2710 - group = 0; 2711 - next_group: 2712 - for_each_possible_cpu(tcpu) { 2713 - if (cpu == tcpu) 2714 - break; 2715 - if (group_map[tcpu] == group && cpu_distance_fn && 2716 - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || 2717 - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { 2718 - group++; 2719 - nr_groups = max(nr_groups, group + 1); 2720 - goto next_group; 2721 - } 2722 - } 2708 + for (group = 0; !cpumask_empty(&mask); group++) { 2709 + /* pop the group's first cpu */ 2710 + cpu = cpumask_first(&mask); 2723 2711 group_map[cpu] = group; 2724 2712 group_cnt[group]++; 2713 + cpumask_clear_cpu(cpu, &mask); 2714 + 2715 + for_each_cpu(tcpu, &mask) { 2716 + if (!cpu_distance_fn || 2717 + (cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE && 2718 + cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) { 2719 + group_map[tcpu] = group; 2720 + group_cnt[group]++; 2721 + cpumask_clear_cpu(tcpu, &mask); 2722 + } 2723 + } 2724 + } 2725 + nr_groups = group; 2726 2726 2727 2727 /* 2728 2728 * Wasted space is caused by a ratio imbalance of upa to group_cnt.