x86: cleanup remaining cpumask_t ops in smpboot code

Impact: use new cpumask API to reduce memory and stack usage

Allocate the following local cpumasks based on the number of cpus that
are present; references now use the new cpumask API. (Currently only
x86_64 is converted; x86_32 continues to use the *_map variants.)

cpu_callin_mask
cpu_callout_mask
cpu_initialized_mask
cpu_sibling_setup_mask

Provide the following accessor functions:

struct cpumask *cpu_sibling_mask(int cpu)
struct cpumask *cpu_core_mask(int cpu)

Other changes: when setting or clearing a cpu in the online, possible
or present maps, use the set_cpu_online(), set_cpu_possible() and
set_cpu_present() accessors.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Mike Travis, committed by Ingo Molnar (c2d1cec1 588235bb)

5 files changed, 152 insertions(+), 76 deletions(-)
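For orientation before the per-file diffs, here is a minimal sketch of the call-site conversion this patch performs. The two functions below are illustrative examples written for this summary, not hunks from the commit (and the old-style *_map names only remain on x86_32 after this patch):

#include <linux/cpumask.h>
#include <asm/smp.h>

/* old style: cpumask_t ops on fixed NR_CPUS-sized bitmaps */
static void old_style(int cpu)
{
        if (!cpu_isset(cpu, cpu_callin_map))    /* test */
                cpu_set(cpu, cpu_callout_map);  /* set */
        cpu_clear(cpu, cpu_initialized);        /* clear */
}

/* new style: struct cpumask pointers, sized to nr_cpu_ids bits
 * when CONFIG_CPUMASK_OFFSTACK=y */
static void new_style(int cpu)
{
        if (!cpumask_test_cpu(cpu, cpu_callin_mask))
                cpumask_set_cpu(cpu, cpu_callout_mask);
        cpumask_clear_cpu(cpu, cpu_initialized_mask);
}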
arch/x86/include/asm/smp.h (+29 -3)

···
 #include <asm/pda.h>
 #include <asm/thread_info.h>
 
+#ifdef CONFIG_X86_64
+
+extern cpumask_var_t cpu_callin_mask;
+extern cpumask_var_t cpu_callout_mask;
+extern cpumask_var_t cpu_initialized_mask;
+extern cpumask_var_t cpu_sibling_setup_mask;
+
+#else /* CONFIG_X86_32 */
+
+extern cpumask_t cpu_callin_map;
 extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_initialized;
-extern cpumask_t cpu_callin_map;
+extern cpumask_t cpu_sibling_setup_map;
+
+#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
+#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
+#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
+#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
+
+#endif /* CONFIG_X86_32 */
 
 extern void (*mtrr_hook)(void);
 extern void zap_low_mappings(void);
···
 
 extern int smp_num_siblings;
 extern unsigned int num_processors;
-extern cpumask_t cpu_initialized;
 
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
···
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(int, cpu_number);
 #endif
+
+static inline struct cpumask *cpu_sibling_mask(int cpu)
+{
+        return &per_cpu(cpu_sibling_map, cpu);
+}
+
+static inline struct cpumask *cpu_core_mask(int cpu)
+{
+        return &per_cpu(cpu_core_map, cpu);
+}
 
 DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
···
 /* We don't mark CPUs online until __cpu_up(), so we need another measure */
 static inline int num_booting_cpus(void)
 {
-        return cpus_weight(cpu_callout_map);
+        return cpumask_weight(cpu_callout_mask);
 }
 #else
 static inline void prefill_possible_map(void)
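The x86_32 branch can get away with plain #defines because cpumask_t is a typedef of struct cpumask, so casting the address of the old *_map objects yields a usable struct cpumask pointer; either way, callers are written once against the pointer-based API. A small illustrative user of the new accessors (dump_topology() is a hypothetical function, not part of this patch):

#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <asm/smp.h>

/* hypothetical example -- the same source compiles on x86_32 and x86_64 */
static void dump_topology(int cpu)
{
        int i;

        if (!cpumask_test_cpu(cpu, cpu_callout_mask))
                return;

        for_each_cpu(i, cpu_sibling_mask(cpu))
                pr_info("CPU%d: HT sibling %d\n", cpu, i);
        for_each_cpu(i, cpu_core_mask(cpu))
                pr_info("CPU%d: core sibling %d\n", cpu, i);
}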
arch/x86/kernel/cpu/common.c (+22 -4)

···
 
 #include "cpu.h"
 
+#ifdef CONFIG_X86_64
+
+/* all of these masks are initialized in setup_cpu_local_masks() */
+cpumask_var_t cpu_callin_mask;
+cpumask_var_t cpu_callout_mask;
+cpumask_var_t cpu_initialized_mask;
+
+/* representing cpus for which sibling maps can be computed */
+cpumask_var_t cpu_sibling_setup_mask;
+
+#else /* CONFIG_X86_32 */
+
+cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+cpumask_t cpu_initialized;
+cpumask_t cpu_sibling_setup_map;
+
+#endif /* CONFIG_X86_32 */
+
+
 static struct cpu_dev *this_cpu __cpuinitdata;
 
 #ifdef CONFIG_X86_64
···
 }
 __setup("clearcpuid=", setup_disablecpuid);
 
-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-
 #ifdef CONFIG_X86_64
 struct x8664_pda **_cpu_pda __read_mostly;
 EXPORT_SYMBOL(_cpu_pda);
···
 
         me = current;
 
-        if (cpu_test_and_set(cpu, cpu_initialized))
+        if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
                 panic("CPU#%d already initialized!\n", cpu);
 
         printk(KERN_INFO "Initializing CPU#%d\n", cpu);
···
         struct tss_struct *t = &per_cpu(init_tss, cpu);
         struct thread_struct *thread = &curr->thread;
 
-        if (cpu_test_and_set(cpu, cpu_initialized)) {
+        if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
                 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
                 for (;;) local_irq_enable();
         }
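The switch to cpumask_var_t for the x86_64 copies is what makes the "reduce memory" part of the changelog work: with CONFIG_CPUMASK_OFFSTACK=y it is a pointer whose backing storage is allocated at nr_cpu_ids bits, and without it it degenerates to an ordinary cpumask. Roughly (paraphrasing <linux/cpumask.h>, not quoting it verbatim):

/* paraphrased shape of cpumask_var_t -- see <linux/cpumask.h> for the real thing */
#ifdef CONFIG_CPUMASK_OFFSTACK
/* pointer: storage comes from alloc_cpumask_var() or
 * alloc_bootmem_cpumask_var(), sized for nr_cpu_ids bits */
typedef struct cpumask *cpumask_var_t;
#else
/* one-element array: behaves like a plain struct cpumask in BSS or on the stack */
typedef struct cpumask cpumask_var_t[1];
#endif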
arch/x86/kernel/setup_percpu.c (+24 -1)

···
         /* point to new pointer table */
         _cpu_pda = new_cpu_pda;
 }
-#endif
+
+#endif /* CONFIG_SMP && CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_64
+
+/* correctly size the local cpu masks */
+static void setup_cpu_local_masks(void)
+{
+        alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+        alloc_bootmem_cpumask_var(&cpu_callin_mask);
+        alloc_bootmem_cpumask_var(&cpu_callout_mask);
+        alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
+#else /* CONFIG_X86_32 */
+
+static inline void setup_cpu_local_masks(void)
+{
+}
+
+#endif /* CONFIG_X86_32 */
 
 /*
  * Great future plan:
···
 
         /* Setup node to cpumask map */
         setup_node_to_cpumask_map();
+
+        /* Setup cpu initialized, callin, callout masks */
+        setup_cpu_local_masks();
 }
 
 #endif
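setup_cpu_local_masks() uses alloc_bootmem_cpumask_var(), which draws from the boot-time allocator and returns void, so there is no failure path to handle here (unlike alloc_cpumask_var(), whose return value must be checked). A hedged sketch of the same idiom for some other early-boot mask; my_early_mask and setup_my_early_mask() are hypothetical, not part of this commit:

#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/smp.h>

static cpumask_var_t my_early_mask;     /* hypothetical */

static void __init setup_my_early_mask(void)
{
        /* bootmem-backed, sized to nr_cpu_ids bits; no return value to check */
        alloc_bootmem_cpumask_var(&my_early_mask);

        cpumask_clear(my_early_mask);
        cpumask_set_cpu(smp_processor_id(), my_early_mask);
}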
arch/x86/kernel/smp.c (+12 -5)

···
 
 void native_send_call_func_ipi(const struct cpumask *mask)
 {
-        cpumask_t allbutself;
+        cpumask_var_t allbutself;
 
-        allbutself = cpu_online_map;
-        cpu_clear(smp_processor_id(), allbutself);
+        if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
+                send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+                return;
+        }
 
-        if (cpus_equal(*mask, allbutself) &&
-            cpus_equal(cpu_online_map, cpu_callout_map))
+        cpumask_copy(allbutself, cpu_online_mask);
+        cpumask_clear_cpu(smp_processor_id(), allbutself);
+
+        if (cpumask_equal(mask, allbutself) &&
+            cpumask_equal(cpu_online_mask, cpu_callout_mask))
                 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
         else
                 send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+
+        free_cpumask_var(allbutself);
 }
 
 /*
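The native_send_call_func_ipi() hunk shows the standard pattern for temporary masks once they may live off-stack: attempt alloc_cpumask_var() with GFP_ATOMIC (this path can run with interrupts disabled), fall back to a correct-but-unoptimized path if the allocation fails, and free the mask on the way out. A generic sketch of that shape, with placeholder helpers (do_fast_path() and do_slow_path() are not kernel functions):

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/smp.h>

static void do_slow_path(void);                         /* placeholder */
static void do_fast_path(const struct cpumask *mask);   /* placeholder */

static void notify_others_example(void)
{
        cpumask_var_t tmp;

        if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) {
                do_slow_path();         /* still correct, just not optimized */
                return;
        }

        cpumask_copy(tmp, cpu_online_mask);
        cpumask_clear_cpu(smp_processor_id(), tmp);
        do_fast_path(tmp);

        free_cpumask_var(tmp);
}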
arch/x86/kernel/smpboot.c (+65 -63)

···
 /* Last level cache ID of each logical CPU */
 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
 
-cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
···
 
 static atomic_t init_deasserted;
 
-
-/* representing cpus for which sibling maps can be computed */
-static cpumask_t cpu_sibling_setup_map;
 
 /* Set if we find a B stepping CPU */
 static int __cpuinitdata smp_b_stepping;
···
 static void map_cpu_to_node(int cpu, int node)
 {
         printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
-        cpu_set(cpu, node_to_cpumask_map[node]);
+        cpumask_set_cpu(cpu, &node_to_cpumask_map[node]);
         cpu_to_node_map[cpu] = node;
 }
···
 
         printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
         for (node = 0; node < MAX_NUMNODES; node++)
-                cpu_clear(cpu, node_to_cpumask_map[node]);
+                cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]);
         cpu_to_node_map[cpu] = 0;
 }
 #else /* !(CONFIG_NUMA && CONFIG_X86_32) */
···
          */
         phys_id = read_apic_id();
         cpuid = smp_processor_id();
-        if (cpu_isset(cpuid, cpu_callin_map)) {
+        if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
                 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
                                         phys_id, cpuid);
         }
···
                 /*
                  * Has the boot CPU finished it's STARTUP sequence?
                  */
-                if (cpu_isset(cpuid, cpu_callout_map))
+                if (cpumask_test_cpu(cpuid, cpu_callout_mask))
                         break;
                 cpu_relax();
         }
···
         /*
          * Allow the master to continue.
          */
-        cpu_set(cpuid, cpu_callin_map);
+        cpumask_set_cpu(cpuid, cpu_callin_mask);
 }
 
 static int __cpuinitdata unsafe_smp;
···
         ipi_call_lock();
         lock_vector_lock();
         __setup_vector_irq(smp_processor_id());
-        cpu_set(smp_processor_id(), cpu_online_map);
+        set_cpu_online(smp_processor_id(), true);
         unlock_vector_lock();
         ipi_call_unlock();
         per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
···
         int i;
         struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-        cpu_set(cpu, cpu_sibling_setup_map);
+        cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
 
         if (smp_num_siblings > 1) {
-                for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
-                        if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
-                            c->cpu_core_id == cpu_data(i).cpu_core_id) {
-                                cpu_set(i, per_cpu(cpu_sibling_map, cpu));
-                                cpu_set(cpu, per_cpu(cpu_sibling_map, i));
-                                cpu_set(i, per_cpu(cpu_core_map, cpu));
-                                cpu_set(cpu, per_cpu(cpu_core_map, i));
-                                cpu_set(i, c->llc_shared_map);
-                                cpu_set(cpu, cpu_data(i).llc_shared_map);
+                for_each_cpu(i, cpu_sibling_setup_mask) {
+                        struct cpuinfo_x86 *o = &cpu_data(i);
+
+                        if (c->phys_proc_id == o->phys_proc_id &&
+                            c->cpu_core_id == o->cpu_core_id) {
+                                cpumask_set_cpu(i, cpu_sibling_mask(cpu));
+                                cpumask_set_cpu(cpu, cpu_sibling_mask(i));
+                                cpumask_set_cpu(i, cpu_core_mask(cpu));
+                                cpumask_set_cpu(cpu, cpu_core_mask(i));
+                                cpumask_set_cpu(i, &c->llc_shared_map);
+                                cpumask_set_cpu(cpu, &o->llc_shared_map);
                         }
                 }
         } else {
-                cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
+                cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
         }
 
-        cpu_set(cpu, c->llc_shared_map);
+        cpumask_set_cpu(cpu, &c->llc_shared_map);
 
         if (current_cpu_data.x86_max_cores == 1) {
-                per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
+                cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
                 c->booted_cores = 1;
                 return;
         }
 
-        for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
+        for_each_cpu(i, cpu_sibling_setup_mask) {
                 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
                     per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
-                        cpu_set(i, c->llc_shared_map);
-                        cpu_set(cpu, cpu_data(i).llc_shared_map);
+                        cpumask_set_cpu(i, &c->llc_shared_map);
+                        cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map);
                 }
                 if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
-                        cpu_set(i, per_cpu(cpu_core_map, cpu));
-                        cpu_set(cpu, per_cpu(cpu_core_map, i));
+                        cpumask_set_cpu(i, cpu_core_mask(cpu));
+                        cpumask_set_cpu(cpu, cpu_core_mask(i));
                         /*
                          * Does this new cpu bringup a new core?
                          */
-                        if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
+                        if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) {
                                 /*
                                  * for each core in package, increment
                                  * the booted_cores for this new cpu
                                  */
-                                if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
+                                if (cpumask_first(cpu_sibling_mask(i)) == i)
                                         c->booted_cores++;
                                 /*
                                  * increment the core count for all
···
          * And for power savings, we return cpu_core_map
          */
         if (sched_mc_power_savings || sched_smt_power_savings)
-                return &per_cpu(cpu_core_map, cpu);
+                return cpu_core_mask(cpu);
         else
                 return &c->llc_shared_map;
 }
···
          */
         pr_debug("Before bogomips.\n");
         for_each_possible_cpu(cpu)
-                if (cpu_isset(cpu, cpu_callout_map))
+                if (cpumask_test_cpu(cpu, cpu_callout_mask))
                         bogosum += cpu_data(cpu).loops_per_jiffy;
         printk(KERN_INFO
                 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
···
          * allow APs to start initializing.
          */
         pr_debug("Before Callout %d.\n", cpu);
-        cpu_set(cpu, cpu_callout_map);
+        cpumask_set_cpu(cpu, cpu_callout_mask);
         pr_debug("After Callout %d.\n", cpu);
 
         /*
          * Wait 5s total for a response
          */
         for (timeout = 0; timeout < 50000; timeout++) {
-                if (cpu_isset(cpu, cpu_callin_map))
+                if (cpumask_test_cpu(cpu, cpu_callin_mask))
                         break;  /* It has booted */
                 udelay(100);
         }
 
-        if (cpu_isset(cpu, cpu_callin_map)) {
+        if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
                 /* number CPUs logically, starting from 1 (BSP is 0) */
                 pr_debug("OK.\n");
                 printk(KERN_INFO "CPU%d: ", cpu);
···
         if (boot_error) {
                 /* Try to put things back the way they were before ... */
                 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
-                cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
-                cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
-                cpu_clear(cpu, cpu_present_map);
+
+                /* was set by do_boot_cpu() */
+                cpumask_clear_cpu(cpu, cpu_callout_mask);
+
+                /* was set by cpu_init() */
+                cpumask_clear_cpu(cpu, cpu_initialized_mask);
+
+                set_cpu_present(cpu, false);
                 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
         }
···
         /*
          * Already booted CPU?
          */
-        if (cpu_isset(cpu, cpu_callin_map)) {
+        if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
                 pr_debug("do_boot_cpu %d Already started\n", cpu);
                 return -ENOSYS;
         }
···
  */
 static __init void disable_smp(void)
 {
-        cpu_present_map = cpumask_of_cpu(0);
-        cpu_possible_map = cpumask_of_cpu(0);
+        /* use the read/write pointers to the present and possible maps */
+        cpumask_copy(&cpu_present_map, cpumask_of(0));
+        cpumask_copy(&cpu_possible_map, cpumask_of(0));
         smpboot_clear_io_apic_irqs();
 
         if (smp_found_config)
···
         else
                 physid_set_mask_of_physid(0, &phys_cpu_present_map);
         map_cpu_to_logical_apicid();
-        cpu_set(0, per_cpu(cpu_sibling_map, 0));
-        cpu_set(0, per_cpu(cpu_core_map, 0));
+        cpumask_set_cpu(0, cpu_sibling_mask(0));
+        cpumask_set_cpu(0, cpu_core_mask(0));
 }
 
 /*
···
         nr = 0;
         for_each_present_cpu(cpu) {
                 if (nr >= 8)
-                        cpu_clear(cpu, cpu_present_map);
+                        set_cpu_present(cpu, false);
                 nr++;
         }
 
         nr = 0;
         for_each_possible_cpu(cpu) {
                 if (nr >= 8)
-                        cpu_clear(cpu, cpu_possible_map);
+                        set_cpu_possible(cpu, false);
                 nr++;
         }
 
···
         preempt_disable();
         smp_cpu_index_default();
         current_cpu_data = boot_cpu_data;
-        cpu_callin_map = cpumask_of_cpu(0);
+        cpumask_copy(cpu_callin_mask, cpumask_of(0));
         mb();
         /*
          * Setup boot CPU information
···
         init_gdt(me);
 #endif
         switch_to_new_gdt();
-        /* already set me in cpu_online_map in boot_cpu_init() */
-        cpu_set(me, cpu_callout_map);
+        /* already set me in cpu_online_mask in boot_cpu_init() */
+        cpumask_set_cpu(me, cpu_callout_mask);
         per_cpu(cpu_state, me) = CPU_ONLINE;
 }
···
                 possible, max_t(int, possible - num_processors, 0));
 
         for (i = 0; i < possible; i++)
-                cpu_set(i, cpu_possible_map);
+                set_cpu_possible(i, true);
 
         nr_cpu_ids = possible;
 }
···
         int sibling;
         struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-        for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
-                cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
+        for_each_cpu(sibling, cpu_core_mask(cpu)) {
+                cpumask_clear_cpu(cpu, cpu_core_mask(sibling));
                 /*/
                  * last thread sibling in this cpu core going down
                  */
-                if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
+                if (cpumask_weight(cpu_sibling_mask(cpu)) == 1)
                         cpu_data(sibling).booted_cores--;
         }
 
-        for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
-                cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
-        cpus_clear(per_cpu(cpu_sibling_map, cpu));
-        cpus_clear(per_cpu(cpu_core_map, cpu));
+        for_each_cpu(sibling, cpu_sibling_mask(cpu))
+                cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+        cpumask_clear(cpu_sibling_mask(cpu));
+        cpumask_clear(cpu_core_mask(cpu));
         c->phys_proc_id = 0;
         c->cpu_core_id = 0;
-        cpu_clear(cpu, cpu_sibling_setup_map);
+        cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
 }
 
 static void __ref remove_cpu_from_maps(int cpu)
 {
-        cpu_clear(cpu, cpu_online_map);
-        cpu_clear(cpu, cpu_callout_map);
-        cpu_clear(cpu, cpu_callin_map);
+        set_cpu_online(cpu, false);
+        cpumask_clear_cpu(cpu, cpu_callout_mask);
+        cpumask_clear_cpu(cpu, cpu_callin_mask);
         /* was set by cpu_init() */
-        cpu_clear(cpu, cpu_initialized);
+        cpumask_clear_cpu(cpu, cpu_initialized_mask);
         numa_remove_cpu(cpu);
 }
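Finally, the direct writes to cpu_online_map, cpu_present_map and cpu_possible_map give way to the generic set_cpu_online()/set_cpu_present()/set_cpu_possible() helpers, while readers stick to the const cpu_online_mask/cpu_present_mask/cpu_possible_mask pointers. An illustrative teardown-style caller (remove_cpu_example() is hypothetical, not a function in this commit):

#include <linux/cpumask.h>
#include <linux/kernel.h>

static void remove_cpu_example(int cpu)
{
        /* writers go through the accessors ... */
        set_cpu_online(cpu, false);
        set_cpu_present(cpu, false);

        /* ... readers keep using the const masks */
        WARN_ON(cpumask_test_cpu(cpu, cpu_online_mask));
}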