Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86, x2apic: Minimize IPI register writes using cluster groups

In the case of x2apic cluster mode we can group IPI register
writes based on the cluster group instead of individual per-cpu
destination messages.

This reduces the apic register writes and reduces the amount of
IPI messages (in the best case we can reduce it by a factor of
16).

With this change, the cost of flush_tlb_others(), with the flush
tlb IPI being sent from a cpu in the socket-1 to all the logical
cpus in socket-2 (on a Westmere-EX system that has 20 logical
cpus in a socket) is 3x better now (compared to the former
'send one-by-one' algorithm).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: steiner@sgi.com
Cc: yinghai@kernel.org
Link: http://lkml.kernel.org/r/20110519234637.512271057@sbsiddha-MOBL3.sc.intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Cyrill Gorcunov and committed by Ingo Molnar
9d0fa6c5 a39d1f3f

+44 -14
arch/x86/kernel/apic/x2apic_cluster.c
··· 5 5 #include <linux/ctype.h> 6 6 #include <linux/init.h> 7 7 #include <linux/dmar.h> 8 + #include <linux/cpu.h> 8 9 9 10 #include <asm/smp.h> 10 11 #include <asm/apic.h> ··· 13 12 14 13 static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); 15 14 static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); 15 + static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); 16 16 17 17 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 18 18 { ··· 56 54 return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; 57 55 } 58 56 59 - /* 60 - * for now, we send the IPI's one by one in the cpumask. 61 - * TBD: Based on the cpu mask, we can send the IPI's to the cluster group 62 - * at once. We have 16 cpu's in a cluster. This will minimize IPI register 63 - * writes. 64 - */ 65 57 static void 66 58 __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) 67 59 { 68 - unsigned long query_cpu; 69 - unsigned long this_cpu; 60 + struct cpumask *cpus_in_cluster_ptr; 61 + struct cpumask *ipi_mask_ptr; 62 + unsigned int cpu, this_cpu; 70 63 unsigned long flags; 64 + u32 dest; 71 65 72 66 x2apic_wrmsr_fence(); 73 67 74 68 local_irq_save(flags); 75 69 76 70 this_cpu = smp_processor_id(); 77 - for_each_cpu(query_cpu, mask) { 78 - if (apic_dest == APIC_DEST_ALLBUT && query_cpu == this_cpu) 71 + 72 + /* 73 + * We are to modify mask, so we need an own copy 74 + * and be sure it's manipulated with irq off. 75 + */ 76 + ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); 77 + cpumask_copy(ipi_mask_ptr, mask); 78 + 79 + /* 80 + * The idea is to send one IPI per cluster. 81 + */ 82 + for_each_cpu(cpu, ipi_mask_ptr) { 83 + unsigned long i; 84 + 85 + cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); 86 + dest = 0; 87 + 88 + /* Collect cpus in cluster. 
*/ 89 + for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { 90 + if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) 91 + dest |= per_cpu(x86_cpu_to_logical_apicid, i); 92 + } 93 + 94 + if (!dest) 79 95 continue; 80 - __x2apic_send_IPI_dest( 81 - per_cpu(x86_cpu_to_logical_apicid, query_cpu), 82 - vector, apic->dest_logical); 96 + 97 + __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); 98 + /* 99 + * Cluster sibling cpus should be discared now so 100 + * we would not send IPI them second time. 101 + */ 102 + cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); 83 103 } 84 104 85 105 local_irq_restore(flags); ··· 222 198 if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), 223 199 GFP_KERNEL)) { 224 200 err = -ENOMEM; 201 + } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu), 202 + GFP_KERNEL)) { 203 + free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); 204 + err = -ENOMEM; 225 205 } 226 206 break; 227 207 case CPU_UP_CANCELED: ··· 238 210 __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu)); 239 211 } 240 212 free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); 213 + free_cpumask_var(per_cpu(ipi_mask, this_cpu)); 241 214 break; 242 215 } 243 216 ··· 254 225 int cpu = smp_processor_id(); 255 226 256 227 zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); 228 + zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL); 257 229 258 - BUG_ON(!per_cpu(cpus_in_cluster, cpu)); 230 + BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); 259 231 260 232 __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); 261 233 register_hotcpu_notifier(&x2apic_cpu_notifier);