Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] x86_64: Speed up numa_node_id by putting it directly into the PDA

Do not go from the CPU number through a mapping array.
The node number is now often used in fast paths.

This also adds a generic numa_node_id to all the topology includes

Suggested by Eric Dumazet

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Andi Kleen and committed by
Linus Torvalds
69d81fcd 50895c5d

+18 -5
+2 -2
arch/x86_64/kernel/setup.c
··· 823 823 if (!node_online(node)) 824 824 node = nearby_node(apicid); 825 825 } 826 - cpu_to_node[cpu] = node; 826 + numa_set_node(cpu, node); 827 827 828 828 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 829 829 cpu, c->x86_num_cores, node, cpu_core_id[cpu]); ··· 975 975 node = apicid_to_node[hard_smp_processor_id()]; 976 976 if (node == NUMA_NO_NODE) 977 977 node = 0; 978 - cpu_to_node[cpu] = node; 978 + numa_set_node(cpu, node); 979 979 980 980 if (acpi_numa > 0) 981 981 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
+8 -2
arch/x86_64/mm/numa.c
··· 156 156 for (i = 0; i < NR_CPUS; i++) { 157 157 if (cpu_to_node[i] != NUMA_NO_NODE) 158 158 continue; 159 - cpu_to_node[i] = rr; 159 + numa_set_node(i, rr); 160 160 rr = next_node(rr, node_online_map); 161 161 if (rr == MAX_NUMNODES) 162 162 rr = first_node(node_online_map); ··· 242 242 nodes_clear(node_online_map); 243 243 node_set_online(0); 244 244 for (i = 0; i < NR_CPUS; i++) 245 - cpu_to_node[i] = 0; 245 + numa_set_node(i, 0); 246 246 node_to_cpumask[0] = cpumask_of_cpu(0); 247 247 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); 248 248 } ··· 251 251 { 252 252 set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); 253 253 } 254 + 255 + void __cpuinit numa_set_node(int cpu, int node) 256 + { 257 + cpu_pda[cpu].nodenumber = node; 258 + cpu_to_node[cpu] = node; 259 + } 254 260 255 261 unsigned long __init numa_free_all_bootmem(void) 256 262 {
+1 -1
arch/x86_64/mm/srat.c
··· 203 203 if (cpu_to_node[i] == NUMA_NO_NODE) 204 204 continue; 205 205 if (!node_isset(cpu_to_node[i], nodes_parsed)) 206 - cpu_to_node[i] = NUMA_NO_NODE; 206 + numa_set_node(i, NUMA_NO_NODE); 207 207 } 208 208 numa_init_array(); 209 209 return 0;
+2
include/asm-x86_64/numa.h
··· 17 17 extern void numa_init_array(void); 18 18 extern int numa_off; 19 19 20 + extern void numa_set_node(int cpu, int node); 21 + 20 22 extern unsigned char apicid_to_node[256]; 21 23 22 24 #define NUMA_NO_NODE 0xff
+1
include/asm-x86_64/pda.h
··· 15 15 int irqcount; /* Irq nesting counter. Starts with -1 */ 16 16 int cpunumber; /* Logical CPU number */ 17 17 char *irqstackptr; /* top of irqstack */ 18 + int nodenumber; /* number of current node */ 18 19 unsigned int __softirq_pending; 19 20 unsigned int __nmi_count; /* number of NMI on this CPUs */ 20 21 struct mm_struct *active_mm;
+2
include/asm-x86_64/topology.h
··· 28 28 #define pcibus_to_node(bus) ((long)(bus->sysdata)) 29 29 #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)); 30 30 31 + #define numa_node_id() read_pda(nodenumber) 32 + 31 33 /* sched_domains SD_NODE_INIT for x86_64 machines */ 32 34 #define SD_NODE_INIT (struct sched_domain) { \ 33 35 .span = CPU_MASK_NONE, \
+2
include/linux/mmzone.h
··· 435 435 436 436 #include <linux/topology.h> 437 437 /* Returns the number of the current Node. */ 438 + #ifndef numa_node_id 438 439 #define numa_node_id() (cpu_to_node(raw_smp_processor_id())) 440 + #endif 439 441 440 442 #ifndef CONFIG_NEED_MULTIPLE_NODES 441 443