···185186#ifdef CONFIG_ACPI_NUMA187extern int acpi_numa;188-extern int acpi_get_nodes(struct bootnode *physnodes);0189extern int acpi_scan_nodes(unsigned long start, unsigned long end);190#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)00191extern void acpi_fake_nodes(const struct bootnode *fake_nodes,192 int num_nodes);193-#else194-static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,195- int num_nodes)196-{197-}198#endif0199200#define acpi_unlazy_tlb(x) leave_mm(x)201
···185186#ifdef CONFIG_ACPI_NUMA187extern int acpi_numa;188+extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,189+ unsigned long end);190extern int acpi_scan_nodes(unsigned long start, unsigned long end);191#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)192+193+#ifdef CONFIG_NUMA_EMU194extern void acpi_fake_nodes(const struct bootnode *fake_nodes,195 int num_nodes);00000196#endif197+#endif /* CONFIG_ACPI_NUMA */198199#define acpi_unlazy_tlb(x) leave_mm(x)200
+5-1
arch/x86/include/asm/amd_nb.h
···9extern int early_is_amd_nb(u32 value);10extern int amd_cache_northbridges(void);11extern void amd_flush_garts(void);12-extern int amd_get_nodes(struct bootnode *nodes);13extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);14extern int amd_scan_nodes(void);000001516struct amd_northbridge {17 struct pci_dev *misc;
···9extern int early_is_amd_nb(u32 value);10extern int amd_cache_northbridges(void);11extern void amd_flush_garts(void);012extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);13extern int amd_scan_nodes(void);14+15+#ifdef CONFIG_NUMA_EMU16+extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);17+extern void amd_get_nodes(struct bootnode *nodes);18+#endif1920struct amd_northbridge {21 struct pci_dev *misc;
···27#include <asm/amd_nb.h>2829static struct bootnode __initdata nodes[8];030static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;3132static __init int find_northbridge(void)···69#endif70}7172-int __init amd_get_nodes(struct bootnode *physnodes)73-{74- int i;75- int ret = 0;76-77- for_each_node_mask(i, nodes_parsed) {78- physnodes[ret].start = nodes[i].start;79- physnodes[ret].end = nodes[i].end;80- ret++;81- }82- return ret;83-}84-85int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)86{87 unsigned long start = PFN_PHYS(start_pfn);···101 base = read_pci_config(0, nb, 1, 0x40 + i*8);102 limit = read_pci_config(0, nb, 1, 0x44 + i*8);103104- nodeid = limit & 7;105 if ((base & 3) == 0) {106 if (i < numnodes)107 pr_info("Skipping disabled node %d\n", i);···180 return -1;181 return 0;182}0000000000000000000000000000000000000000000000000000000000000000000000183184int __init amd_scan_nodes(void)185{
···27#include <asm/amd_nb.h>2829static struct bootnode __initdata nodes[8];30+static unsigned char __initdata nodeids[8];31static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;3233static __init int find_northbridge(void)···68#endif69}70000000000000071int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)72{73 unsigned long start = PFN_PHYS(start_pfn);···113 base = read_pci_config(0, nb, 1, 0x40 + i*8);114 limit = read_pci_config(0, nb, 1, 0x44 + i*8);115116+ nodeids[i] = nodeid = limit & 7;117 if ((base & 3) == 0) {118 if (i < numnodes)119 pr_info("Skipping disabled node %d\n", i);···192 return -1;193 return 0;194}195+196+#ifdef CONFIG_NUMA_EMU197+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {198+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE199+};200+201+void __init amd_get_nodes(struct bootnode *physnodes)202+{203+ int i;204+205+ for_each_node_mask(i, nodes_parsed) {206+ physnodes[i].start = nodes[i].start;207+ physnodes[i].end = nodes[i].end;208+ }209+}210+211+static int __init find_node_by_addr(unsigned long addr)212+{213+ int ret = NUMA_NO_NODE;214+ int i;215+216+ for (i = 0; i < 8; i++)217+ if (addr >= nodes[i].start && addr < nodes[i].end) {218+ ret = i;219+ break;220+ }221+ return ret;222+}223+224+/*225+ * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be226+ * setup to represent the physical topology but reflect the emulated227+ * environment. For each emulated node, the real node which it appears on is228+ * found and a fake pxm to nid mapping is created which mirrors the actual229+ * locality. node_distance() then represents the correct distances between230+ * emulated nodes by using the fake acpi mappings to pxms.231+ */232+void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)233+{234+ unsigned int bits;235+ unsigned int cores;236+ unsigned int apicid_base = 0;237+ int i;238+239+ bits = boot_cpu_data.x86_coreid_bits;240+ cores = 1 << bits;241+ early_get_boot_cpu_id();242+ if (boot_cpu_physical_apicid > 0)243+ apicid_base = boot_cpu_physical_apicid;244+245+ for (i = 0; i < nr_nodes; i++) {246+ int index;247+ int nid;248+ int j;249+250+ nid = find_node_by_addr(nodes[i].start);251+ if (nid == NUMA_NO_NODE)252+ continue;253+254+ index = nodeids[nid] << bits;255+ if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)256+ for (j = apicid_base; j < cores + apicid_base; j++)257+ fake_apicid_to_node[index + j] = i;258+#ifdef CONFIG_ACPI_NUMA259+ __acpi_map_pxm_to_node(nid, i);260+#endif261+ }262+ memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));263+}264+#endif /* CONFIG_NUMA_EMU */265266int __init amd_scan_nodes(void)267{
+126-35
arch/x86/mm/numa_64.c
···260#ifdef CONFIG_NUMA_EMU261/* Numa emulation */262static struct bootnode nodes[MAX_NUMNODES] __initdata;263-static struct bootnode physnodes[MAX_NUMNODES] __initdata;264static char *cmdline __initdata;265266static int __init setup_physnodes(unsigned long start, unsigned long end,267 int acpi, int amd)268{269- int nr_nodes = 0;270 int ret = 0;271 int i;2720273#ifdef CONFIG_ACPI_NUMA274 if (acpi)275- nr_nodes = acpi_get_nodes(physnodes);276#endif277#ifdef CONFIG_AMD_NUMA278 if (amd)279- nr_nodes = amd_get_nodes(physnodes);280#endif281 /*282 * Basic sanity checking on the physical node map: there may be errors283 * if the SRAT or AMD code incorrectly reported the topology or the mem=284 * kernel parameter is used.285 */286- for (i = 0; i < nr_nodes; i++) {287 if (physnodes[i].start == physnodes[i].end)288 continue;289 if (physnodes[i].start > end) {···298 physnodes[i].start = start;299 if (physnodes[i].end > end)300 physnodes[i].end = end;301- }302-303- /*304- * Remove all nodes that have no memory or were truncated because of the305- * limited address range.306- */307- for (i = 0; i < nr_nodes; i++) {308- if (physnodes[i].start == physnodes[i].end)309- continue;310- physnodes[ret].start = physnodes[i].start;311- physnodes[ret].end = physnodes[i].end;312 ret++;313 }314···311 ret = 1;312 }313 return ret;000000000000000000314}315316/*···359 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr360 * to max_addr. The return value is the number of nodes allocated.361 */362-static int __init split_nodes_interleave(u64 addr, u64 max_addr,363- int nr_phys_nodes, int nr_nodes)364{365 nodemask_t physnode_mask = NODE_MASK_NONE;366 u64 size;···390 return -1;391 }392393- for (i = 0; i < nr_phys_nodes; i++)394 if (physnodes[i].start != physnodes[i].end)395 node_set(i, physnode_mask);396···559{560 u64 addr = start_pfn << PAGE_SHIFT;561 u64 max_addr = last_pfn << PAGE_SHIFT;562- int num_phys_nodes;563 int num_nodes;564 int i;565566- num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);567 /*568 * If the numa=fake command-line contains a 'M' or 'G', it represents569 * the fixed node size. Otherwise, if it is just a single number N,···576 unsigned long n;577578 n = simple_strtoul(cmdline, NULL, 0);579- num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);580 }581582 if (num_nodes < 0)···599 nodes[i].end >> PAGE_SHIFT);600 setup_node_bootmem(i, nodes[i].start, nodes[i].end);601 }602- acpi_fake_nodes(nodes, num_nodes);0603 numa_init_array();604 return 0;605}···615 nodes_clear(node_online_map);616617#ifdef CONFIG_NUMA_EMU00618 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))619 return;00620 nodes_clear(node_possible_map);621 nodes_clear(node_online_map);622#endif···776777#ifndef CONFIG_DEBUG_PER_CPU_MAPS7780779void __cpuinit numa_add_cpu(int cpu)780{781 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);···786{787 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);788}000000000000000000000000000000000000000000000789790#else /* CONFIG_DEBUG_PER_CPU_MAPS */791-792-/*793- * --------- debug versions of the numa functions ---------794- */795-static void __cpuinit numa_set_cpumask(int cpu, int enable)796{797 int node = early_cpu_to_node(cpu);798 struct cpumask *mask;799 char buf[64];800801 mask = node_to_cpumask_map[node];802- if (mask == NULL) {803- printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);804 dump_stack();805- return;806 }0000000000000000000807808 if (enable)809 cpumask_set_cpu(cpu, mask);810 else811 cpumask_clear_cpu(cpu, mask);812-813- cpulist_scnprintf(buf, sizeof(buf), mask);814- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",815- enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);816}0000000000000000000000000817818void __cpuinit numa_add_cpu(int cpu)819{
···260#ifdef CONFIG_NUMA_EMU261/* Numa emulation */262static struct bootnode nodes[MAX_NUMNODES] __initdata;263+static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;264static char *cmdline __initdata;265266static int __init setup_physnodes(unsigned long start, unsigned long end,267 int acpi, int amd)268{0269 int ret = 0;270 int i;271272+ memset(physnodes, 0, sizeof(physnodes));273#ifdef CONFIG_ACPI_NUMA274 if (acpi)275+ acpi_get_nodes(physnodes, start, end);276#endif277#ifdef CONFIG_AMD_NUMA278 if (amd)279+ amd_get_nodes(physnodes);280#endif281 /*282 * Basic sanity checking on the physical node map: there may be errors283 * if the SRAT or AMD code incorrectly reported the topology or the mem=284 * kernel parameter is used.285 */286+ for (i = 0; i < MAX_NUMNODES; i++) {287 if (physnodes[i].start == physnodes[i].end)288 continue;289 if (physnodes[i].start > end) {···298 physnodes[i].start = start;299 if (physnodes[i].end > end)300 physnodes[i].end = end;00000000000301 ret++;302 }303···322 ret = 1;323 }324 return ret;325+}326+327+static void __init fake_physnodes(int acpi, int amd, int nr_nodes)328+{329+ int i;330+331+ BUG_ON(acpi && amd);332+#ifdef CONFIG_ACPI_NUMA333+ if (acpi)334+ acpi_fake_nodes(nodes, nr_nodes);335+#endif336+#ifdef CONFIG_AMD_NUMA337+ if (amd)338+ amd_fake_nodes(nodes, nr_nodes);339+#endif340+ if (!acpi && !amd)341+ for (i = 0; i < nr_cpu_ids; i++)342+ numa_set_node(i, 0);343}344345/*···352 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr353 * to max_addr. The return value is the number of nodes allocated.354 */355+static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)0356{357 nodemask_t physnode_mask = NODE_MASK_NONE;358 u64 size;···384 return -1;385 }386387+ for (i = 0; i < MAX_NUMNODES; i++)388 if (physnodes[i].start != physnodes[i].end)389 node_set(i, physnode_mask);390···553{554 u64 addr = start_pfn << PAGE_SHIFT;555 u64 max_addr = last_pfn << PAGE_SHIFT;0556 int num_nodes;557 int i;5580559 /*560 * If the numa=fake command-line contains a 'M' or 'G', it represents561 * the fixed node size. Otherwise, if it is just a single number N,···572 unsigned long n;573574 n = simple_strtoul(cmdline, NULL, 0);575+ num_nodes = split_nodes_interleave(addr, max_addr, n);576 }577578 if (num_nodes < 0)···595 nodes[i].end >> PAGE_SHIFT);596 setup_node_bootmem(i, nodes[i].start, nodes[i].end);597 }598+ setup_physnodes(addr, max_addr, acpi, amd);599+ fake_physnodes(acpi, amd, num_nodes);600 numa_init_array();601 return 0;602}···610 nodes_clear(node_online_map);611612#ifdef CONFIG_NUMA_EMU613+ setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,614+ acpi, amd);615 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))616 return;617+ setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,618+ acpi, amd);619 nodes_clear(node_possible_map);620 nodes_clear(node_online_map);621#endif···767768#ifndef CONFIG_DEBUG_PER_CPU_MAPS769770+#ifndef CONFIG_NUMA_EMU771void __cpuinit numa_add_cpu(int cpu)772{773 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);···776{777 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);778}779+#else780+void __cpuinit numa_add_cpu(int cpu)781+{782+ unsigned long addr;783+ u16 apicid;784+ int physnid;785+ int nid = NUMA_NO_NODE;786+787+ apicid = early_per_cpu(x86_cpu_to_apicid, cpu);788+ if (apicid != BAD_APICID)789+ nid = apicid_to_node[apicid];790+ if (nid == NUMA_NO_NODE)791+ nid = early_cpu_to_node(cpu);792+ BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));793+794+ /*795+ * Use the starting address of the emulated node to find which physical796+ * node it is allocated on.797+ */798+ addr = node_start_pfn(nid) << PAGE_SHIFT;799+ for (physnid = 0; physnid < MAX_NUMNODES; physnid++)800+ if (addr >= physnodes[physnid].start &&801+ addr < physnodes[physnid].end)802+ break;803+804+ /*805+ * Map the cpu to each emulated node that is allocated on the physical806+ * node of the cpu's apic id.807+ */808+ for_each_online_node(nid) {809+ addr = node_start_pfn(nid) << PAGE_SHIFT;810+ if (addr >= physnodes[physnid].start &&811+ addr < physnodes[physnid].end)812+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);813+ }814+}815+816+void __cpuinit numa_remove_cpu(int cpu)817+{818+ int i;819+820+ for_each_online_node(i)821+ cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);822+}823+#endif /* !CONFIG_NUMA_EMU */824825#else /* CONFIG_DEBUG_PER_CPU_MAPS */826+static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)0000827{828 int node = early_cpu_to_node(cpu);829 struct cpumask *mask;830 char buf[64];831832 mask = node_to_cpumask_map[node];833+ if (!mask) {834+ pr_err("node_to_cpumask_map[%i] NULL\n", node);835 dump_stack();836+ return NULL;837 }838+839+ cpulist_scnprintf(buf, sizeof(buf), mask);840+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",841+ enable ? "numa_add_cpu" : "numa_remove_cpu",842+ cpu, node, buf);843+ return mask;844+}845+846+/*847+ * --------- debug versions of the numa functions ---------848+ */849+#ifndef CONFIG_NUMA_EMU850+static void __cpuinit numa_set_cpumask(int cpu, int enable)851+{852+ struct cpumask *mask;853+854+ mask = debug_cpumask_set_cpu(cpu, enable);855+ if (!mask)856+ return;857858 if (enable)859 cpumask_set_cpu(cpu, mask);860 else861 cpumask_clear_cpu(cpu, mask);0000862}863+#else864+static void __cpuinit numa_set_cpumask(int cpu, int enable)865+{866+ int node = early_cpu_to_node(cpu);867+ struct cpumask *mask;868+ int i;869+870+ for_each_online_node(i) {871+ unsigned long addr;872+873+ addr = node_start_pfn(i) << PAGE_SHIFT;874+ if (addr < physnodes[node].start ||875+ addr >= physnodes[node].end)876+ continue;877+ mask = debug_cpumask_set_cpu(cpu, enable);878+ if (!mask)879+ return;880+881+ if (enable)882+ cpumask_set_cpu(cpu, mask);883+ else884+ cpumask_clear_cpu(cpu, mask);885+ }886+}887+#endif /* CONFIG_NUMA_EMU */888889void __cpuinit numa_add_cpu(int cpu)890{
+18-8
arch/x86/mm/srat_64.c
···349350void __init acpi_numa_arch_fixup(void) {}351352-int __init acpi_get_nodes(struct bootnode *physnodes)00353{354 int i;355- int ret = 0;356357 for_each_node_mask(i, nodes_parsed) {358- physnodes[ret].start = nodes[i].start;359- physnodes[ret].end = nodes[i].end;360- ret++;361 }362- return ret;363}0364365/* Use the information discovered above to actually set up the nodes. */366int __init acpi_scan_nodes(unsigned long start, unsigned long end)···506{507 int i, j;508509- printk(KERN_INFO "Faking PXM affinity for fake nodes on real "510- "topology.\n");511 for (i = 0; i < num_nodes; i++) {512 int nid, pxm;513···525 fake_apicid_to_node[j] == NUMA_NO_NODE)526 fake_apicid_to_node[j] = i;527 }00000000000528 for (i = 0; i < num_nodes; i++)529 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);530 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
···349350void __init acpi_numa_arch_fixup(void) {}351352+#ifdef CONFIG_NUMA_EMU353+void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,354+ unsigned long end)355{356 int i;0357358 for_each_node_mask(i, nodes_parsed) {359+ cutoff_node(i, start, end);360+ physnodes[i].start = nodes[i].start;361+ physnodes[i].end = nodes[i].end;362 }0363}364+#endif /* CONFIG_NUMA_EMU */365366/* Use the information discovered above to actually set up the nodes. */367int __init acpi_scan_nodes(unsigned long start, unsigned long end)···505{506 int i, j;50700508 for (i = 0; i < num_nodes; i++) {509 int nid, pxm;510···526 fake_apicid_to_node[j] == NUMA_NO_NODE)527 fake_apicid_to_node[j] = i;528 }529+530+ /*531+ * If there are apicid-to-node mappings for physical nodes that do not532+ * have a corresponding emulated node, it should default to a guaranteed533+ * value.534+ */535+ for (i = 0; i < MAX_LOCAL_APIC; i++)536+ if (apicid_to_node[i] != NUMA_NO_NODE &&537+ fake_apicid_to_node[i] == NUMA_NO_NODE)538+ fake_apicid_to_node[i] = 0;539+540 for (i = 0; i < num_nodes; i++)541 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);542 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));