···185185186186#ifdef CONFIG_ACPI_NUMA187187extern int acpi_numa;188188-extern int acpi_get_nodes(struct bootnode *physnodes);188188+extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,189189+ unsigned long end);189190extern int acpi_scan_nodes(unsigned long start, unsigned long end);190191#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)192192+193193+#ifdef CONFIG_NUMA_EMU191194extern void acpi_fake_nodes(const struct bootnode *fake_nodes,192195 int num_nodes);193193-#else194194-static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,195195- int num_nodes)196196-{197197-}198196#endif197197+#endif /* CONFIG_ACPI_NUMA */199198200199#define acpi_unlazy_tlb(x) leave_mm(x)201200
+5-1
arch/x86/include/asm/amd_nb.h
···99extern int early_is_amd_nb(u32 value);1010extern int amd_cache_northbridges(void);1111extern void amd_flush_garts(void);1212-extern int amd_get_nodes(struct bootnode *nodes);1312extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);1413extern int amd_scan_nodes(void);1414+1515+#ifdef CONFIG_NUMA_EMU1616+extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);1717+extern void amd_get_nodes(struct bootnode *nodes);1818+#endif15191620struct amd_northbridge {1721 struct pci_dev *misc;
···2727#include <asm/amd_nb.h>28282929static struct bootnode __initdata nodes[8];3030+static unsigned char __initdata nodeids[8];3031static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;31323233static __init int find_northbridge(void)···6968#endif7069}71707272-int __init amd_get_nodes(struct bootnode *physnodes)7373-{7474- int i;7575- int ret = 0;7676-7777- for_each_node_mask(i, nodes_parsed) {7878- physnodes[ret].start = nodes[i].start;7979- physnodes[ret].end = nodes[i].end;8080- ret++;8181- }8282- return ret;8383-}8484-8571int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)8672{8773 unsigned long start = PFN_PHYS(start_pfn);···101113 base = read_pci_config(0, nb, 1, 0x40 + i*8);102114 limit = read_pci_config(0, nb, 1, 0x44 + i*8);103115104104- nodeid = limit & 7;116116+ nodeids[i] = nodeid = limit & 7;105117 if ((base & 3) == 0) {106118 if (i < numnodes)107119 pr_info("Skipping disabled node %d\n", i);···180192 return -1;181193 return 0;182194}195195+196196+#ifdef CONFIG_NUMA_EMU197197+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {198198+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE199199+};200200+201201+void __init amd_get_nodes(struct bootnode *physnodes)202202+{203203+ int i;204204+205205+ for_each_node_mask(i, nodes_parsed) {206206+ physnodes[i].start = nodes[i].start;207207+ physnodes[i].end = nodes[i].end;208208+ }209209+}210210+211211+static int __init find_node_by_addr(unsigned long addr)212212+{213213+ int ret = NUMA_NO_NODE;214214+ int i;215215+216216+ for (i = 0; i < 8; i++)217217+ if (addr >= nodes[i].start && addr < nodes[i].end) {218218+ ret = i;219219+ break;220220+ }221221+ return ret;222222+}223223+224224+/*225225+ * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be226226+ * setup to represent the physical topology but reflect the emulated227227+ * environment. For each emulated node, the real node which it appears on is228228+ * found and a fake pxm to nid mapping is created which mirrors the actual229229+ * locality. node_distance() then represents the correct distances between230230+ * emulated nodes by using the fake acpi mappings to pxms.231231+ */232232+void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)233233+{234234+ unsigned int bits;235235+ unsigned int cores;236236+ unsigned int apicid_base = 0;237237+ int i;238238+239239+ bits = boot_cpu_data.x86_coreid_bits;240240+ cores = 1 << bits;241241+ early_get_boot_cpu_id();242242+ if (boot_cpu_physical_apicid > 0)243243+ apicid_base = boot_cpu_physical_apicid;244244+245245+ for (i = 0; i < nr_nodes; i++) {246246+ int index;247247+ int nid;248248+ int j;249249+250250+ nid = find_node_by_addr(nodes[i].start);251251+ if (nid == NUMA_NO_NODE)252252+ continue;253253+254254+ index = nodeids[nid] << bits;255255+ if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)256256+ for (j = apicid_base; j < cores + apicid_base; j++)257257+ fake_apicid_to_node[index + j] = i;258258+#ifdef CONFIG_ACPI_NUMA259259+ __acpi_map_pxm_to_node(nid, i);260260+#endif261261+ }262262+ memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));263263+}264264+#endif /* CONFIG_NUMA_EMU */183265184266int __init amd_scan_nodes(void)185267{
+126-35
arch/x86/mm/numa_64.c
···260260#ifdef CONFIG_NUMA_EMU261261/* Numa emulation */262262static struct bootnode nodes[MAX_NUMNODES] __initdata;263263-static struct bootnode physnodes[MAX_NUMNODES] __initdata;263263+static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;264264static char *cmdline __initdata;265265266266static int __init setup_physnodes(unsigned long start, unsigned long end,267267 int acpi, int amd)268268{269269- int nr_nodes = 0;270269 int ret = 0;271270 int i;272271272272+ memset(physnodes, 0, sizeof(physnodes));273273#ifdef CONFIG_ACPI_NUMA274274 if (acpi)275275- nr_nodes = acpi_get_nodes(physnodes);275275+ acpi_get_nodes(physnodes, start, end);276276#endif277277#ifdef CONFIG_AMD_NUMA278278 if (amd)279279- nr_nodes = amd_get_nodes(physnodes);279279+ amd_get_nodes(physnodes);280280#endif281281 /*282282 * Basic sanity checking on the physical node map: there may be errors283283 * if the SRAT or AMD code incorrectly reported the topology or the mem=284284 * kernel parameter is used.285285 */286286- for (i = 0; i < nr_nodes; i++) {286286+ for (i = 0; i < MAX_NUMNODES; i++) {287287 if (physnodes[i].start == physnodes[i].end)288288 continue;289289 if (physnodes[i].start > end) {···298298 physnodes[i].start = start;299299 if (physnodes[i].end > end)300300 physnodes[i].end = end;301301- }302302-303303- /*304304- * Remove all nodes that have no memory or were truncated because of the305305- * limited address range.306306- */307307- for (i = 0; i < nr_nodes; i++) {308308- if (physnodes[i].start == physnodes[i].end)309309- continue;310310- physnodes[ret].start = physnodes[i].start;311311- physnodes[ret].end = physnodes[i].end;312301 ret++;313302 }314303···311322 ret = 1;312323 }313324 return ret;325325+}326326+327327+static void __init fake_physnodes(int acpi, int amd, int nr_nodes)328328+{329329+ int i;330330+331331+ BUG_ON(acpi && amd);332332+#ifdef CONFIG_ACPI_NUMA333333+ if (acpi)334334+ acpi_fake_nodes(nodes, nr_nodes);335335+#endif336336+#ifdef CONFIG_AMD_NUMA337337+ if (amd)338338+ amd_fake_nodes(nodes, nr_nodes);339339+#endif340340+ if (!acpi && !amd)341341+ for (i = 0; i < nr_cpu_ids; i++)342342+ numa_set_node(i, 0);314343}315344316345/*···359352 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr360353 * to max_addr. The return value is the number of nodes allocated.361354 */362362-static int __init split_nodes_interleave(u64 addr, u64 max_addr,363363- int nr_phys_nodes, int nr_nodes)355355+static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)364356{365357 nodemask_t physnode_mask = NODE_MASK_NONE;366358 u64 size;···390384 return -1;391385 }392386393393- for (i = 0; i < nr_phys_nodes; i++)387387+ for (i = 0; i < MAX_NUMNODES; i++)394388 if (physnodes[i].start != physnodes[i].end)395389 node_set(i, physnode_mask);396390···559553{560554 u64 addr = start_pfn << PAGE_SHIFT;561555 u64 max_addr = last_pfn << PAGE_SHIFT;562562- int num_phys_nodes;563556 int num_nodes;564557 int i;565558566566- num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);567559 /*568560 * If the numa=fake command-line contains a 'M' or 'G', it represents569561 * the fixed node size. Otherwise, if it is just a single number N,···576572 unsigned long n;577573578574 n = simple_strtoul(cmdline, NULL, 0);579579- num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);575575+ num_nodes = split_nodes_interleave(addr, max_addr, n);580576 }581577582578 if (num_nodes < 0)···599595 nodes[i].end >> PAGE_SHIFT);600596 setup_node_bootmem(i, nodes[i].start, nodes[i].end);601597 }602602- acpi_fake_nodes(nodes, num_nodes);598598+ setup_physnodes(addr, max_addr, acpi, amd);599599+ fake_physnodes(acpi, amd, num_nodes);603600 numa_init_array();604601 return 0;605602}···615610 nodes_clear(node_online_map);616611617612#ifdef CONFIG_NUMA_EMU613613+ setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,614614+ acpi, amd);618615 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))619616 return;617617+ setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,618618+ acpi, amd);620619 nodes_clear(node_possible_map);621620 nodes_clear(node_online_map);622621#endif···776767777768#ifndef CONFIG_DEBUG_PER_CPU_MAPS778769770770+#ifndef CONFIG_NUMA_EMU779771void __cpuinit numa_add_cpu(int cpu)780772{781773 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);···786776{787777 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);788778}779779+#else780780+void __cpuinit numa_add_cpu(int cpu)781781+{782782+ unsigned long addr;783783+ u16 apicid;784784+ int physnid;785785+ int nid = NUMA_NO_NODE;786786+787787+ apicid = early_per_cpu(x86_cpu_to_apicid, cpu);788788+ if (apicid != BAD_APICID)789789+ nid = apicid_to_node[apicid];790790+ if (nid == NUMA_NO_NODE)791791+ nid = early_cpu_to_node(cpu);792792+ BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));793793+794794+ /*795795+ * Use the starting address of the emulated node to find which physical796796+ * node it is allocated on.797797+ */798798+ addr = node_start_pfn(nid) << PAGE_SHIFT;799799+ for (physnid = 0; physnid < MAX_NUMNODES; physnid++)800800+ if (addr >= physnodes[physnid].start &&801801+ addr < physnodes[physnid].end)802802+ break;803803+804804+ /*805805+ * Map the cpu to each emulated node that is allocated on the physical806806+ * node of the cpu's apic id.807807+ */808808+ for_each_online_node(nid) {809809+ addr = node_start_pfn(nid) << PAGE_SHIFT;810810+ if (addr >= physnodes[physnid].start &&811811+ addr < physnodes[physnid].end)812812+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);813813+ }814814+}815815+816816+void __cpuinit numa_remove_cpu(int cpu)817817+{818818+ int i;819819+820820+ for_each_online_node(i)821821+ cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);822822+}823823+#endif /* !CONFIG_NUMA_EMU */789824790825#else /* CONFIG_DEBUG_PER_CPU_MAPS */791791-792792-/*793793- * --------- debug versions of the numa functions ---------794794- */795795-static void __cpuinit numa_set_cpumask(int cpu, int enable)826826+static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)796827{797828 int node = early_cpu_to_node(cpu);798829 struct cpumask *mask;799830 char buf[64];800831801832 mask = node_to_cpumask_map[node];802802- if (mask == NULL) {803803- printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);833833+ if (!mask) {834834+ pr_err("node_to_cpumask_map[%i] NULL\n", node);804835 dump_stack();805805- return;836836+ return NULL;806837 }838838+839839+ cpulist_scnprintf(buf, sizeof(buf), mask);840840+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",841841+ enable ? "numa_add_cpu" : "numa_remove_cpu",842842+ cpu, node, buf);843843+ return mask;844844+}845845+846846+/*847847+ * --------- debug versions of the numa functions ---------848848+ */849849+#ifndef CONFIG_NUMA_EMU850850+static void __cpuinit numa_set_cpumask(int cpu, int enable)851851+{852852+ struct cpumask *mask;853853+854854+ mask = debug_cpumask_set_cpu(cpu, enable);855855+ if (!mask)856856+ return;807857808858 if (enable)809859 cpumask_set_cpu(cpu, mask);810860 else811861 cpumask_clear_cpu(cpu, mask);812812-813813- cpulist_scnprintf(buf, sizeof(buf), mask);814814- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",815815- enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);816862}863863+#else864864+static void __cpuinit numa_set_cpumask(int cpu, int enable)865865+{866866+ int node = early_cpu_to_node(cpu);867867+ struct cpumask *mask;868868+ int i;869869+870870+ for_each_online_node(i) {871871+ unsigned long addr;872872+873873+ addr = node_start_pfn(i) << PAGE_SHIFT;874874+ if (addr < physnodes[node].start ||875875+ addr >= physnodes[node].end)876876+ continue;877877+ mask = debug_cpumask_set_cpu(cpu, enable);878878+ if (!mask)879879+ return;880880+881881+ if (enable)882882+ cpumask_set_cpu(cpu, mask);883883+ else884884+ cpumask_clear_cpu(cpu, mask);885885+ }886886+}887887+#endif /* CONFIG_NUMA_EMU */817888818889void __cpuinit numa_add_cpu(int cpu)819890{
+18-8
arch/x86/mm/srat_64.c
···349349350350void __init acpi_numa_arch_fixup(void) {}351351352352-int __init acpi_get_nodes(struct bootnode *physnodes)352352+#ifdef CONFIG_NUMA_EMU353353+void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,354354+ unsigned long end)353355{354356 int i;355355- int ret = 0;356357357358 for_each_node_mask(i, nodes_parsed) {358358- physnodes[ret].start = nodes[i].start;359359- physnodes[ret].end = nodes[i].end;360360- ret++;359359+ cutoff_node(i, start, end);360360+ physnodes[i].start = nodes[i].start;361361+ physnodes[i].end = nodes[i].end;361362 }362362- return ret;363363}364364+#endif /* CONFIG_NUMA_EMU */364365365366/* Use the information discovered above to actually set up the nodes. */366367int __init acpi_scan_nodes(unsigned long start, unsigned long end)···506505{507506 int i, j;508507509509- printk(KERN_INFO "Faking PXM affinity for fake nodes on real "510510- "topology.\n");511508 for (i = 0; i < num_nodes; i++) {512509 int nid, pxm;513510···525526 fake_apicid_to_node[j] == NUMA_NO_NODE)526527 fake_apicid_to_node[j] = i;527528 }529529+530530+ /*531531+ * If there are apicid-to-node mappings for physical nodes that do not532532+ * have a corresponding emulated node, it should default to a guaranteed533533+ * value.534534+ */535535+ for (i = 0; i < MAX_LOCAL_APIC; i++)536536+ if (apicid_to_node[i] != NUMA_NO_NODE &&537537+ fake_apicid_to_node[i] == NUMA_NO_NODE)538538+ fake_apicid_to_node[i] = 0;539539+528540 for (i = 0; i < num_nodes; i++)529541 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);530542 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));