Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86, NUMA: Make 32bit use common NUMA init path

With both _numa_init() methods converted and the rest of the init code
adjusted, numa_32.c can now switch from the 32bit-only init code to
the common one in numa.c.

* Shim get_memcfg_*()'s are dropped and initmem_init() calls
x86_numa_init(), which is updated to handle NUMAQ.

* All boilerplate operations, including node range limiting and pgdat
alloc/init, are handled by numa_init(). The 32bit-only implementation is
removed.

* 32bit numa_add_memblk(), numa_set_distance() and
memory_add_physaddr_to_nid() are removed and the common versions in
numa.c are enabled for 32bit.

This change causes the following behavior changes.

* NODE_DATA()->node_start_pfn/node_spanned_pages properly initialized
for 32bit too.

* Many more sanity checks and configuration cleanups.

* Proper handling of node distances.

* The same NUMA init messages as 64bit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>

Tejun Heo bd6709a9 7888e96b

+7 -241
-7
arch/x86/include/asm/topology.h
··· 93 93 #define pcibus_to_node(bus) __pcibus_to_node(bus) 94 94 95 95 #ifdef CONFIG_X86_32 96 - extern unsigned long node_start_pfn[]; 97 - extern unsigned long node_end_pfn[]; 98 - #define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid]) 99 - 100 96 # define SD_CACHE_NICE_TRIES 1 101 97 # define SD_IDLE_IDX 1 102 - 103 98 #else 104 - 105 99 # define SD_CACHE_NICE_TRIES 2 106 100 # define SD_IDLE_IDX 2 107 - 108 101 #endif 109 102 110 103 /* sched_domains SD_NODE_INIT for NUMA machines */
+5 -5
arch/x86/mm/numa.c
··· 173 173 (mi->nr_blks - idx) * sizeof(mi->blk[0])); 174 174 } 175 175 176 - #ifdef CONFIG_X86_64 177 176 /** 178 177 * numa_add_memblk - Add one numa_memblk to numa_meminfo 179 178 * @nid: NUMA node ID of the new memblk ··· 188 189 { 189 190 return numa_add_memblk_to(nid, start, end, &numa_meminfo); 190 191 } 191 - #endif 192 192 193 193 /* Initialize bootmem allocator for a node */ 194 194 static void __init setup_node_bootmem(int nid, u64 start, u64 end) ··· 411 413 return 0; 412 414 } 413 415 414 - #ifdef CONFIG_X86_64 415 416 /** 416 417 * numa_set_distance - Set NUMA distance from one NUMA to another 417 418 * @from: the 'from' node to set distance ··· 449 452 450 453 numa_distance[from * numa_distance_cnt + to] = distance; 451 454 } 452 - #endif 453 455 454 456 int __node_distance(int from, int to) 455 457 { ··· 622 626 void __init x86_numa_init(void) 623 627 { 624 628 if (!numa_off) { 629 + #ifdef CONFIG_X86_NUMAQ 630 + if (!numa_init(numaq_numa_init)) 631 + return; 632 + #endif 625 633 #ifdef CONFIG_ACPI_NUMA 626 634 if (!numa_init(x86_acpi_numa_init)) 627 635 return; ··· 805 805 806 806 #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ 807 807 808 - #if defined(CONFIG_X86_64) && defined(CONFIG_MEMORY_HOTPLUG) 808 + #ifdef CONFIG_MEMORY_HOTPLUG 809 809 int memory_add_physaddr_to_nid(u64 start) 810 810 { 811 811 struct numa_meminfo *mi = &numa_meminfo;
+2 -229
arch/x86/mm/numa_32.c
··· 22 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 23 23 */ 24 24 25 - #include <linux/mm.h> 26 25 #include <linux/bootmem.h> 27 26 #include <linux/memblock.h> 28 - #include <linux/mmzone.h> 29 - #include <linux/highmem.h> 30 - #include <linux/initrd.h> 31 - #include <linux/nodemask.h> 32 27 #include <linux/module.h> 33 - #include <linux/kexec.h> 34 - #include <linux/pfn.h> 35 - #include <linux/swap.h> 36 - #include <linux/acpi.h> 37 28 38 - #include <asm/e820.h> 39 - #include <asm/setup.h> 40 - #include <asm/mmzone.h> 41 - #include <asm/bios_ebda.h> 42 - #include <asm/proto.h> 43 - 44 - /* 45 - * numa interface - we expect the numa architecture specific code to have 46 - * populated the following initialisation. 47 - * 48 - * 1) node_online_map - the map of all nodes configured (online) in the system 49 - * 2) node_start_pfn - the starting page frame number for a node 50 - * 3) node_end_pfn - the ending page fram number for a node 51 - */ 52 - unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; 53 - unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; 54 - 29 + #include "numa_internal.h" 55 30 56 31 #ifdef CONFIG_DISCONTIGMEM 57 32 /* ··· 71 96 } 72 97 #endif 73 98 74 - extern unsigned long find_max_low_pfn(void); 75 99 extern unsigned long highend_pfn, highstart_pfn; 76 100 77 101 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) 78 102 79 103 static void *node_remap_start_vaddr[MAX_NUMNODES]; 80 104 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 81 - 82 - /* 83 - * FLAT - support for basic PC memory model with discontig enabled, essentially 84 - * a single node with all available processors in it with a flat 85 - * memory map. 
86 - */ 87 - static int __init get_memcfg_numa_flat(void) 88 - { 89 - printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); 90 - 91 - node_start_pfn[0] = 0; 92 - node_end_pfn[0] = max_pfn; 93 - memblock_x86_register_active_regions(0, 0, max_pfn); 94 - 95 - /* Indicate there is one node available. */ 96 - nodes_clear(node_online_map); 97 - node_set_online(0); 98 - return 1; 99 - } 100 - 101 - /* 102 - * Find the highest page frame number we have available for the node 103 - */ 104 - static void __init propagate_e820_map_node(int nid) 105 - { 106 - if (node_end_pfn[nid] > max_pfn) 107 - node_end_pfn[nid] = max_pfn; 108 - /* 109 - * if a user has given mem=XXXX, then we need to make sure 110 - * that the node _starts_ before that, too, not just ends 111 - */ 112 - if (node_start_pfn[nid] > max_pfn) 113 - node_start_pfn[nid] = max_pfn; 114 - BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); 115 - } 116 - 117 - /* 118 - * Allocate memory for the pg_data_t for this node via a crude pre-bootmem 119 - * method. For node zero take this from the bottom of memory, for 120 - * subsequent nodes place them at node_remap_start_vaddr which contains 121 - * node local data in physically node local memory. See setup_memory() 122 - * for details. 
123 - */ 124 - static void __init allocate_pgdat(int nid) 125 - { 126 - char buf[16]; 127 - 128 - NODE_DATA(nid) = alloc_remap(nid, ALIGN(sizeof(pg_data_t), PAGE_SIZE)); 129 - if (!NODE_DATA(nid)) { 130 - unsigned long pgdat_phys; 131 - pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT, 132 - max_pfn_mapped<<PAGE_SHIFT, 133 - sizeof(pg_data_t), 134 - PAGE_SIZE); 135 - NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); 136 - memset(buf, 0, sizeof(buf)); 137 - sprintf(buf, "NODE_DATA %d", nid); 138 - memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); 139 - } 140 - printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", 141 - nid, (unsigned long)NODE_DATA(nid)); 142 - } 143 105 144 106 /* 145 107 * Remap memory allocator ··· 234 322 nid, node_pa, node_pa + size, remap_va, remap_va + size); 235 323 } 236 324 237 - static int get_memcfg_numaq(void) 238 - { 239 - #ifdef CONFIG_X86_NUMAQ 240 - int nid; 241 - 242 - if (numa_off) 243 - return 0; 244 - 245 - if (numaq_numa_init() < 0) { 246 - nodes_clear(numa_nodes_parsed); 247 - remove_all_active_ranges(); 248 - return 0; 249 - } 250 - 251 - for_each_node_mask(nid, numa_nodes_parsed) 252 - node_set_online(nid); 253 - sort_node_map(); 254 - return 1; 255 - #else 256 - return 0; 257 - #endif 258 - } 259 - 260 - static int get_memcfg_from_srat(void) 261 - { 262 - #ifdef CONFIG_ACPI_NUMA 263 - int nid; 264 - 265 - if (numa_off) 266 - return 0; 267 - 268 - if (x86_acpi_numa_init() < 0) { 269 - nodes_clear(numa_nodes_parsed); 270 - remove_all_active_ranges(); 271 - return 0; 272 - } 273 - 274 - for_each_node_mask(nid, numa_nodes_parsed) 275 - node_set_online(nid); 276 - sort_node_map(); 277 - return 1; 278 - #else 279 - return 0; 280 - #endif 281 - } 282 - 283 - static void get_memcfg_numa(void) 284 - { 285 - if (get_memcfg_numaq()) 286 - return; 287 - if (get_memcfg_from_srat()) 288 - return; 289 - get_memcfg_numa_flat(); 290 - } 291 - 292 325 void __init 
initmem_init(void) 293 326 { 294 - int nid; 295 - 296 - get_memcfg_numa(); 297 - numa_init_array(); 298 - 299 - for_each_online_node(nid) { 300 - u64 start = (u64)node_start_pfn[nid] << PAGE_SHIFT; 301 - u64 end = min((u64)node_end_pfn[nid] << PAGE_SHIFT, 302 - (u64)max_pfn << PAGE_SHIFT); 303 - 304 - if (start < end) 305 - init_alloc_remap(nid, start, end); 306 - } 327 + x86_numa_init(); 307 328 308 329 #ifdef CONFIG_HIGHMEM 309 330 highstart_pfn = highend_pfn = max_pfn; ··· 257 412 258 413 printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", 259 414 (ulong) pfn_to_kaddr(max_low_pfn)); 260 - for_each_online_node(nid) 261 - allocate_pgdat(nid); 262 415 263 416 printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", 264 417 (ulong) pfn_to_kaddr(highstart_pfn)); 265 - for_each_online_node(nid) 266 - propagate_e820_map_node(nid); 267 - 268 - for_each_online_node(nid) { 269 - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 270 - NODE_DATA(nid)->node_id = nid; 271 - } 272 418 273 419 setup_bootmem_allocator(); 274 - } 275 - 276 - #ifdef CONFIG_MEMORY_HOTPLUG 277 - static int paddr_to_nid(u64 addr) 278 - { 279 - int nid; 280 - unsigned long pfn = PFN_DOWN(addr); 281 - 282 - for_each_node(nid) 283 - if (node_start_pfn[nid] <= pfn && 284 - pfn < node_end_pfn[nid]) 285 - return nid; 286 - 287 - return -1; 288 - } 289 - 290 - /* 291 - * This function is used to ask node id BEFORE memmap and mem_section's 292 - * initialization (pfn_to_nid() can't be used yet). 293 - * If _PXM is not defined on ACPI's DSDT, node id must be found by this. 294 - */ 295 - int memory_add_physaddr_to_nid(u64 addr) 296 - { 297 - int nid = paddr_to_nid(addr); 298 - return (nid >= 0) ? 
nid : 0; 299 - } 300 - 301 - EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); 302 - #endif 303 - 304 - /* temporary shim, will go away soon */ 305 - int __init numa_add_memblk(int nid, u64 start, u64 end) 306 - { 307 - unsigned long start_pfn = start >> PAGE_SHIFT; 308 - unsigned long end_pfn = end >> PAGE_SHIFT; 309 - 310 - printk(KERN_DEBUG "nid %d start_pfn %08lx end_pfn %08lx\n", 311 - nid, start_pfn, end_pfn); 312 - 313 - if (start >= (u64)max_pfn << PAGE_SHIFT) { 314 - printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", 315 - start_pfn, end_pfn); 316 - return 0; 317 - } 318 - 319 - node_set_online(nid); 320 - memblock_x86_register_active_regions(nid, start_pfn, 321 - min(end_pfn, max_pfn)); 322 - 323 - if (!node_has_online_mem(nid)) { 324 - node_start_pfn[nid] = start_pfn; 325 - node_end_pfn[nid] = end_pfn; 326 - } else { 327 - node_start_pfn[nid] = min(node_start_pfn[nid], start_pfn); 328 - node_end_pfn[nid] = max(node_end_pfn[nid], end_pfn); 329 - } 330 - return 0; 331 - } 332 - 333 - /* temporary shim, will go away soon */ 334 - void __init numa_set_distance(int from, int to, int distance) 335 - { 336 - /* nada */ 337 420 }