Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arch, mm: pull out allocation of NODE_DATA to generic code

Architectures that support NUMA duplicate the code that allocates
NODE_DATA on the node-local memory with slight variations in reporting of
the addresses where the memory was allocated.

Use x86 version as the basis for the generic alloc_node_data() function
and call this function in architecture specific numa initialization.

Round up node data size to SMP_CACHE_BYTES rather than to PAGE_SIZE like
x86 used to do since the bootmem era when allocation granularity was
PAGE_SIZE anyway.

Link: https://lkml.kernel.org/r/20240807064110.1003856-10-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Tested-by: Zi Yan <ziy@nvidia.com> # for x86_64 and arm64
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> [arm64 + CXL via QEMU]
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Rob Herring (Arm) <robh@kernel.org>
Cc: Samuel Holland <samuel.holland@sifive.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Mike Rapoport (Microsoft) and committed by Andrew Morton.
3515863d ec164cf1

+36 -117
-18
arch/loongarch/kernel/numa.c
··· 187 187 return numa_add_memblk_to(nid, start, end, &numa_meminfo); 188 188 } 189 189 190 - static void __init alloc_node_data(int nid) 191 - { 192 - void *nd; 193 - unsigned long nd_pa; 194 - size_t nd_sz = roundup(sizeof(pg_data_t), PAGE_SIZE); 195 - 196 - nd_pa = memblock_phys_alloc_try_nid(nd_sz, SMP_CACHE_BYTES, nid); 197 - if (!nd_pa) { 198 - pr_err("Cannot find %zu Byte for node_data (initial node: %d)\n", nd_sz, nid); 199 - return; 200 - } 201 - 202 - nd = __va(nd_pa); 203 - 204 - node_data[nid] = nd; 205 - memset(nd, 0, sizeof(pg_data_t)); 206 - } 207 - 208 190 static void __init node_mem_init(unsigned int node) 209 191 { 210 192 unsigned long start_pfn, end_pfn;
+2 -14
arch/mips/loongson64/numa.c
··· 81 81 82 82 static void __init node_mem_init(unsigned int node) 83 83 { 84 - struct pglist_data *nd; 85 84 unsigned long node_addrspace_offset; 86 85 unsigned long start_pfn, end_pfn; 87 - unsigned long nd_pa; 88 - int tnid; 89 - const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); 90 86 91 87 node_addrspace_offset = nid_to_addrbase(node); 92 88 pr_info("Node%d's addrspace_offset is 0x%lx\n", ··· 92 96 pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n", 93 97 node, start_pfn, end_pfn); 94 98 95 - nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, node); 96 - if (!nd_pa) 97 - panic("Cannot allocate %zu bytes for node %d data\n", 98 - nd_size, node); 99 - nd = __va(nd_pa); 100 - memset(nd, 0, sizeof(struct pglist_data)); 101 - tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 102 - if (tnid != node) 103 - pr_info("NODE_DATA(%d) on node %d\n", node, tnid); 104 - node_data[node] = nd; 99 + alloc_node_data(node); 100 + 105 101 NODE_DATA(node)->node_start_pfn = start_pfn; 106 102 NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; 107 103
+1 -19
arch/powerpc/mm/numa.c
··· 1093 1093 static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) 1094 1094 { 1095 1095 u64 spanned_pages = end_pfn - start_pfn; 1096 - const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); 1097 - u64 nd_pa; 1098 - void *nd; 1099 - int tnid; 1100 1096 1101 - nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); 1102 - if (!nd_pa) 1103 - panic("Cannot allocate %zu bytes for node %d data\n", 1104 - nd_size, nid); 1097 + alloc_node_data(nid); 1105 1098 1106 - nd = __va(nd_pa); 1107 - 1108 - /* report and initialize */ 1109 - pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", 1110 - nd_pa, nd_pa + nd_size - 1); 1111 - tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 1112 - if (tnid != nid) 1113 - pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); 1114 - 1115 - node_data[nid] = nd; 1116 - memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); 1117 1099 NODE_DATA(nid)->node_id = nid; 1118 1100 NODE_DATA(nid)->node_start_pfn = start_pfn; 1119 1101 NODE_DATA(nid)->node_spanned_pages = spanned_pages;
+1 -6
arch/sh/mm/init.c
··· 212 212 get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); 213 213 214 214 #ifdef CONFIG_NUMA 215 - NODE_DATA(nid) = memblock_alloc_try_nid( 216 - sizeof(struct pglist_data), 217 - SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT, 218 - MEMBLOCK_ALLOC_ACCESSIBLE, nid); 219 - if (!NODE_DATA(nid)) 220 - panic("Can't allocate pgdat for node %d\n", nid); 215 + alloc_node_data(nid); 221 216 #endif 222 217 223 218 NODE_DATA(nid)->node_start_pfn = start_pfn;
+2 -7
arch/sparc/mm/init_64.c
··· 1075 1075 { 1076 1076 struct pglist_data *p; 1077 1077 unsigned long start_pfn, end_pfn; 1078 - #ifdef CONFIG_NUMA 1079 1078 1080 - NODE_DATA(nid) = memblock_alloc_node(sizeof(struct pglist_data), 1081 - SMP_CACHE_BYTES, nid); 1082 - if (!NODE_DATA(nid)) { 1083 - prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); 1084 - prom_halt(); 1085 - } 1079 + #ifdef CONFIG_NUMA 1080 + alloc_node_data(nid); 1086 1081 1087 1082 NODE_DATA(nid)->node_id = nid; 1088 1083 #endif
+1 -33
arch/x86/mm/numa.c
··· 191 191 return numa_add_memblk_to(nid, start, end, &numa_meminfo); 192 192 } 193 193 194 - /* Allocate NODE_DATA for a node on the local memory */ 195 - static void __init alloc_node_data(int nid) 196 - { 197 - const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 198 - u64 nd_pa; 199 - void *nd; 200 - int tnid; 201 - 202 - /* 203 - * Allocate node data. Try node-local memory and then any node. 204 - * Never allocate in DMA zone. 205 - */ 206 - nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); 207 - if (!nd_pa) { 208 - pr_err("Cannot find %zu bytes in any node (initial node: %d)\n", 209 - nd_size, nid); 210 - return; 211 - } 212 - nd = __va(nd_pa); 213 - 214 - /* report and initialize */ 215 - printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid, 216 - nd_pa, nd_pa + nd_size - 1); 217 - tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 218 - if (tnid != nid) 219 - printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); 220 - 221 - node_data[nid] = nd; 222 - memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); 223 - 224 - node_set_online(nid); 225 - } 226 - 227 194 /** 228 195 * numa_cleanup_meminfo - Cleanup a numa_meminfo 229 196 * @mi: numa_meminfo to clean up ··· 538 571 continue; 539 572 540 573 alloc_node_data(nid); 574 + node_set_online(nid); 541 575 } 542 576 543 577 /* Dump memblock with node info and return. */
+1 -20
drivers/base/arch_numa.c
··· 216 216 */ 217 217 static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) 218 218 { 219 - const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); 220 - u64 nd_pa; 221 - void *nd; 222 - int tnid; 223 - 224 219 if (start_pfn >= end_pfn) 225 220 pr_info("Initmem setup node %d [<memory-less node>]\n", nid); 226 221 227 - nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); 228 - if (!nd_pa) 229 - panic("Cannot allocate %zu bytes for node %d data\n", 230 - nd_size, nid); 222 + alloc_node_data(nid); 231 223 232 - nd = __va(nd_pa); 233 - 234 - /* report and initialize */ 235 - pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n", 236 - nd_pa, nd_pa + nd_size - 1); 237 - tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 238 - if (tnid != nid) 239 - pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); 240 - 241 - node_data[nid] = nd; 242 - memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); 243 224 NODE_DATA(nid)->node_id = nid; 244 225 NODE_DATA(nid)->node_start_pfn = start_pfn; 245 226 NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+1
include/linux/numa.h
··· 33 33 extern struct pglist_data *node_data[]; 34 34 #define NODE_DATA(nid) (node_data[nid]) 35 35 36 + void __init alloc_node_data(int nid); 36 37 void __init alloc_offline_node_data(int nid); 37 38 38 39 /* Generic implementation available */
+27
mm/numa.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-or-later 2 2 3 + #include <linux/memblock.h> 3 4 #include <linux/printk.h> 4 5 #include <linux/numa.h> 5 6 6 7 struct pglist_data *node_data[MAX_NUMNODES]; 7 8 EXPORT_SYMBOL(node_data); 9 + 10 + /* Allocate NODE_DATA for a node on the local memory */ 11 + void __init alloc_node_data(int nid) 12 + { 13 + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); 14 + u64 nd_pa; 15 + void *nd; 16 + int tnid; 17 + 18 + /* Allocate node data. Try node-local memory and then any node. */ 19 + nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); 20 + if (!nd_pa) 21 + panic("Cannot allocate %zu bytes for node %d data\n", 22 + nd_size, nid); 23 + nd = __va(nd_pa); 24 + 25 + /* report and initialize */ 26 + pr_info("NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid, 27 + nd_pa, nd_pa + nd_size - 1); 28 + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 29 + if (tnid != nid) 30 + pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); 31 + 32 + node_data[nid] = nd; 33 + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); 34 + } 8 35 9 36 void __init alloc_offline_node_data(int nid) 10 37 {