s390/mm: remove fake numa support · tjh.dev/kernel@701dc81

+2 -59

arch/s390/Kconfig

··· 450 450 config HOTPLUG_CPU 451 451 def_bool y 452 452 453 - # Some NUMA nodes have memory ranges that span 454 - # other nodes. Even though a pfn is valid and 455 - # between a node's start and end pfns, it may not 456 - # reside on that node. See memmap_init_zone() 457 - # for details. <- They meant memory holes! 458 - config NODES_SPAN_OTHER_NODES 459 - def_bool NUMA 460 - 461 453 config NUMA 462 454 bool "NUMA support" 463 455 depends on SCHED_TOPOLOGY ··· 459 467 460 468 This option adds NUMA support to the kernel. 461 469 462 - An operation mode can be selected by appending 463 - numa=<method> to the kernel command line. 464 - 465 - The default behaviour is identical to appending numa=plain to 466 - the command line. This will create just one node with all 467 - available memory and all CPUs in it. 468 - 469 470 config NODES_SHIFT 470 - int "Maximum NUMA nodes (as a power of 2)" 471 - range 1 10 472 - depends on NUMA 473 - default "4" 474 - help 475 - Specify the maximum number of NUMA nodes available on the target 476 - system. Increases memory reserved to accommodate various tables. 477 - 478 - menu "Select NUMA modes" 479 - depends on NUMA 480 - 481 - config NUMA_EMU 482 - bool "NUMA emulation" 483 - default y 484 - help 485 - Numa emulation mode will split the available system memory into 486 - equal chunks which then are distributed over the configured number 487 - of nodes in a round-robin manner. 488 - 489 - The number of fake nodes is limited by the number of available memory 490 - chunks (i.e. memory size / fake size) and the number of supported 491 - nodes in the kernel. 492 - 493 - The CPUs are assigned to the nodes in a way that partially respects 494 - the original machine topology (if supported by the machine). 495 - Fair distribution of the CPUs is not guaranteed. 496 - 497 - config EMU_SIZE 498 - hex "NUMA emulation memory chunk size" 499 - default 0x10000000 500 - range 0x400000 0x100000000 501 - depends on NUMA_EMU 502 - help 503 - Select the default size by which the memory is chopped and then 504 - assigned to emulated NUMA nodes. 505 - 506 - This can be overridden by specifying 507 - 508 - emu_size=<n> 509 - 510 - on the kernel command line where also suffixes K, M, G, and T are 511 - supported. 512 - 513 - endmenu 471 + int 472 + default "1" 514 473 515 474 config SCHED_SMT 516 475 def_bool n

+1 -12

arch/s390/include/asm/numa.h

··· 13 13 #ifdef CONFIG_NUMA 14 14 15 15 #include <linux/numa.h> 16 - #include <linux/cpumask.h> 17 16 18 17 void numa_setup(void); 19 - int numa_pfn_to_nid(unsigned long pfn); 20 - int __node_distance(int a, int b); 21 - void numa_update_cpu_topology(void); 22 - 23 - extern cpumask_t node_to_cpumask_map[MAX_NUMNODES]; 24 - extern int numa_debug_enabled; 25 18 26 19 #else 27 20 28 21 static inline void numa_setup(void) { } 29 - static inline void numa_update_cpu_topology(void) { } 30 - static inline int numa_pfn_to_nid(unsigned long pfn) 31 - { 32 - return 0; 33 - } 34 22 35 23 #endif /* CONFIG_NUMA */ 24 + 36 25 #endif /* _ASM_S390_NUMA_H */

+6 -3

arch/s390/include/asm/topology.h

··· 16 16 unsigned short socket_id; 17 17 unsigned short book_id; 18 18 unsigned short drawer_id; 19 - unsigned short node_id; 20 19 unsigned short dedicated : 1; 21 20 cpumask_t thread_mask; 22 21 cpumask_t core_mask; ··· 70 71 #define cpu_to_node cpu_to_node 71 72 static inline int cpu_to_node(int cpu) 72 73 { 73 - return cpu_topology[cpu].node_id; 74 + return 0; 74 75 } 75 76 76 77 /* Returns a pointer to the cpumask of CPUs on node 'node'. */ 77 78 #define cpumask_of_node cpumask_of_node 78 79 static inline const struct cpumask *cpumask_of_node(int node) 79 80 { 80 - return &node_to_cpumask_map[node]; 81 + return cpu_possible_mask; 81 82 } 82 83 83 84 #define pcibus_to_node(bus) __pcibus_to_node(bus) 84 85 85 86 #define node_distance(a, b) __node_distance(a, b) 87 + static inline int __node_distance(int a, int b) 88 + { 89 + return 0; 90 + } 86 91 87 92 #else /* !CONFIG_NUMA */ 88 93

+1

arch/s390/kernel/setup.c

··· 790 790 memblock_physmem_add(start, end - start); 791 791 } 792 792 memblock_set_bottom_up(false); 793 + memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); 793 794 memblock_dump_all(); 794 795 } 795 796

-2

arch/s390/kernel/topology.c

··· 26 26 #include <linux/nodemask.h> 27 27 #include <linux/node.h> 28 28 #include <asm/sysinfo.h> 29 - #include <asm/numa.h> 30 29 31 30 #define PTF_HORIZONTAL (0UL) 32 31 #define PTF_VERTICAL (1UL) ··· 266 267 cpumask_set_cpu(cpu, &cpus_with_topology); 267 268 } 268 269 } 269 - numa_update_cpu_topology(); 270 270 } 271 271 272 272 void store_topology(struct sysinfo_15_1_x *info)

-2

arch/s390/numa/Makefile

··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-y += numa.o 3 - obj-y += toptree.o 4 - obj-$(CONFIG_NUMA_EMU) += mode_emu.o

-577

arch/s390/numa/mode_emu.c

··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * NUMA support for s390 4 - * 5 - * NUMA emulation (aka fake NUMA) distributes the available memory to nodes 6 - * without using real topology information about the physical memory of the 7 - * machine. 8 - * 9 - * It distributes the available CPUs to nodes while respecting the original 10 - * machine topology information. This is done by trying to avoid to separate 11 - * CPUs which reside on the same book or even on the same MC. 12 - * 13 - * Because the current Linux scheduler code requires a stable cpu to node 14 - * mapping, cores are pinned to nodes when the first CPU thread is set online. 15 - * 16 - * Copyright IBM Corp. 2015 17 - */ 18 - 19 - #define KMSG_COMPONENT "numa_emu" 20 - #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 21 - 22 - #include <linux/kernel.h> 23 - #include <linux/cpumask.h> 24 - #include <linux/memblock.h> 25 - #include <linux/node.h> 26 - #include <linux/memory.h> 27 - #include <linux/slab.h> 28 - #include <asm/smp.h> 29 - #include <asm/topology.h> 30 - #include "numa_mode.h" 31 - #include "toptree.h" 32 - 33 - /* Distances between the different system components */ 34 - #define DIST_EMPTY 0 35 - #define DIST_CORE 1 36 - #define DIST_MC 2 37 - #define DIST_BOOK 3 38 - #define DIST_DRAWER 4 39 - #define DIST_MAX 5 40 - 41 - /* Node distance reported to common code */ 42 - #define EMU_NODE_DIST 10 43 - 44 - /* Node ID for free (not yet pinned) cores */ 45 - #define NODE_ID_FREE -1 46 - 47 - /* Different levels of toptree */ 48 - enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY}; 49 - 50 - /* The two toptree IDs */ 51 - enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA}; 52 - 53 - /* Number of NUMA nodes */ 54 - static int emu_nodes = 1; 55 - /* NUMA stripe size */ 56 - static unsigned long emu_size; 57 - 58 - /* 59 - * Node to core pinning information updates are protected by 60 - * "sched_domains_mutex". 61 - */ 62 - static struct { 63 - s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */ 64 - int total; /* Total number of pinned cores */ 65 - int per_node_target; /* Cores per node without extra cores */ 66 - int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */ 67 - } *emu_cores; 68 - 69 - /* 70 - * Pin a core to a node 71 - */ 72 - static void pin_core_to_node(int core_id, int node_id) 73 - { 74 - if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) { 75 - emu_cores->per_node[node_id]++; 76 - emu_cores->to_node_id[core_id] = node_id; 77 - emu_cores->total++; 78 - } else { 79 - WARN_ON(emu_cores->to_node_id[core_id] != node_id); 80 - } 81 - } 82 - 83 - /* 84 - * Number of pinned cores of a node 85 - */ 86 - static int cores_pinned(struct toptree *node) 87 - { 88 - return emu_cores->per_node[node->id]; 89 - } 90 - 91 - /* 92 - * ID of the node where the core is pinned (or NODE_ID_FREE) 93 - */ 94 - static int core_pinned_to_node_id(struct toptree *core) 95 - { 96 - return emu_cores->to_node_id[core->id]; 97 - } 98 - 99 - /* 100 - * Number of cores in the tree that are not yet pinned 101 - */ 102 - static int cores_free(struct toptree *tree) 103 - { 104 - struct toptree *core; 105 - int count = 0; 106 - 107 - toptree_for_each(core, tree, CORE) { 108 - if (core_pinned_to_node_id(core) == NODE_ID_FREE) 109 - count++; 110 - } 111 - return count; 112 - } 113 - 114 - /* 115 - * Return node of core 116 - */ 117 - static struct toptree *core_node(struct toptree *core) 118 - { 119 - return core->parent->parent->parent->parent; 120 - } 121 - 122 - /* 123 - * Return drawer of core 124 - */ 125 - static struct toptree *core_drawer(struct toptree *core) 126 - { 127 - return core->parent->parent->parent; 128 - } 129 - 130 - /* 131 - * Return book of core 132 - */ 133 - static struct toptree *core_book(struct toptree *core) 134 - { 135 - return core->parent->parent; 136 - } 137 - 138 - /* 139 - * Return mc of core 140 - */ 141 - static struct toptree *core_mc(struct toptree *core) 142 - { 143 - return core->parent; 144 - } 145 - 146 - /* 147 - * Distance between two cores 148 - */ 149 - static int dist_core_to_core(struct toptree *core1, struct toptree *core2) 150 - { 151 - if (core_drawer(core1)->id != core_drawer(core2)->id) 152 - return DIST_DRAWER; 153 - if (core_book(core1)->id != core_book(core2)->id) 154 - return DIST_BOOK; 155 - if (core_mc(core1)->id != core_mc(core2)->id) 156 - return DIST_MC; 157 - /* Same core or sibling on same MC */ 158 - return DIST_CORE; 159 - } 160 - 161 - /* 162 - * Distance of a node to a core 163 - */ 164 - static int dist_node_to_core(struct toptree *node, struct toptree *core) 165 - { 166 - struct toptree *core_node; 167 - int dist_min = DIST_MAX; 168 - 169 - toptree_for_each(core_node, node, CORE) 170 - dist_min = min(dist_min, dist_core_to_core(core_node, core)); 171 - return dist_min == DIST_MAX ? DIST_EMPTY : dist_min; 172 - } 173 - 174 - /* 175 - * Unify will delete empty nodes, therefore recreate nodes. 176 - */ 177 - static void toptree_unify_tree(struct toptree *tree) 178 - { 179 - int nid; 180 - 181 - toptree_unify(tree); 182 - for (nid = 0; nid < emu_nodes; nid++) 183 - toptree_get_child(tree, nid); 184 - } 185 - 186 - /* 187 - * Find the best/nearest node for a given core and ensure that no node 188 - * gets more than "emu_cores->per_node_target + extra" cores. 189 - */ 190 - static struct toptree *node_for_core(struct toptree *numa, struct toptree *core, 191 - int extra) 192 - { 193 - struct toptree *node, *node_best = NULL; 194 - int dist_cur, dist_best, cores_target; 195 - 196 - cores_target = emu_cores->per_node_target + extra; 197 - dist_best = DIST_MAX; 198 - node_best = NULL; 199 - toptree_for_each(node, numa, NODE) { 200 - /* Already pinned cores must use their nodes */ 201 - if (core_pinned_to_node_id(core) == node->id) { 202 - node_best = node; 203 - break; 204 - } 205 - /* Skip nodes that already have enough cores */ 206 - if (cores_pinned(node) >= cores_target) 207 - continue; 208 - dist_cur = dist_node_to_core(node, core); 209 - if (dist_cur < dist_best) { 210 - dist_best = dist_cur; 211 - node_best = node; 212 - } 213 - } 214 - return node_best; 215 - } 216 - 217 - /* 218 - * Find the best node for each core with respect to "extra" core count 219 - */ 220 - static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys, 221 - int extra) 222 - { 223 - struct toptree *node, *core, *tmp; 224 - 225 - toptree_for_each_safe(core, tmp, phys, CORE) { 226 - node = node_for_core(numa, core, extra); 227 - if (!node) 228 - return; 229 - toptree_move(core, node); 230 - pin_core_to_node(core->id, node->id); 231 - } 232 - } 233 - 234 - /* 235 - * Move structures of given level to specified NUMA node 236 - */ 237 - static void move_level_to_numa_node(struct toptree *node, struct toptree *phys, 238 - enum toptree_level level, bool perfect) 239 - { 240 - int cores_free, cores_target = emu_cores->per_node_target; 241 - struct toptree *cur, *tmp; 242 - 243 - toptree_for_each_safe(cur, tmp, phys, level) { 244 - cores_free = cores_target - toptree_count(node, CORE); 245 - if (perfect) { 246 - if (cores_free == toptree_count(cur, CORE)) 247 - toptree_move(cur, node); 248 - } else { 249 - if (cores_free >= toptree_count(cur, CORE)) 250 - toptree_move(cur, node); 251 - } 252 - } 253 - } 254 - 255 - /* 256 - * Move structures of a given level to NUMA nodes. If "perfect" is specified 257 - * move only perfectly fitting structures. Otherwise move also smaller 258 - * than needed structures. 259 - */ 260 - static void move_level_to_numa(struct toptree *numa, struct toptree *phys, 261 - enum toptree_level level, bool perfect) 262 - { 263 - struct toptree *node; 264 - 265 - toptree_for_each(node, numa, NODE) 266 - move_level_to_numa_node(node, phys, level, perfect); 267 - } 268 - 269 - /* 270 - * For the first run try to move the big structures 271 - */ 272 - static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys) 273 - { 274 - struct toptree *core; 275 - 276 - /* Always try to move perfectly fitting structures first */ 277 - move_level_to_numa(numa, phys, DRAWER, true); 278 - move_level_to_numa(numa, phys, DRAWER, false); 279 - move_level_to_numa(numa, phys, BOOK, true); 280 - move_level_to_numa(numa, phys, BOOK, false); 281 - move_level_to_numa(numa, phys, MC, true); 282 - move_level_to_numa(numa, phys, MC, false); 283 - /* Now pin all the moved cores */ 284 - toptree_for_each(core, numa, CORE) 285 - pin_core_to_node(core->id, core_node(core)->id); 286 - } 287 - 288 - /* 289 - * Allocate new topology and create required nodes 290 - */ 291 - static struct toptree *toptree_new(int id, int nodes) 292 - { 293 - struct toptree *tree; 294 - int nid; 295 - 296 - tree = toptree_alloc(TOPOLOGY, id); 297 - if (!tree) 298 - goto fail; 299 - for (nid = 0; nid < nodes; nid++) { 300 - if (!toptree_get_child(tree, nid)) 301 - goto fail; 302 - } 303 - return tree; 304 - fail: 305 - panic("NUMA emulation could not allocate topology"); 306 - } 307 - 308 - /* 309 - * Allocate and initialize core to node mapping 310 - */ 311 - static void __ref create_core_to_node_map(void) 312 - { 313 - int i; 314 - 315 - emu_cores = memblock_alloc(sizeof(*emu_cores), 8); 316 - if (!emu_cores) 317 - panic("%s: Failed to allocate %zu bytes align=0x%x\n", 318 - __func__, sizeof(*emu_cores), 8); 319 - for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++) 320 - emu_cores->to_node_id[i] = NODE_ID_FREE; 321 - } 322 - 323 - /* 324 - * Move cores from physical topology into NUMA target topology 325 - * and try to keep as much of the physical topology as possible. 326 - */ 327 - static struct toptree *toptree_to_numa(struct toptree *phys) 328 - { 329 - static int first = 1; 330 - struct toptree *numa; 331 - int cores_total; 332 - 333 - cores_total = emu_cores->total + cores_free(phys); 334 - emu_cores->per_node_target = cores_total / emu_nodes; 335 - numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes); 336 - if (first) { 337 - toptree_to_numa_first(numa, phys); 338 - first = 0; 339 - } 340 - toptree_to_numa_single(numa, phys, 0); 341 - toptree_to_numa_single(numa, phys, 1); 342 - toptree_unify_tree(numa); 343 - 344 - WARN_ON(cpumask_weight(&phys->mask)); 345 - return numa; 346 - } 347 - 348 - /* 349 - * Create a toptree out of the physical topology that we got from the hypervisor 350 - */ 351 - static struct toptree *toptree_from_topology(void) 352 - { 353 - struct toptree *phys, *node, *drawer, *book, *mc, *core; 354 - struct cpu_topology_s390 *top; 355 - int cpu; 356 - 357 - phys = toptree_new(TOPTREE_ID_PHYS, 1); 358 - 359 - for_each_cpu(cpu, &cpus_with_topology) { 360 - top = &cpu_topology[cpu]; 361 - node = toptree_get_child(phys, 0); 362 - drawer = toptree_get_child(node, top->drawer_id); 363 - book = toptree_get_child(drawer, top->book_id); 364 - mc = toptree_get_child(book, top->socket_id); 365 - core = toptree_get_child(mc, smp_get_base_cpu(cpu)); 366 - if (!drawer || !book || !mc || !core) 367 - panic("NUMA emulation could not allocate memory"); 368 - cpumask_set_cpu(cpu, &core->mask); 369 - toptree_update_mask(mc); 370 - } 371 - return phys; 372 - } 373 - 374 - /* 375 - * Add toptree core to topology and create correct CPU masks 376 - */ 377 - static void topology_add_core(struct toptree *core) 378 - { 379 - struct cpu_topology_s390 *top; 380 - int cpu; 381 - 382 - for_each_cpu(cpu, &core->mask) { 383 - top = &cpu_topology[cpu]; 384 - cpumask_copy(&top->thread_mask, &core->mask); 385 - cpumask_copy(&top->core_mask, &core_mc(core)->mask); 386 - cpumask_copy(&top->book_mask, &core_book(core)->mask); 387 - cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask); 388 - cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); 389 - top->node_id = core_node(core)->id; 390 - } 391 - } 392 - 393 - /* 394 - * Apply toptree to topology and create CPU masks 395 - */ 396 - static void toptree_to_topology(struct toptree *numa) 397 - { 398 - struct toptree *core; 399 - int i; 400 - 401 - /* Clear all node masks */ 402 - for (i = 0; i < MAX_NUMNODES; i++) 403 - cpumask_clear(&node_to_cpumask_map[i]); 404 - 405 - /* Rebuild all masks */ 406 - toptree_for_each(core, numa, CORE) 407 - topology_add_core(core); 408 - } 409 - 410 - /* 411 - * Show the node to core mapping 412 - */ 413 - static void print_node_to_core_map(void) 414 - { 415 - int nid, cid; 416 - 417 - if (!numa_debug_enabled) 418 - return; 419 - printk(KERN_DEBUG "NUMA node to core mapping\n"); 420 - for (nid = 0; nid < emu_nodes; nid++) { 421 - printk(KERN_DEBUG " node %3d: ", nid); 422 - for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) { 423 - if (emu_cores->to_node_id[cid] == nid) 424 - printk(KERN_CONT "%d ", cid); 425 - } 426 - printk(KERN_CONT "\n"); 427 - } 428 - } 429 - 430 - static void pin_all_possible_cpus(void) 431 - { 432 - int core_id, node_id, cpu; 433 - static int initialized; 434 - 435 - if (initialized) 436 - return; 437 - print_node_to_core_map(); 438 - node_id = 0; 439 - for_each_possible_cpu(cpu) { 440 - core_id = smp_get_base_cpu(cpu); 441 - if (emu_cores->to_node_id[core_id] != NODE_ID_FREE) 442 - continue; 443 - pin_core_to_node(core_id, node_id); 444 - cpu_topology[cpu].node_id = node_id; 445 - node_id = (node_id + 1) % emu_nodes; 446 - } 447 - print_node_to_core_map(); 448 - initialized = 1; 449 - } 450 - 451 - /* 452 - * Transfer physical topology into a NUMA topology and modify CPU masks 453 - * according to the NUMA topology. 454 - * 455 - * Must be called with "sched_domains_mutex" lock held. 456 - */ 457 - static void emu_update_cpu_topology(void) 458 - { 459 - struct toptree *phys, *numa; 460 - 461 - if (emu_cores == NULL) 462 - create_core_to_node_map(); 463 - phys = toptree_from_topology(); 464 - numa = toptree_to_numa(phys); 465 - toptree_free(phys); 466 - toptree_to_topology(numa); 467 - toptree_free(numa); 468 - pin_all_possible_cpus(); 469 - } 470 - 471 - /* 472 - * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum 473 - * alignment (needed for memory hotplug). 474 - */ 475 - static unsigned long emu_setup_size_adjust(unsigned long size) 476 - { 477 - unsigned long size_new; 478 - 479 - size = size ? : CONFIG_EMU_SIZE; 480 - size_new = roundup(size, memory_block_size_bytes()); 481 - if (size_new == size) 482 - return size; 483 - pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n", 484 - size >> 20, size_new >> 20); 485 - return size_new; 486 - } 487 - 488 - /* 489 - * If we have not enough memory for the specified nodes, reduce the node count. 490 - */ 491 - static int emu_setup_nodes_adjust(int nodes) 492 - { 493 - int nodes_max; 494 - 495 - nodes_max = memblock.memory.total_size / emu_size; 496 - nodes_max = max(nodes_max, 1); 497 - if (nodes_max >= nodes) 498 - return nodes; 499 - pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes); 500 - return nodes_max; 501 - } 502 - 503 - /* 504 - * Early emu setup 505 - */ 506 - static void emu_setup(void) 507 - { 508 - int nid; 509 - 510 - emu_size = emu_setup_size_adjust(emu_size); 511 - emu_nodes = emu_setup_nodes_adjust(emu_nodes); 512 - for (nid = 0; nid < emu_nodes; nid++) 513 - node_set(nid, node_possible_map); 514 - pr_info("Creating %d nodes with memory stripe size %ld MB\n", 515 - emu_nodes, emu_size >> 20); 516 - } 517 - 518 - /* 519 - * Return node id for given page number 520 - */ 521 - static int emu_pfn_to_nid(unsigned long pfn) 522 - { 523 - return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes; 524 - } 525 - 526 - /* 527 - * Return stripe size 528 - */ 529 - static unsigned long emu_align(void) 530 - { 531 - return emu_size; 532 - } 533 - 534 - /* 535 - * Return distance between two nodes 536 - */ 537 - static int emu_distance(int node1, int node2) 538 - { 539 - return (node1 != node2) * EMU_NODE_DIST; 540 - } 541 - 542 - /* 543 - * Define callbacks for generic s390 NUMA infrastructure 544 - */ 545 - const struct numa_mode numa_mode_emu = { 546 - .name = "emu", 547 - .setup = emu_setup, 548 - .update_cpu_topology = emu_update_cpu_topology, 549 - .__pfn_to_nid = emu_pfn_to_nid, 550 - .align = emu_align, 551 - .distance = emu_distance, 552 - }; 553 - 554 - /* 555 - * Kernel parameter: emu_nodes=<n> 556 - */ 557 - static int __init early_parse_emu_nodes(char *p) 558 - { 559 - int count; 560 - 561 - if (!p || kstrtoint(p, 0, &count) != 0 || count <= 0) 562 - return 0; 563 - emu_nodes = min(count, MAX_NUMNODES); 564 - return 0; 565 - } 566 - early_param("emu_nodes", early_parse_emu_nodes); 567 - 568 - /* 569 - * Kernel parameter: emu_size=[<n>[k|M|G|T]] 570 - */ 571 - static int __init early_parse_emu_size(char *p) 572 - { 573 - if (p) 574 - emu_size = memparse(p, NULL); 575 - return 0; 576 - } 577 - early_param("emu_size", early_parse_emu_size);

+9 -138

arch/s390/numa/numa.c

··· 7 7 * Copyright IBM Corp. 2015 8 8 */ 9 9 10 - #define KMSG_COMPONENT "numa" 11 - #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 12 - 13 10 #include <linux/kernel.h> 14 11 #include <linux/mmzone.h> 15 12 #include <linux/cpumask.h> 16 13 #include <linux/memblock.h> 17 - #include <linux/slab.h> 18 14 #include <linux/node.h> 19 - 20 15 #include <asm/numa.h> 21 - #include "numa_mode.h" 22 16 23 - pg_data_t *node_data[MAX_NUMNODES]; 17 + struct pglist_data *node_data[MAX_NUMNODES]; 24 18 EXPORT_SYMBOL(node_data); 25 19 26 - cpumask_t node_to_cpumask_map[MAX_NUMNODES]; 27 - EXPORT_SYMBOL(node_to_cpumask_map); 28 - 29 - static void plain_setup(void) 20 + void __init numa_setup(void) 30 21 { 22 + int nid; 23 + 24 + nodes_clear(node_possible_map); 31 25 node_set(0, node_possible_map); 32 - } 33 - 34 - const struct numa_mode numa_mode_plain = { 35 - .name = "plain", 36 - .setup = plain_setup, 37 - }; 38 - 39 - static const struct numa_mode *mode = &numa_mode_plain; 40 - 41 - int numa_pfn_to_nid(unsigned long pfn) 42 - { 43 - return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0; 44 - } 45 - 46 - void numa_update_cpu_topology(void) 47 - { 48 - if (mode->update_cpu_topology) 49 - mode->update_cpu_topology(); 50 - } 51 - 52 - int __node_distance(int a, int b) 53 - { 54 - return mode->distance ? mode->distance(a, b) : 0; 55 - } 56 - EXPORT_SYMBOL(__node_distance); 57 - 58 - int numa_debug_enabled; 59 - 60 - /* 61 - * numa_setup_memory() - Assign bootmem to nodes 62 - * 63 - * The memory is first added to memblock without any respect to nodes. 64 - * This is fixed before remaining memblock memory is handed over to the 65 - * buddy allocator. 66 - * An important side effect is that large bootmem allocations might easily 67 - * cross node boundaries, which can be needed for large allocations with 68 - * smaller memory stripes in each node (i.e. when using NUMA emulation). 69 - * 70 - * Memory defines nodes: 71 - * Therefore this routine also sets the nodes online with memory. 72 - */ 73 - static void __init numa_setup_memory(void) 74 - { 75 - unsigned long cur_base, align, end_of_dram; 76 - int nid = 0; 77 - 78 - end_of_dram = memblock_end_of_DRAM(); 79 - align = mode->align ? mode->align() : ULONG_MAX; 80 - 81 - /* 82 - * Step through all available memory and assign it to the nodes 83 - * indicated by the mode implementation. 84 - * All nodes which are seen here will be set online. 85 - */ 86 - cur_base = 0; 87 - do { 88 - nid = numa_pfn_to_nid(PFN_DOWN(cur_base)); 89 - node_set_online(nid); 90 - memblock_set_node(cur_base, align, &memblock.memory, nid); 91 - cur_base += align; 92 - } while (cur_base < end_of_dram); 93 - 94 - /* Allocate and fill out node_data */ 26 + node_set_online(0); 95 27 for (nid = 0; nid < MAX_NUMNODES; nid++) { 96 28 NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); 97 29 if (!NODE_DATA(nid)) 98 30 panic("%s: Failed to allocate %zu bytes align=0x%x\n", 99 31 __func__, sizeof(pg_data_t), 8); 100 32 } 101 - 102 - for_each_online_node(nid) { 103 - unsigned long start_pfn, end_pfn; 104 - unsigned long t_start, t_end; 105 - int i; 106 - 107 - start_pfn = ULONG_MAX; 108 - end_pfn = 0; 109 - for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) { 110 - if (t_start < start_pfn) 111 - start_pfn = t_start; 112 - if (t_end > end_pfn) 113 - end_pfn = t_end; 114 - } 115 - NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; 116 - NODE_DATA(nid)->node_id = nid; 117 - } 33 + NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; 34 + NODE_DATA(0)->node_id = 0; 118 35 } 119 36 120 - /* 121 - * numa_setup() - Earliest initialization 122 - * 123 - * Assign the mode and call the mode's setup routine. 124 - */ 125 - void __init numa_setup(void) 126 - { 127 - pr_info("NUMA mode: %s\n", mode->name); 128 - nodes_clear(node_possible_map); 129 - /* Initially attach all possible CPUs to node 0. */ 130 - cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); 131 - if (mode->setup) 132 - mode->setup(); 133 - numa_setup_memory(); 134 - memblock_dump_all(); 135 - } 136 - 137 - /* 138 - * numa_init_late() - Initialization initcall 139 - * 140 - * Register NUMA nodes. 141 - */ 142 37 static int __init numa_init_late(void) 143 38 { 144 - int nid; 145 - 146 - for_each_online_node(nid) 147 - register_one_node(nid); 39 + register_one_node(0); 148 40 return 0; 149 41 } 150 42 arch_initcall(numa_init_late); 151 - 152 - static int __init parse_debug(char *parm) 153 - { 154 - numa_debug_enabled = 1; 155 - return 0; 156 - } 157 - early_param("numa_debug", parse_debug); 158 - 159 - static int __init parse_numa(char *parm) 160 - { 161 - if (!parm) 162 - return 1; 163 - if (strcmp(parm, numa_mode_plain.name) == 0) 164 - mode = &numa_mode_plain; 165 - #ifdef CONFIG_NUMA_EMU 166 - if (strcmp(parm, numa_mode_emu.name) == 0) 167 - mode = &numa_mode_emu; 168 - #endif 169 - return 0; 170 - } 171 - early_param("numa", parse_numa);

-25

arch/s390/numa/numa_mode.h

··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * NUMA support for s390 4 - * 5 - * Define declarations used for communication between NUMA mode 6 - * implementations and NUMA core functionality. 7 - * 8 - * Copyright IBM Corp. 2015 9 - */ 10 - #ifndef __S390_NUMA_MODE_H 11 - #define __S390_NUMA_MODE_H 12 - 13 - struct numa_mode { 14 - char *name; /* Name of mode */ 15 - void (*setup)(void); /* Initizalize mode */ 16 - void (*update_cpu_topology)(void); /* Called by topology code */ 17 - int (*__pfn_to_nid)(unsigned long pfn); /* PFN to node ID */ 18 - unsigned long (*align)(void); /* Minimum node alignment */ 19 - int (*distance)(int a, int b); /* Distance between two nodes */ 20 - }; 21 - 22 - extern const struct numa_mode numa_mode_plain; 23 - extern const struct numa_mode numa_mode_emu; 24 - 25 - #endif /* __S390_NUMA_MODE_H */

-351

arch/s390/numa/toptree.c

··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * NUMA support for s390 4 - * 5 - * A tree structure used for machine topology mangling 6 - * 7 - * Copyright IBM Corp. 2015 8 - */ 9 - 10 - #include <linux/kernel.h> 11 - #include <linux/memblock.h> 12 - #include <linux/cpumask.h> 13 - #include <linux/list.h> 14 - #include <linux/list_sort.h> 15 - #include <linux/slab.h> 16 - #include <asm/numa.h> 17 - 18 - #include "toptree.h" 19 - 20 - /** 21 - * toptree_alloc - Allocate and initialize a new tree node. 22 - * @level: The node's vertical level; level 0 contains the leaves. 23 - * @id: ID number, explicitly not unique beyond scope of node's siblings 24 - * 25 - * Allocate a new tree node and initialize it. 26 - * 27 - * RETURNS: 28 - * Pointer to the new tree node or NULL on error 29 - */ 30 - struct toptree __ref *toptree_alloc(int level, int id) 31 - { 32 - struct toptree *res; 33 - 34 - if (slab_is_available()) 35 - res = kzalloc(sizeof(*res), GFP_KERNEL); 36 - else 37 - res = memblock_alloc(sizeof(*res), 8); 38 - if (!res) 39 - return res; 40 - 41 - INIT_LIST_HEAD(&res->children); 42 - INIT_LIST_HEAD(&res->sibling); 43 - cpumask_clear(&res->mask); 44 - res->level = level; 45 - res->id = id; 46 - return res; 47 - } 48 - 49 - /** 50 - * toptree_remove - Remove a tree node from a tree 51 - * @cand: Pointer to the node to remove 52 - * 53 - * The node is detached from its parent node. The parent node's 54 - * masks will be updated to reflect the loss of the child. 55 - */ 56 - static void toptree_remove(struct toptree *cand) 57 - { 58 - struct toptree *oldparent; 59 - 60 - list_del_init(&cand->sibling); 61 - oldparent = cand->parent; 62 - cand->parent = NULL; 63 - toptree_update_mask(oldparent); 64 - } 65 - 66 - /** 67 - * toptree_free - discard a tree node 68 - * @cand: Pointer to the tree node to discard 69 - * 70 - * Checks if @cand is attached to a parent node. Detaches it 71 - * cleanly using toptree_remove. Possible children are freed 72 - * recursively. In the end @cand itself is freed. 73 - */ 74 - void __ref toptree_free(struct toptree *cand) 75 - { 76 - struct toptree *child, *tmp; 77 - 78 - if (cand->parent) 79 - toptree_remove(cand); 80 - toptree_for_each_child_safe(child, tmp, cand) 81 - toptree_free(child); 82 - if (slab_is_available()) 83 - kfree(cand); 84 - else 85 - memblock_free_early((unsigned long)cand, sizeof(*cand)); 86 - } 87 - 88 - /** 89 - * toptree_update_mask - Update node bitmasks 90 - * @cand: Pointer to a tree node 91 - * 92 - * The node's cpumask will be updated by combining all children's 93 - * masks. Then toptree_update_mask is called recursively for the 94 - * parent if applicable. 95 - * 96 - * NOTE: 97 - * This must not be called on leaves. If called on a leaf, its 98 - * CPU mask is cleared and lost. 99 - */ 100 - void toptree_update_mask(struct toptree *cand) 101 - { 102 - struct toptree *child; 103 - 104 - cpumask_clear(&cand->mask); 105 - list_for_each_entry(child, &cand->children, sibling) 106 - cpumask_or(&cand->mask, &cand->mask, &child->mask); 107 - if (cand->parent) 108 - toptree_update_mask(cand->parent); 109 - } 110 - 111 - /** 112 - * toptree_insert - Insert a tree node into tree 113 - * @cand: Pointer to the node to insert 114 - * @target: Pointer to the node to which @cand will added as a child 115 - * 116 - * Insert a tree node into a tree. Masks will be updated automatically. 117 - * 118 - * RETURNS: 119 - * 0 on success, -1 if NULL is passed as argument or the node levels 120 - * don't fit. 121 - */ 122 - static int toptree_insert(struct toptree *cand, struct toptree *target) 123 - { 124 - if (!cand || !target) 125 - return -1; 126 - if (target->level != (cand->level + 1)) 127 - return -1; 128 - list_add_tail(&cand->sibling, &target->children); 129 - cand->parent = target; 130 - toptree_update_mask(target); 131 - return 0; 132 - } 133 - 134 - /** 135 - * toptree_move_children - Move all child nodes of a node to a new place 136 - * @cand: Pointer to the node whose children are to be moved 137 - * @target: Pointer to the node to which @cand's children will be attached 138 - * 139 - * Take all child nodes of @cand and move them using toptree_move. 140 - */ 141 - static void toptree_move_children(struct toptree *cand, struct toptree *target) 142 - { 143 - struct toptree *child, *tmp; 144 - 145 - toptree_for_each_child_safe(child, tmp, cand) 146 - toptree_move(child, target); 147 - } 148 - 149 - /** 150 - * toptree_unify - Merge children with same ID 151 - * @cand: Pointer to node whose direct children should be made unique 152 - * 153 - * When mangling the tree it is possible that a node has two or more children 154 - * which have the same ID. This routine merges these children into one and 155 - * moves all children of the merged nodes into the unified node. 156 - */ 157 - void toptree_unify(struct toptree *cand) 158 - { 159 - struct toptree *child, *tmp, *cand_copy; 160 - 161 - /* Threads cannot be split, cores are not split */ 162 - if (cand->level < 2) 163 - return; 164 - 165 - cand_copy = toptree_alloc(cand->level, 0); 166 - toptree_for_each_child_safe(child, tmp, cand) { 167 - struct toptree *tmpchild; 168 - 169 - if (!cpumask_empty(&child->mask)) { 170 - tmpchild = toptree_get_child(cand_copy, child->id); 171 - toptree_move_children(child, tmpchild); 172 - } 173 - toptree_free(child); 174 - } 175 - toptree_move_children(cand_copy, cand); 176 - toptree_free(cand_copy); 177 - 178 - toptree_for_each_child(child, cand) 179 - toptree_unify(child); 180 - } 181 - 182 - /** 183 - * toptree_move - Move a node to another context 184 - * @cand: Pointer to the node to move 185 - * @target: Pointer to the node where @cand should go 186 - * 187 - * In the easiest case @cand is exactly on the level below @target 188 - * and will be immediately moved to the target. 189 - * 190 - * If @target's level is not the direct parent level of @cand, 191 - * nodes for the missing levels are created and put between 192 - * @cand and @target. The "stacking" nodes' IDs are taken from 193 - * @cand's parents. 194 - * 195 - * After this it is likely to have redundant nodes in the tree 196 - * which are addressed by means of toptree_unify. 197 - */ 198 - void toptree_move(struct toptree *cand, struct toptree *target) 199 - { 200 - struct toptree *stack_target, *real_insert_point, *ptr, *tmp; 201 - 202 - if (cand->level + 1 == target->level) { 203 - toptree_remove(cand); 204 - toptree_insert(cand, target); 205 - return; 206 - } 207 - 208 - real_insert_point = NULL; 209 - ptr = cand; 210 - stack_target = NULL; 211 - 212 - do { 213 - tmp = stack_target; 214 - stack_target = toptree_alloc(ptr->level + 1, 215 - ptr->parent->id); 216 - toptree_insert(tmp, stack_target); 217 - if (!real_insert_point) 218 - real_insert_point = stack_target; 219 - ptr = ptr->parent; 220 - } while (stack_target->level < (target->level - 1)); 221 - 222 - toptree_remove(cand); 223 - toptree_insert(cand, real_insert_point); 224 - toptree_insert(stack_target, target); 225 - } 226 - 227 - /** 228 - * toptree_get_child - Access a tree node's child by its ID 229 - * @cand: Pointer to tree node whose child is to access 230 - * @id: The desired child's ID 231 - * 232 - * @cand's children are searched for a child with matching ID. 233 - * If no match can be found, a new child with the desired ID 234 - * is created and returned. 235 - */ 236 - struct toptree *toptree_get_child(struct toptree *cand, int id) 237 - { 238 - struct toptree *child; 239 - 240 - toptree_for_each_child(child, cand) 241 - if (child->id == id) 242 - return child; 243 - child = toptree_alloc(cand->level-1, id); 244 - toptree_insert(child, cand); 245 - return child; 246 - } 247 - 248 - /** 249 - * toptree_first - Find the first descendant on specified level 250 - * @context: Pointer to tree node whose descendants are to be used 251 - * @level: The level of interest 252 - * 253 - * RETURNS: 254 - * @context's first descendant on the specified level, or NULL 255 - * if there is no matching descendant 256 - */ 257 - struct toptree *toptree_first(struct toptree *context, int level) 258 - { 259 - struct toptree *child, *tmp; 260 - 261 - if (context->level == level) 262 - return context; 263 - 264 - if (!list_empty(&context->children)) { 265 - list_for_each_entry(child, &context->children, sibling) { 266 - tmp = toptree_first(child, level); 267 - if (tmp) 268 - return tmp; 269 - } 270 - } 271 - return NULL; 272 - } 273 - 274 - /** 275 - * toptree_next_sibling - Return next sibling 276 - * @cur: Pointer to a tree node 277 - * 278 - * RETURNS: 279 - * If @cur has a parent and is not the last in the parent's children list, 280 - * the next sibling is returned. Or NULL when there are no siblings left. 281 - */ 282 - static struct toptree *toptree_next_sibling(struct toptree *cur) 283 - { 284 - if (cur->parent == NULL) 285 - return NULL; 286 - 287 - if (cur == list_last_entry(&cur->parent->children, 288 - struct toptree, sibling)) 289 - return NULL; 290 - return (struct toptree *) list_next_entry(cur, sibling); 291 - } 292 - 293 - /** 294 - * toptree_next - Tree traversal function 295 - * @cur: Pointer to current element 296 - * @context: Pointer to the root node of the tree or subtree to 297 - * be traversed. 298 - * @level: The level of interest. 299 - * 300 - * RETURNS: 301 - * Pointer to the next node on level @level 302 - * or NULL when there is no next node. 303 - */ 304 - struct toptree *toptree_next(struct toptree *cur, struct toptree *context, 305 - int level) 306 - { 307 - struct toptree *cur_context, *tmp; 308 - 309 - if (!cur) 310 - return NULL; 311 - 312 - if (context->level == level) 313 - return NULL; 314 - 315 - tmp = toptree_next_sibling(cur); 316 - if (tmp != NULL) 317 - return tmp; 318 - 319 - cur_context = cur; 320 - while (cur_context->level < context->level - 1) { 321 - /* Step up */ 322 - cur_context = cur_context->parent; 323 - /* Step aside */ 324 - tmp = toptree_next_sibling(cur_context); 325 - if (tmp != NULL) { 326 - /* Step down */ 327 - tmp = toptree_first(tmp, level); 328 - if (tmp != NULL) 329 - return tmp; 330 - } 331 - } 332 - return NULL; 333 - } 334 - 335 - /** 336 - * toptree_count - Count descendants on specified level 337 - * @context: Pointer to node whose descendants are to be considered 338 - * @level: Only descendants on the specified level will be counted 339 - * 340 - * RETURNS: 341 - * Number of descendants on the specified level 342 - */ 343 - int toptree_count(struct toptree *context, int level) 344 - { 345 - struct toptree *cur; 346 - int cnt = 0; 347 - 348 - toptree_for_each(cur, context, level) 349 - cnt++; 350 - return cnt; 351 - }

-61

arch/s390/numa/toptree.h

··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * NUMA support for s390 4 - * 5 - * A tree structure used for machine topology mangling 6 - * 7 - * Copyright IBM Corp. 2015 8 - */ 9 - #ifndef S390_TOPTREE_H 10 - #define S390_TOPTREE_H 11 - 12 - #include <linux/cpumask.h> 13 - #include <linux/list.h> 14 - 15 - struct toptree { 16 - int level; 17 - int id; 18 - cpumask_t mask; 19 - struct toptree *parent; 20 - struct list_head sibling; 21 - struct list_head children; 22 - }; 23 - 24 - struct toptree *toptree_alloc(int level, int id); 25 - void toptree_free(struct toptree *cand); 26 - void toptree_update_mask(struct toptree *cand); 27 - void toptree_unify(struct toptree *cand); 28 - struct toptree *toptree_get_child(struct toptree *cand, int id); 29 - void toptree_move(struct toptree *cand, struct toptree *target); 30 - int toptree_count(struct toptree *context, int level); 31 - 32 - struct toptree *toptree_first(struct toptree *context, int level); 33 - struct toptree *toptree_next(struct toptree *cur, struct toptree *context, 34 - int level); 35 - 36 - #define toptree_for_each_child(child, ptree) \ 37 - list_for_each_entry(child, &ptree->children, sibling) 38 - 39 - #define toptree_for_each_child_safe(child, ptmp, ptree) \ 40 - list_for_each_entry_safe(child, ptmp, &ptree->children, sibling) 41 - 42 - #define toptree_is_last(ptree) \ 43 - ((ptree->parent == NULL) || \ 44 - (ptree->parent->children.prev == &ptree->sibling)) 45 - 46 - #define toptree_for_each(ptree, cont, ttype) \ 47 - for (ptree = toptree_first(cont, ttype); \ 48 - ptree != NULL; \ 49 - ptree = toptree_next(ptree, cont, ttype)) 50 - 51 - #define toptree_for_each_safe(ptree, tmp, cont, ttype) \ 52 - for (ptree = toptree_first(cont, ttype), \ 53 - tmp = toptree_next(ptree, cont, ttype); \ 54 - ptree != NULL; \ 55 - ptree = tmp, \ 56 - tmp = toptree_next(ptree, cont, ttype)) 57 - 58 - #define toptree_for_each_sibling(ptree, start) \ 59 - toptree_for_each(ptree, start->parent, start->level) 60 - 61 - #endif /* S390_TOPTREE_H */

+1 -1

drivers/s390/char/sclp_cmd.c

··· 406 406 if (!size) 407 407 goto skip_add; 408 408 for (addr = start; addr < start + size; addr += block_size) 409 - add_memory(numa_pfn_to_nid(PFN_DOWN(addr)), addr, block_size); 409 + add_memory(0, addr, block_size); 410 410 skip_add: 411 411 first_rn = rn; 412 412 num = 1;