Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/mm: remove fake numa support

It turned out that fake NUMA support is rather useless on s390, since
there are no scenarios where there is any performance or other benefit
when it is used.

However, it does incur maintenance cost and breaks from time to time.
Therefore remove it.

CONFIG_NUMA is still supported with a very small backend and only one
node. This way userspace applications which require NUMA interfaces
continue to work.

Note that NODES_SHIFT is set to 1 (= 2 nodes) instead of 0 (= 1 node),
since there is quite a bit of kernel code which assumes that more than
one node is possible if CONFIG_NUMA is enabled.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>

authored by

Heiko Carstens and committed by
Vasily Gorbik
701dc81e 4a559cd1

+20 -1231
+2 -59
arch/s390/Kconfig
··· 450 450 config HOTPLUG_CPU 451 451 def_bool y 452 452 453 - # Some NUMA nodes have memory ranges that span 454 - # other nodes. Even though a pfn is valid and 455 - # between a node's start and end pfns, it may not 456 - # reside on that node. See memmap_init_zone() 457 - # for details. <- They meant memory holes! 458 - config NODES_SPAN_OTHER_NODES 459 - def_bool NUMA 460 - 461 453 config NUMA 462 454 bool "NUMA support" 463 455 depends on SCHED_TOPOLOGY ··· 459 467 460 468 This option adds NUMA support to the kernel. 461 469 462 - An operation mode can be selected by appending 463 - numa=<method> to the kernel command line. 464 - 465 - The default behaviour is identical to appending numa=plain to 466 - the command line. This will create just one node with all 467 - available memory and all CPUs in it. 468 - 469 470 config NODES_SHIFT 470 - int "Maximum NUMA nodes (as a power of 2)" 471 - range 1 10 472 - depends on NUMA 473 - default "4" 474 - help 475 - Specify the maximum number of NUMA nodes available on the target 476 - system. Increases memory reserved to accommodate various tables. 477 - 478 - menu "Select NUMA modes" 479 - depends on NUMA 480 - 481 - config NUMA_EMU 482 - bool "NUMA emulation" 483 - default y 484 - help 485 - Numa emulation mode will split the available system memory into 486 - equal chunks which then are distributed over the configured number 487 - of nodes in a round-robin manner. 488 - 489 - The number of fake nodes is limited by the number of available memory 490 - chunks (i.e. memory size / fake size) and the number of supported 491 - nodes in the kernel. 492 - 493 - The CPUs are assigned to the nodes in a way that partially respects 494 - the original machine topology (if supported by the machine). 495 - Fair distribution of the CPUs is not guaranteed. 
496 - 497 - config EMU_SIZE 498 - hex "NUMA emulation memory chunk size" 499 - default 0x10000000 500 - range 0x400000 0x100000000 501 - depends on NUMA_EMU 502 - help 503 - Select the default size by which the memory is chopped and then 504 - assigned to emulated NUMA nodes. 505 - 506 - This can be overridden by specifying 507 - 508 - emu_size=<n> 509 - 510 - on the kernel command line where also suffixes K, M, G, and T are 511 - supported. 512 - 513 - endmenu 471 + int 472 + default "1" 514 473 515 474 config SCHED_SMT 516 475 def_bool n
+1 -12
arch/s390/include/asm/numa.h
··· 13 13 #ifdef CONFIG_NUMA 14 14 15 15 #include <linux/numa.h> 16 - #include <linux/cpumask.h> 17 16 18 17 void numa_setup(void); 19 - int numa_pfn_to_nid(unsigned long pfn); 20 - int __node_distance(int a, int b); 21 - void numa_update_cpu_topology(void); 22 - 23 - extern cpumask_t node_to_cpumask_map[MAX_NUMNODES]; 24 - extern int numa_debug_enabled; 25 18 26 19 #else 27 20 28 21 static inline void numa_setup(void) { } 29 - static inline void numa_update_cpu_topology(void) { } 30 - static inline int numa_pfn_to_nid(unsigned long pfn) 31 - { 32 - return 0; 33 - } 34 22 35 23 #endif /* CONFIG_NUMA */ 24 + 36 25 #endif /* _ASM_S390_NUMA_H */
+6 -3
arch/s390/include/asm/topology.h
··· 16 16 unsigned short socket_id; 17 17 unsigned short book_id; 18 18 unsigned short drawer_id; 19 - unsigned short node_id; 20 19 unsigned short dedicated : 1; 21 20 cpumask_t thread_mask; 22 21 cpumask_t core_mask; ··· 70 71 #define cpu_to_node cpu_to_node 71 72 static inline int cpu_to_node(int cpu) 72 73 { 73 - return cpu_topology[cpu].node_id; 74 + return 0; 74 75 } 75 76 76 77 /* Returns a pointer to the cpumask of CPUs on node 'node'. */ 77 78 #define cpumask_of_node cpumask_of_node 78 79 static inline const struct cpumask *cpumask_of_node(int node) 79 80 { 80 - return &node_to_cpumask_map[node]; 81 + return cpu_possible_mask; 81 82 } 82 83 83 84 #define pcibus_to_node(bus) __pcibus_to_node(bus) 84 85 85 86 #define node_distance(a, b) __node_distance(a, b) 87 + static inline int __node_distance(int a, int b) 88 + { 89 + return 0; 90 + } 86 91 87 92 #else /* !CONFIG_NUMA */ 88 93
+1
arch/s390/kernel/setup.c
··· 790 790 memblock_physmem_add(start, end - start); 791 791 } 792 792 memblock_set_bottom_up(false); 793 + memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); 793 794 memblock_dump_all(); 794 795 } 795 796
-2
arch/s390/kernel/topology.c
··· 26 26 #include <linux/nodemask.h> 27 27 #include <linux/node.h> 28 28 #include <asm/sysinfo.h> 29 - #include <asm/numa.h> 30 29 31 30 #define PTF_HORIZONTAL (0UL) 32 31 #define PTF_VERTICAL (1UL) ··· 266 267 cpumask_set_cpu(cpu, &cpus_with_topology); 267 268 } 268 269 } 269 - numa_update_cpu_topology(); 270 270 } 271 271 272 272 void store_topology(struct sysinfo_15_1_x *info)
-2
arch/s390/numa/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-y += numa.o 3 - obj-y += toptree.o 4 - obj-$(CONFIG_NUMA_EMU) += mode_emu.o
-577
arch/s390/numa/mode_emu.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * NUMA support for s390 4 - * 5 - * NUMA emulation (aka fake NUMA) distributes the available memory to nodes 6 - * without using real topology information about the physical memory of the 7 - * machine. 8 - * 9 - * It distributes the available CPUs to nodes while respecting the original 10 - * machine topology information. This is done by trying to avoid to separate 11 - * CPUs which reside on the same book or even on the same MC. 12 - * 13 - * Because the current Linux scheduler code requires a stable cpu to node 14 - * mapping, cores are pinned to nodes when the first CPU thread is set online. 15 - * 16 - * Copyright IBM Corp. 2015 17 - */ 18 - 19 - #define KMSG_COMPONENT "numa_emu" 20 - #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 21 - 22 - #include <linux/kernel.h> 23 - #include <linux/cpumask.h> 24 - #include <linux/memblock.h> 25 - #include <linux/node.h> 26 - #include <linux/memory.h> 27 - #include <linux/slab.h> 28 - #include <asm/smp.h> 29 - #include <asm/topology.h> 30 - #include "numa_mode.h" 31 - #include "toptree.h" 32 - 33 - /* Distances between the different system components */ 34 - #define DIST_EMPTY 0 35 - #define DIST_CORE 1 36 - #define DIST_MC 2 37 - #define DIST_BOOK 3 38 - #define DIST_DRAWER 4 39 - #define DIST_MAX 5 40 - 41 - /* Node distance reported to common code */ 42 - #define EMU_NODE_DIST 10 43 - 44 - /* Node ID for free (not yet pinned) cores */ 45 - #define NODE_ID_FREE -1 46 - 47 - /* Different levels of toptree */ 48 - enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY}; 49 - 50 - /* The two toptree IDs */ 51 - enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA}; 52 - 53 - /* Number of NUMA nodes */ 54 - static int emu_nodes = 1; 55 - /* NUMA stripe size */ 56 - static unsigned long emu_size; 57 - 58 - /* 59 - * Node to core pinning information updates are protected by 60 - * "sched_domains_mutex". 
61 - */ 62 - static struct { 63 - s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */ 64 - int total; /* Total number of pinned cores */ 65 - int per_node_target; /* Cores per node without extra cores */ 66 - int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */ 67 - } *emu_cores; 68 - 69 - /* 70 - * Pin a core to a node 71 - */ 72 - static void pin_core_to_node(int core_id, int node_id) 73 - { 74 - if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) { 75 - emu_cores->per_node[node_id]++; 76 - emu_cores->to_node_id[core_id] = node_id; 77 - emu_cores->total++; 78 - } else { 79 - WARN_ON(emu_cores->to_node_id[core_id] != node_id); 80 - } 81 - } 82 - 83 - /* 84 - * Number of pinned cores of a node 85 - */ 86 - static int cores_pinned(struct toptree *node) 87 - { 88 - return emu_cores->per_node[node->id]; 89 - } 90 - 91 - /* 92 - * ID of the node where the core is pinned (or NODE_ID_FREE) 93 - */ 94 - static int core_pinned_to_node_id(struct toptree *core) 95 - { 96 - return emu_cores->to_node_id[core->id]; 97 - } 98 - 99 - /* 100 - * Number of cores in the tree that are not yet pinned 101 - */ 102 - static int cores_free(struct toptree *tree) 103 - { 104 - struct toptree *core; 105 - int count = 0; 106 - 107 - toptree_for_each(core, tree, CORE) { 108 - if (core_pinned_to_node_id(core) == NODE_ID_FREE) 109 - count++; 110 - } 111 - return count; 112 - } 113 - 114 - /* 115 - * Return node of core 116 - */ 117 - static struct toptree *core_node(struct toptree *core) 118 - { 119 - return core->parent->parent->parent->parent; 120 - } 121 - 122 - /* 123 - * Return drawer of core 124 - */ 125 - static struct toptree *core_drawer(struct toptree *core) 126 - { 127 - return core->parent->parent->parent; 128 - } 129 - 130 - /* 131 - * Return book of core 132 - */ 133 - static struct toptree *core_book(struct toptree *core) 134 - { 135 - return core->parent->parent; 136 - } 137 - 138 - /* 139 - * Return mc of core 140 - */ 141 - static struct toptree 
*core_mc(struct toptree *core) 142 - { 143 - return core->parent; 144 - } 145 - 146 - /* 147 - * Distance between two cores 148 - */ 149 - static int dist_core_to_core(struct toptree *core1, struct toptree *core2) 150 - { 151 - if (core_drawer(core1)->id != core_drawer(core2)->id) 152 - return DIST_DRAWER; 153 - if (core_book(core1)->id != core_book(core2)->id) 154 - return DIST_BOOK; 155 - if (core_mc(core1)->id != core_mc(core2)->id) 156 - return DIST_MC; 157 - /* Same core or sibling on same MC */ 158 - return DIST_CORE; 159 - } 160 - 161 - /* 162 - * Distance of a node to a core 163 - */ 164 - static int dist_node_to_core(struct toptree *node, struct toptree *core) 165 - { 166 - struct toptree *core_node; 167 - int dist_min = DIST_MAX; 168 - 169 - toptree_for_each(core_node, node, CORE) 170 - dist_min = min(dist_min, dist_core_to_core(core_node, core)); 171 - return dist_min == DIST_MAX ? DIST_EMPTY : dist_min; 172 - } 173 - 174 - /* 175 - * Unify will delete empty nodes, therefore recreate nodes. 176 - */ 177 - static void toptree_unify_tree(struct toptree *tree) 178 - { 179 - int nid; 180 - 181 - toptree_unify(tree); 182 - for (nid = 0; nid < emu_nodes; nid++) 183 - toptree_get_child(tree, nid); 184 - } 185 - 186 - /* 187 - * Find the best/nearest node for a given core and ensure that no node 188 - * gets more than "emu_cores->per_node_target + extra" cores. 
189 - */ 190 - static struct toptree *node_for_core(struct toptree *numa, struct toptree *core, 191 - int extra) 192 - { 193 - struct toptree *node, *node_best = NULL; 194 - int dist_cur, dist_best, cores_target; 195 - 196 - cores_target = emu_cores->per_node_target + extra; 197 - dist_best = DIST_MAX; 198 - node_best = NULL; 199 - toptree_for_each(node, numa, NODE) { 200 - /* Already pinned cores must use their nodes */ 201 - if (core_pinned_to_node_id(core) == node->id) { 202 - node_best = node; 203 - break; 204 - } 205 - /* Skip nodes that already have enough cores */ 206 - if (cores_pinned(node) >= cores_target) 207 - continue; 208 - dist_cur = dist_node_to_core(node, core); 209 - if (dist_cur < dist_best) { 210 - dist_best = dist_cur; 211 - node_best = node; 212 - } 213 - } 214 - return node_best; 215 - } 216 - 217 - /* 218 - * Find the best node for each core with respect to "extra" core count 219 - */ 220 - static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys, 221 - int extra) 222 - { 223 - struct toptree *node, *core, *tmp; 224 - 225 - toptree_for_each_safe(core, tmp, phys, CORE) { 226 - node = node_for_core(numa, core, extra); 227 - if (!node) 228 - return; 229 - toptree_move(core, node); 230 - pin_core_to_node(core->id, node->id); 231 - } 232 - } 233 - 234 - /* 235 - * Move structures of given level to specified NUMA node 236 - */ 237 - static void move_level_to_numa_node(struct toptree *node, struct toptree *phys, 238 - enum toptree_level level, bool perfect) 239 - { 240 - int cores_free, cores_target = emu_cores->per_node_target; 241 - struct toptree *cur, *tmp; 242 - 243 - toptree_for_each_safe(cur, tmp, phys, level) { 244 - cores_free = cores_target - toptree_count(node, CORE); 245 - if (perfect) { 246 - if (cores_free == toptree_count(cur, CORE)) 247 - toptree_move(cur, node); 248 - } else { 249 - if (cores_free >= toptree_count(cur, CORE)) 250 - toptree_move(cur, node); 251 - } 252 - } 253 - } 254 - 255 - /* 256 - * Move 
structures of a given level to NUMA nodes. If "perfect" is specified 257 - * move only perfectly fitting structures. Otherwise move also smaller 258 - * than needed structures. 259 - */ 260 - static void move_level_to_numa(struct toptree *numa, struct toptree *phys, 261 - enum toptree_level level, bool perfect) 262 - { 263 - struct toptree *node; 264 - 265 - toptree_for_each(node, numa, NODE) 266 - move_level_to_numa_node(node, phys, level, perfect); 267 - } 268 - 269 - /* 270 - * For the first run try to move the big structures 271 - */ 272 - static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys) 273 - { 274 - struct toptree *core; 275 - 276 - /* Always try to move perfectly fitting structures first */ 277 - move_level_to_numa(numa, phys, DRAWER, true); 278 - move_level_to_numa(numa, phys, DRAWER, false); 279 - move_level_to_numa(numa, phys, BOOK, true); 280 - move_level_to_numa(numa, phys, BOOK, false); 281 - move_level_to_numa(numa, phys, MC, true); 282 - move_level_to_numa(numa, phys, MC, false); 283 - /* Now pin all the moved cores */ 284 - toptree_for_each(core, numa, CORE) 285 - pin_core_to_node(core->id, core_node(core)->id); 286 - } 287 - 288 - /* 289 - * Allocate new topology and create required nodes 290 - */ 291 - static struct toptree *toptree_new(int id, int nodes) 292 - { 293 - struct toptree *tree; 294 - int nid; 295 - 296 - tree = toptree_alloc(TOPOLOGY, id); 297 - if (!tree) 298 - goto fail; 299 - for (nid = 0; nid < nodes; nid++) { 300 - if (!toptree_get_child(tree, nid)) 301 - goto fail; 302 - } 303 - return tree; 304 - fail: 305 - panic("NUMA emulation could not allocate topology"); 306 - } 307 - 308 - /* 309 - * Allocate and initialize core to node mapping 310 - */ 311 - static void __ref create_core_to_node_map(void) 312 - { 313 - int i; 314 - 315 - emu_cores = memblock_alloc(sizeof(*emu_cores), 8); 316 - if (!emu_cores) 317 - panic("%s: Failed to allocate %zu bytes align=0x%x\n", 318 - __func__, sizeof(*emu_cores), 8); 
319 - for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++) 320 - emu_cores->to_node_id[i] = NODE_ID_FREE; 321 - } 322 - 323 - /* 324 - * Move cores from physical topology into NUMA target topology 325 - * and try to keep as much of the physical topology as possible. 326 - */ 327 - static struct toptree *toptree_to_numa(struct toptree *phys) 328 - { 329 - static int first = 1; 330 - struct toptree *numa; 331 - int cores_total; 332 - 333 - cores_total = emu_cores->total + cores_free(phys); 334 - emu_cores->per_node_target = cores_total / emu_nodes; 335 - numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes); 336 - if (first) { 337 - toptree_to_numa_first(numa, phys); 338 - first = 0; 339 - } 340 - toptree_to_numa_single(numa, phys, 0); 341 - toptree_to_numa_single(numa, phys, 1); 342 - toptree_unify_tree(numa); 343 - 344 - WARN_ON(cpumask_weight(&phys->mask)); 345 - return numa; 346 - } 347 - 348 - /* 349 - * Create a toptree out of the physical topology that we got from the hypervisor 350 - */ 351 - static struct toptree *toptree_from_topology(void) 352 - { 353 - struct toptree *phys, *node, *drawer, *book, *mc, *core; 354 - struct cpu_topology_s390 *top; 355 - int cpu; 356 - 357 - phys = toptree_new(TOPTREE_ID_PHYS, 1); 358 - 359 - for_each_cpu(cpu, &cpus_with_topology) { 360 - top = &cpu_topology[cpu]; 361 - node = toptree_get_child(phys, 0); 362 - drawer = toptree_get_child(node, top->drawer_id); 363 - book = toptree_get_child(drawer, top->book_id); 364 - mc = toptree_get_child(book, top->socket_id); 365 - core = toptree_get_child(mc, smp_get_base_cpu(cpu)); 366 - if (!drawer || !book || !mc || !core) 367 - panic("NUMA emulation could not allocate memory"); 368 - cpumask_set_cpu(cpu, &core->mask); 369 - toptree_update_mask(mc); 370 - } 371 - return phys; 372 - } 373 - 374 - /* 375 - * Add toptree core to topology and create correct CPU masks 376 - */ 377 - static void topology_add_core(struct toptree *core) 378 - { 379 - struct cpu_topology_s390 *top; 380 - int cpu; 
381 - 382 - for_each_cpu(cpu, &core->mask) { 383 - top = &cpu_topology[cpu]; 384 - cpumask_copy(&top->thread_mask, &core->mask); 385 - cpumask_copy(&top->core_mask, &core_mc(core)->mask); 386 - cpumask_copy(&top->book_mask, &core_book(core)->mask); 387 - cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask); 388 - cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); 389 - top->node_id = core_node(core)->id; 390 - } 391 - } 392 - 393 - /* 394 - * Apply toptree to topology and create CPU masks 395 - */ 396 - static void toptree_to_topology(struct toptree *numa) 397 - { 398 - struct toptree *core; 399 - int i; 400 - 401 - /* Clear all node masks */ 402 - for (i = 0; i < MAX_NUMNODES; i++) 403 - cpumask_clear(&node_to_cpumask_map[i]); 404 - 405 - /* Rebuild all masks */ 406 - toptree_for_each(core, numa, CORE) 407 - topology_add_core(core); 408 - } 409 - 410 - /* 411 - * Show the node to core mapping 412 - */ 413 - static void print_node_to_core_map(void) 414 - { 415 - int nid, cid; 416 - 417 - if (!numa_debug_enabled) 418 - return; 419 - printk(KERN_DEBUG "NUMA node to core mapping\n"); 420 - for (nid = 0; nid < emu_nodes; nid++) { 421 - printk(KERN_DEBUG " node %3d: ", nid); 422 - for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) { 423 - if (emu_cores->to_node_id[cid] == nid) 424 - printk(KERN_CONT "%d ", cid); 425 - } 426 - printk(KERN_CONT "\n"); 427 - } 428 - } 429 - 430 - static void pin_all_possible_cpus(void) 431 - { 432 - int core_id, node_id, cpu; 433 - static int initialized; 434 - 435 - if (initialized) 436 - return; 437 - print_node_to_core_map(); 438 - node_id = 0; 439 - for_each_possible_cpu(cpu) { 440 - core_id = smp_get_base_cpu(cpu); 441 - if (emu_cores->to_node_id[core_id] != NODE_ID_FREE) 442 - continue; 443 - pin_core_to_node(core_id, node_id); 444 - cpu_topology[cpu].node_id = node_id; 445 - node_id = (node_id + 1) % emu_nodes; 446 - } 447 - print_node_to_core_map(); 448 - initialized = 1; 449 - } 450 - 451 - /* 452 - * 
Transfer physical topology into a NUMA topology and modify CPU masks 453 - * according to the NUMA topology. 454 - * 455 - * Must be called with "sched_domains_mutex" lock held. 456 - */ 457 - static void emu_update_cpu_topology(void) 458 - { 459 - struct toptree *phys, *numa; 460 - 461 - if (emu_cores == NULL) 462 - create_core_to_node_map(); 463 - phys = toptree_from_topology(); 464 - numa = toptree_to_numa(phys); 465 - toptree_free(phys); 466 - toptree_to_topology(numa); 467 - toptree_free(numa); 468 - pin_all_possible_cpus(); 469 - } 470 - 471 - /* 472 - * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum 473 - * alignment (needed for memory hotplug). 474 - */ 475 - static unsigned long emu_setup_size_adjust(unsigned long size) 476 - { 477 - unsigned long size_new; 478 - 479 - size = size ? : CONFIG_EMU_SIZE; 480 - size_new = roundup(size, memory_block_size_bytes()); 481 - if (size_new == size) 482 - return size; 483 - pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n", 484 - size >> 20, size_new >> 20); 485 - return size_new; 486 - } 487 - 488 - /* 489 - * If we have not enough memory for the specified nodes, reduce the node count. 
490 - */ 491 - static int emu_setup_nodes_adjust(int nodes) 492 - { 493 - int nodes_max; 494 - 495 - nodes_max = memblock.memory.total_size / emu_size; 496 - nodes_max = max(nodes_max, 1); 497 - if (nodes_max >= nodes) 498 - return nodes; 499 - pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes); 500 - return nodes_max; 501 - } 502 - 503 - /* 504 - * Early emu setup 505 - */ 506 - static void emu_setup(void) 507 - { 508 - int nid; 509 - 510 - emu_size = emu_setup_size_adjust(emu_size); 511 - emu_nodes = emu_setup_nodes_adjust(emu_nodes); 512 - for (nid = 0; nid < emu_nodes; nid++) 513 - node_set(nid, node_possible_map); 514 - pr_info("Creating %d nodes with memory stripe size %ld MB\n", 515 - emu_nodes, emu_size >> 20); 516 - } 517 - 518 - /* 519 - * Return node id for given page number 520 - */ 521 - static int emu_pfn_to_nid(unsigned long pfn) 522 - { 523 - return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes; 524 - } 525 - 526 - /* 527 - * Return stripe size 528 - */ 529 - static unsigned long emu_align(void) 530 - { 531 - return emu_size; 532 - } 533 - 534 - /* 535 - * Return distance between two nodes 536 - */ 537 - static int emu_distance(int node1, int node2) 538 - { 539 - return (node1 != node2) * EMU_NODE_DIST; 540 - } 541 - 542 - /* 543 - * Define callbacks for generic s390 NUMA infrastructure 544 - */ 545 - const struct numa_mode numa_mode_emu = { 546 - .name = "emu", 547 - .setup = emu_setup, 548 - .update_cpu_topology = emu_update_cpu_topology, 549 - .__pfn_to_nid = emu_pfn_to_nid, 550 - .align = emu_align, 551 - .distance = emu_distance, 552 - }; 553 - 554 - /* 555 - * Kernel parameter: emu_nodes=<n> 556 - */ 557 - static int __init early_parse_emu_nodes(char *p) 558 - { 559 - int count; 560 - 561 - if (!p || kstrtoint(p, 0, &count) != 0 || count <= 0) 562 - return 0; 563 - emu_nodes = min(count, MAX_NUMNODES); 564 - return 0; 565 - } 566 - early_param("emu_nodes", early_parse_emu_nodes); 567 - 568 - /* 569 - * Kernel parameter: 
emu_size=[<n>[k|M|G|T]] 570 - */ 571 - static int __init early_parse_emu_size(char *p) 572 - { 573 - if (p) 574 - emu_size = memparse(p, NULL); 575 - return 0; 576 - } 577 - early_param("emu_size", early_parse_emu_size);
+9 -138
arch/s390/numa/numa.c
··· 7 7 * Copyright IBM Corp. 2015 8 8 */ 9 9 10 - #define KMSG_COMPONENT "numa" 11 - #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 12 - 13 10 #include <linux/kernel.h> 14 11 #include <linux/mmzone.h> 15 12 #include <linux/cpumask.h> 16 13 #include <linux/memblock.h> 17 - #include <linux/slab.h> 18 14 #include <linux/node.h> 19 - 20 15 #include <asm/numa.h> 21 - #include "numa_mode.h" 22 16 23 - pg_data_t *node_data[MAX_NUMNODES]; 17 + struct pglist_data *node_data[MAX_NUMNODES]; 24 18 EXPORT_SYMBOL(node_data); 25 19 26 - cpumask_t node_to_cpumask_map[MAX_NUMNODES]; 27 - EXPORT_SYMBOL(node_to_cpumask_map); 28 - 29 - static void plain_setup(void) 20 + void __init numa_setup(void) 30 21 { 22 + int nid; 23 + 24 + nodes_clear(node_possible_map); 31 25 node_set(0, node_possible_map); 32 - } 33 - 34 - const struct numa_mode numa_mode_plain = { 35 - .name = "plain", 36 - .setup = plain_setup, 37 - }; 38 - 39 - static const struct numa_mode *mode = &numa_mode_plain; 40 - 41 - int numa_pfn_to_nid(unsigned long pfn) 42 - { 43 - return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0; 44 - } 45 - 46 - void numa_update_cpu_topology(void) 47 - { 48 - if (mode->update_cpu_topology) 49 - mode->update_cpu_topology(); 50 - } 51 - 52 - int __node_distance(int a, int b) 53 - { 54 - return mode->distance ? mode->distance(a, b) : 0; 55 - } 56 - EXPORT_SYMBOL(__node_distance); 57 - 58 - int numa_debug_enabled; 59 - 60 - /* 61 - * numa_setup_memory() - Assign bootmem to nodes 62 - * 63 - * The memory is first added to memblock without any respect to nodes. 64 - * This is fixed before remaining memblock memory is handed over to the 65 - * buddy allocator. 66 - * An important side effect is that large bootmem allocations might easily 67 - * cross node boundaries, which can be needed for large allocations with 68 - * smaller memory stripes in each node (i.e. when using NUMA emulation). 69 - * 70 - * Memory defines nodes: 71 - * Therefore this routine also sets the nodes online with memory. 
72 - */ 73 - static void __init numa_setup_memory(void) 74 - { 75 - unsigned long cur_base, align, end_of_dram; 76 - int nid = 0; 77 - 78 - end_of_dram = memblock_end_of_DRAM(); 79 - align = mode->align ? mode->align() : ULONG_MAX; 80 - 81 - /* 82 - * Step through all available memory and assign it to the nodes 83 - * indicated by the mode implementation. 84 - * All nodes which are seen here will be set online. 85 - */ 86 - cur_base = 0; 87 - do { 88 - nid = numa_pfn_to_nid(PFN_DOWN(cur_base)); 89 - node_set_online(nid); 90 - memblock_set_node(cur_base, align, &memblock.memory, nid); 91 - cur_base += align; 92 - } while (cur_base < end_of_dram); 93 - 94 - /* Allocate and fill out node_data */ 26 + node_set_online(0); 95 27 for (nid = 0; nid < MAX_NUMNODES; nid++) { 96 28 NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); 97 29 if (!NODE_DATA(nid)) 98 30 panic("%s: Failed to allocate %zu bytes align=0x%x\n", 99 31 __func__, sizeof(pg_data_t), 8); 100 32 } 101 - 102 - for_each_online_node(nid) { 103 - unsigned long start_pfn, end_pfn; 104 - unsigned long t_start, t_end; 105 - int i; 106 - 107 - start_pfn = ULONG_MAX; 108 - end_pfn = 0; 109 - for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) { 110 - if (t_start < start_pfn) 111 - start_pfn = t_start; 112 - if (t_end > end_pfn) 113 - end_pfn = t_end; 114 - } 115 - NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; 116 - NODE_DATA(nid)->node_id = nid; 117 - } 33 + NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; 34 + NODE_DATA(0)->node_id = 0; 118 35 } 119 36 120 - /* 121 - * numa_setup() - Earliest initialization 122 - * 123 - * Assign the mode and call the mode's setup routine. 124 - */ 125 - void __init numa_setup(void) 126 - { 127 - pr_info("NUMA mode: %s\n", mode->name); 128 - nodes_clear(node_possible_map); 129 - /* Initially attach all possible CPUs to node 0. 
*/ 130 - cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); 131 - if (mode->setup) 132 - mode->setup(); 133 - numa_setup_memory(); 134 - memblock_dump_all(); 135 - } 136 - 137 - /* 138 - * numa_init_late() - Initialization initcall 139 - * 140 - * Register NUMA nodes. 141 - */ 142 37 static int __init numa_init_late(void) 143 38 { 144 - int nid; 145 - 146 - for_each_online_node(nid) 147 - register_one_node(nid); 39 + register_one_node(0); 148 40 return 0; 149 41 } 150 42 arch_initcall(numa_init_late); 151 - 152 - static int __init parse_debug(char *parm) 153 - { 154 - numa_debug_enabled = 1; 155 - return 0; 156 - } 157 - early_param("numa_debug", parse_debug); 158 - 159 - static int __init parse_numa(char *parm) 160 - { 161 - if (!parm) 162 - return 1; 163 - if (strcmp(parm, numa_mode_plain.name) == 0) 164 - mode = &numa_mode_plain; 165 - #ifdef CONFIG_NUMA_EMU 166 - if (strcmp(parm, numa_mode_emu.name) == 0) 167 - mode = &numa_mode_emu; 168 - #endif 169 - return 0; 170 - } 171 - early_param("numa", parse_numa);
-25
arch/s390/numa/numa_mode.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * NUMA support for s390 4 - * 5 - * Define declarations used for communication between NUMA mode 6 - * implementations and NUMA core functionality. 7 - * 8 - * Copyright IBM Corp. 2015 9 - */ 10 - #ifndef __S390_NUMA_MODE_H 11 - #define __S390_NUMA_MODE_H 12 - 13 - struct numa_mode { 14 - char *name; /* Name of mode */ 15 - void (*setup)(void); /* Initizalize mode */ 16 - void (*update_cpu_topology)(void); /* Called by topology code */ 17 - int (*__pfn_to_nid)(unsigned long pfn); /* PFN to node ID */ 18 - unsigned long (*align)(void); /* Minimum node alignment */ 19 - int (*distance)(int a, int b); /* Distance between two nodes */ 20 - }; 21 - 22 - extern const struct numa_mode numa_mode_plain; 23 - extern const struct numa_mode numa_mode_emu; 24 - 25 - #endif /* __S390_NUMA_MODE_H */
-351
arch/s390/numa/toptree.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * NUMA support for s390 4 - * 5 - * A tree structure used for machine topology mangling 6 - * 7 - * Copyright IBM Corp. 2015 8 - */ 9 - 10 - #include <linux/kernel.h> 11 - #include <linux/memblock.h> 12 - #include <linux/cpumask.h> 13 - #include <linux/list.h> 14 - #include <linux/list_sort.h> 15 - #include <linux/slab.h> 16 - #include <asm/numa.h> 17 - 18 - #include "toptree.h" 19 - 20 - /** 21 - * toptree_alloc - Allocate and initialize a new tree node. 22 - * @level: The node's vertical level; level 0 contains the leaves. 23 - * @id: ID number, explicitly not unique beyond scope of node's siblings 24 - * 25 - * Allocate a new tree node and initialize it. 26 - * 27 - * RETURNS: 28 - * Pointer to the new tree node or NULL on error 29 - */ 30 - struct toptree __ref *toptree_alloc(int level, int id) 31 - { 32 - struct toptree *res; 33 - 34 - if (slab_is_available()) 35 - res = kzalloc(sizeof(*res), GFP_KERNEL); 36 - else 37 - res = memblock_alloc(sizeof(*res), 8); 38 - if (!res) 39 - return res; 40 - 41 - INIT_LIST_HEAD(&res->children); 42 - INIT_LIST_HEAD(&res->sibling); 43 - cpumask_clear(&res->mask); 44 - res->level = level; 45 - res->id = id; 46 - return res; 47 - } 48 - 49 - /** 50 - * toptree_remove - Remove a tree node from a tree 51 - * @cand: Pointer to the node to remove 52 - * 53 - * The node is detached from its parent node. The parent node's 54 - * masks will be updated to reflect the loss of the child. 55 - */ 56 - static void toptree_remove(struct toptree *cand) 57 - { 58 - struct toptree *oldparent; 59 - 60 - list_del_init(&cand->sibling); 61 - oldparent = cand->parent; 62 - cand->parent = NULL; 63 - toptree_update_mask(oldparent); 64 - } 65 - 66 - /** 67 - * toptree_free - discard a tree node 68 - * @cand: Pointer to the tree node to discard 69 - * 70 - * Checks if @cand is attached to a parent node. Detaches it 71 - * cleanly using toptree_remove. Possible children are freed 72 - * recursively. 
In the end @cand itself is freed. 73 - */ 74 - void __ref toptree_free(struct toptree *cand) 75 - { 76 - struct toptree *child, *tmp; 77 - 78 - if (cand->parent) 79 - toptree_remove(cand); 80 - toptree_for_each_child_safe(child, tmp, cand) 81 - toptree_free(child); 82 - if (slab_is_available()) 83 - kfree(cand); 84 - else 85 - memblock_free_early((unsigned long)cand, sizeof(*cand)); 86 - } 87 - 88 - /** 89 - * toptree_update_mask - Update node bitmasks 90 - * @cand: Pointer to a tree node 91 - * 92 - * The node's cpumask will be updated by combining all children's 93 - * masks. Then toptree_update_mask is called recursively for the 94 - * parent if applicable. 95 - * 96 - * NOTE: 97 - * This must not be called on leaves. If called on a leaf, its 98 - * CPU mask is cleared and lost. 99 - */ 100 - void toptree_update_mask(struct toptree *cand) 101 - { 102 - struct toptree *child; 103 - 104 - cpumask_clear(&cand->mask); 105 - list_for_each_entry(child, &cand->children, sibling) 106 - cpumask_or(&cand->mask, &cand->mask, &child->mask); 107 - if (cand->parent) 108 - toptree_update_mask(cand->parent); 109 - } 110 - 111 - /** 112 - * toptree_insert - Insert a tree node into tree 113 - * @cand: Pointer to the node to insert 114 - * @target: Pointer to the node to which @cand will added as a child 115 - * 116 - * Insert a tree node into a tree. Masks will be updated automatically. 117 - * 118 - * RETURNS: 119 - * 0 on success, -1 if NULL is passed as argument or the node levels 120 - * don't fit. 
121 - */ 122 - static int toptree_insert(struct toptree *cand, struct toptree *target) 123 - { 124 - if (!cand || !target) 125 - return -1; 126 - if (target->level != (cand->level + 1)) 127 - return -1; 128 - list_add_tail(&cand->sibling, &target->children); 129 - cand->parent = target; 130 - toptree_update_mask(target); 131 - return 0; 132 - } 133 - 134 - /** 135 - * toptree_move_children - Move all child nodes of a node to a new place 136 - * @cand: Pointer to the node whose children are to be moved 137 - * @target: Pointer to the node to which @cand's children will be attached 138 - * 139 - * Take all child nodes of @cand and move them using toptree_move. 140 - */ 141 - static void toptree_move_children(struct toptree *cand, struct toptree *target) 142 - { 143 - struct toptree *child, *tmp; 144 - 145 - toptree_for_each_child_safe(child, tmp, cand) 146 - toptree_move(child, target); 147 - } 148 - 149 - /** 150 - * toptree_unify - Merge children with same ID 151 - * @cand: Pointer to node whose direct children should be made unique 152 - * 153 - * When mangling the tree it is possible that a node has two or more children 154 - * which have the same ID. This routine merges these children into one and 155 - * moves all children of the merged nodes into the unified node. 
156 - */ 157 - void toptree_unify(struct toptree *cand) 158 - { 159 - struct toptree *child, *tmp, *cand_copy; 160 - 161 - /* Threads cannot be split, cores are not split */ 162 - if (cand->level < 2) 163 - return; 164 - 165 - cand_copy = toptree_alloc(cand->level, 0); 166 - toptree_for_each_child_safe(child, tmp, cand) { 167 - struct toptree *tmpchild; 168 - 169 - if (!cpumask_empty(&child->mask)) { 170 - tmpchild = toptree_get_child(cand_copy, child->id); 171 - toptree_move_children(child, tmpchild); 172 - } 173 - toptree_free(child); 174 - } 175 - toptree_move_children(cand_copy, cand); 176 - toptree_free(cand_copy); 177 - 178 - toptree_for_each_child(child, cand) 179 - toptree_unify(child); 180 - } 181 - 182 - /** 183 - * toptree_move - Move a node to another context 184 - * @cand: Pointer to the node to move 185 - * @target: Pointer to the node where @cand should go 186 - * 187 - * In the easiest case @cand is exactly on the level below @target 188 - * and will be immediately moved to the target. 189 - * 190 - * If @target's level is not the direct parent level of @cand, 191 - * nodes for the missing levels are created and put between 192 - * @cand and @target. The "stacking" nodes' IDs are taken from 193 - * @cand's parents. 194 - * 195 - * After this it is likely to have redundant nodes in the tree 196 - * which are addressed by means of toptree_unify. 
197 - */ 198 - void toptree_move(struct toptree *cand, struct toptree *target) 199 - { 200 - struct toptree *stack_target, *real_insert_point, *ptr, *tmp; 201 - 202 - if (cand->level + 1 == target->level) { 203 - toptree_remove(cand); 204 - toptree_insert(cand, target); 205 - return; 206 - } 207 - 208 - real_insert_point = NULL; 209 - ptr = cand; 210 - stack_target = NULL; 211 - 212 - do { 213 - tmp = stack_target; 214 - stack_target = toptree_alloc(ptr->level + 1, 215 - ptr->parent->id); 216 - toptree_insert(tmp, stack_target); 217 - if (!real_insert_point) 218 - real_insert_point = stack_target; 219 - ptr = ptr->parent; 220 - } while (stack_target->level < (target->level - 1)); 221 - 222 - toptree_remove(cand); 223 - toptree_insert(cand, real_insert_point); 224 - toptree_insert(stack_target, target); 225 - } 226 - 227 - /** 228 - * toptree_get_child - Access a tree node's child by its ID 229 - * @cand: Pointer to tree node whose child is to access 230 - * @id: The desired child's ID 231 - * 232 - * @cand's children are searched for a child with matching ID. 233 - * If no match can be found, a new child with the desired ID 234 - * is created and returned. 
235 - */ 236 - struct toptree *toptree_get_child(struct toptree *cand, int id) 237 - { 238 - struct toptree *child; 239 - 240 - toptree_for_each_child(child, cand) 241 - if (child->id == id) 242 - return child; 243 - child = toptree_alloc(cand->level-1, id); 244 - toptree_insert(child, cand); 245 - return child; 246 - } 247 - 248 - /** 249 - * toptree_first - Find the first descendant on specified level 250 - * @context: Pointer to tree node whose descendants are to be used 251 - * @level: The level of interest 252 - * 253 - * RETURNS: 254 - * @context's first descendant on the specified level, or NULL 255 - * if there is no matching descendant 256 - */ 257 - struct toptree *toptree_first(struct toptree *context, int level) 258 - { 259 - struct toptree *child, *tmp; 260 - 261 - if (context->level == level) 262 - return context; 263 - 264 - if (!list_empty(&context->children)) { 265 - list_for_each_entry(child, &context->children, sibling) { 266 - tmp = toptree_first(child, level); 267 - if (tmp) 268 - return tmp; 269 - } 270 - } 271 - return NULL; 272 - } 273 - 274 - /** 275 - * toptree_next_sibling - Return next sibling 276 - * @cur: Pointer to a tree node 277 - * 278 - * RETURNS: 279 - * If @cur has a parent and is not the last in the parent's children list, 280 - * the next sibling is returned. Or NULL when there are no siblings left. 281 - */ 282 - static struct toptree *toptree_next_sibling(struct toptree *cur) 283 - { 284 - if (cur->parent == NULL) 285 - return NULL; 286 - 287 - if (cur == list_last_entry(&cur->parent->children, 288 - struct toptree, sibling)) 289 - return NULL; 290 - return (struct toptree *) list_next_entry(cur, sibling); 291 - } 292 - 293 - /** 294 - * toptree_next - Tree traversal function 295 - * @cur: Pointer to current element 296 - * @context: Pointer to the root node of the tree or subtree to 297 - * be traversed. 298 - * @level: The level of interest. 
299 - * 300 - * RETURNS: 301 - * Pointer to the next node on level @level 302 - * or NULL when there is no next node. 303 - */ 304 - struct toptree *toptree_next(struct toptree *cur, struct toptree *context, 305 - int level) 306 - { 307 - struct toptree *cur_context, *tmp; 308 - 309 - if (!cur) 310 - return NULL; 311 - 312 - if (context->level == level) 313 - return NULL; 314 - 315 - tmp = toptree_next_sibling(cur); 316 - if (tmp != NULL) 317 - return tmp; 318 - 319 - cur_context = cur; 320 - while (cur_context->level < context->level - 1) { 321 - /* Step up */ 322 - cur_context = cur_context->parent; 323 - /* Step aside */ 324 - tmp = toptree_next_sibling(cur_context); 325 - if (tmp != NULL) { 326 - /* Step down */ 327 - tmp = toptree_first(tmp, level); 328 - if (tmp != NULL) 329 - return tmp; 330 - } 331 - } 332 - return NULL; 333 - } 334 - 335 - /** 336 - * toptree_count - Count descendants on specified level 337 - * @context: Pointer to node whose descendants are to be considered 338 - * @level: Only descendants on the specified level will be counted 339 - * 340 - * RETURNS: 341 - * Number of descendants on the specified level 342 - */ 343 - int toptree_count(struct toptree *context, int level) 344 - { 345 - struct toptree *cur; 346 - int cnt = 0; 347 - 348 - toptree_for_each(cur, context, level) 349 - cnt++; 350 - return cnt; 351 - }
-61
arch/s390/numa/toptree.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * NUMA support for s390 4 - * 5 - * A tree structure used for machine topology mangling 6 - * 7 - * Copyright IBM Corp. 2015 8 - */ 9 - #ifndef S390_TOPTREE_H 10 - #define S390_TOPTREE_H 11 - 12 - #include <linux/cpumask.h> 13 - #include <linux/list.h> 14 - 15 - struct toptree { 16 - int level; 17 - int id; 18 - cpumask_t mask; 19 - struct toptree *parent; 20 - struct list_head sibling; 21 - struct list_head children; 22 - }; 23 - 24 - struct toptree *toptree_alloc(int level, int id); 25 - void toptree_free(struct toptree *cand); 26 - void toptree_update_mask(struct toptree *cand); 27 - void toptree_unify(struct toptree *cand); 28 - struct toptree *toptree_get_child(struct toptree *cand, int id); 29 - void toptree_move(struct toptree *cand, struct toptree *target); 30 - int toptree_count(struct toptree *context, int level); 31 - 32 - struct toptree *toptree_first(struct toptree *context, int level); 33 - struct toptree *toptree_next(struct toptree *cur, struct toptree *context, 34 - int level); 35 - 36 - #define toptree_for_each_child(child, ptree) \ 37 - list_for_each_entry(child, &ptree->children, sibling) 38 - 39 - #define toptree_for_each_child_safe(child, ptmp, ptree) \ 40 - list_for_each_entry_safe(child, ptmp, &ptree->children, sibling) 41 - 42 - #define toptree_is_last(ptree) \ 43 - ((ptree->parent == NULL) || \ 44 - (ptree->parent->children.prev == &ptree->sibling)) 45 - 46 - #define toptree_for_each(ptree, cont, ttype) \ 47 - for (ptree = toptree_first(cont, ttype); \ 48 - ptree != NULL; \ 49 - ptree = toptree_next(ptree, cont, ttype)) 50 - 51 - #define toptree_for_each_safe(ptree, tmp, cont, ttype) \ 52 - for (ptree = toptree_first(cont, ttype), \ 53 - tmp = toptree_next(ptree, cont, ttype); \ 54 - ptree != NULL; \ 55 - ptree = tmp, \ 56 - tmp = toptree_next(ptree, cont, ttype)) 57 - 58 - #define toptree_for_each_sibling(ptree, start) \ 59 - toptree_for_each(ptree, start->parent, start->level) 60 
- 61 - #endif /* S390_TOPTREE_H */
+1 -1
drivers/s390/char/sclp_cmd.c
··· 406 406 if (!size) 407 407 goto skip_add; 408 408 for (addr = start; addr < start + size; addr += block_size) 409 - add_memory(numa_pfn_to_nid(PFN_DOWN(addr)), addr, block_size); 409 + add_memory(0, addr, block_size); 410 410 skip_add: 411 411 first_rn = rn; 412 412 num = 1;