Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v5.1-rc6 503 lines 13 kB view raw
1/* 2 * acpi_numa.c - ACPI NUMA support 3 * 4 * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com> 5 * 6 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 * 20 */ 21 22#define pr_fmt(fmt) "ACPI: " fmt 23 24#include <linux/module.h> 25#include <linux/init.h> 26#include <linux/kernel.h> 27#include <linux/types.h> 28#include <linux/errno.h> 29#include <linux/acpi.h> 30#include <linux/memblock.h> 31#include <linux/numa.h> 32#include <linux/nodemask.h> 33#include <linux/topology.h> 34 35static nodemask_t nodes_found_map = NODE_MASK_NONE; 36 37/* maps to convert between proximity domain and logical node ID */ 38static int pxm_to_node_map[MAX_PXM_DOMAINS] 39 = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE }; 40static int node_to_pxm_map[MAX_NUMNODES] 41 = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; 42 43unsigned char acpi_srat_revision __initdata; 44int acpi_numa __initdata; 45 46int pxm_to_node(int pxm) 47{ 48 if (pxm < 0) 49 return NUMA_NO_NODE; 50 return pxm_to_node_map[pxm]; 51} 52 53int node_to_pxm(int node) 54{ 55 if (node < 0) 56 return PXM_INVAL; 57 return node_to_pxm_map[node]; 58} 59 60static void __acpi_map_pxm_to_node(int pxm, int node) 61{ 62 if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm]) 63 pxm_to_node_map[pxm] = node; 64 if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node]) 65 node_to_pxm_map[node] = pxm; 66} 67 68int acpi_map_pxm_to_node(int pxm) 69{ 70 int node; 71 72 if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) 73 return NUMA_NO_NODE; 74 75 node = pxm_to_node_map[pxm]; 76 77 if (node == NUMA_NO_NODE) { 78 if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) 79 return NUMA_NO_NODE; 80 node = first_unset_node(nodes_found_map); 81 __acpi_map_pxm_to_node(pxm, node); 82 node_set(node, nodes_found_map); 83 } 84 85 return node; 86} 87EXPORT_SYMBOL(acpi_map_pxm_to_node); 88 89/** 90 * acpi_map_pxm_to_online_node - Map proximity ID to online node 91 * @pxm: ACPI proximity ID 92 * 93 * This is similar to acpi_map_pxm_to_node(), but always returns an online 94 * node. When the mapped node from a given proximity ID is offline, it 95 * looks up the node distance table and returns the nearest online node. 96 * 97 * ACPI device drivers, which are called after the NUMA initialization has 98 * completed in the kernel, can call this interface to obtain their device 99 * NUMA topology from ACPI tables. Such drivers do not have to deal with 100 * offline nodes. A node may be offline when a device proximity ID is 101 * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. 102 * "numa=off" on x86. 103 */ 104int acpi_map_pxm_to_online_node(int pxm) 105{ 106 int node, min_node; 107 108 node = acpi_map_pxm_to_node(pxm); 109 110 if (node == NUMA_NO_NODE) 111 node = 0; 112 113 min_node = node; 114 if (!node_online(node)) { 115 int min_dist = INT_MAX, dist, n; 116 117 for_each_online_node(n) { 118 dist = node_distance(node, n); 119 if (dist < min_dist) { 120 min_dist = dist; 121 min_node = n; 122 } 123 } 124 } 125 126 return min_node; 127} 128EXPORT_SYMBOL(acpi_map_pxm_to_online_node); 129 130static void __init 131acpi_table_print_srat_entry(struct acpi_subtable_header *header) 132{ 133 switch (header->type) { 134 case ACPI_SRAT_TYPE_CPU_AFFINITY: 135 { 136 struct acpi_srat_cpu_affinity *p = 137 (struct acpi_srat_cpu_affinity *)header; 138 pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", 139 p->apic_id, p->local_sapic_eid, 140 p->proximity_domain_lo, 141 (p->flags & ACPI_SRAT_CPU_ENABLED) ? 142 "enabled" : "disabled"); 143 } 144 break; 145 146 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 147 { 148 struct acpi_srat_mem_affinity *p = 149 (struct acpi_srat_mem_affinity *)header; 150 pr_debug("SRAT Memory (0x%llx length 0x%llx) in proximity domain %d %s%s%s\n", 151 (unsigned long long)p->base_address, 152 (unsigned long long)p->length, 153 p->proximity_domain, 154 (p->flags & ACPI_SRAT_MEM_ENABLED) ? 155 "enabled" : "disabled", 156 (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? 157 " hot-pluggable" : "", 158 (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ? 159 " non-volatile" : ""); 160 } 161 break; 162 163 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 164 { 165 struct acpi_srat_x2apic_cpu_affinity *p = 166 (struct acpi_srat_x2apic_cpu_affinity *)header; 167 pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n", 168 p->apic_id, 169 p->proximity_domain, 170 (p->flags & ACPI_SRAT_CPU_ENABLED) ? 171 "enabled" : "disabled"); 172 } 173 break; 174 175 case ACPI_SRAT_TYPE_GICC_AFFINITY: 176 { 177 struct acpi_srat_gicc_affinity *p = 178 (struct acpi_srat_gicc_affinity *)header; 179 pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n", 180 p->acpi_processor_uid, 181 p->proximity_domain, 182 (p->flags & ACPI_SRAT_GICC_ENABLED) ? 183 "enabled" : "disabled"); 184 } 185 break; 186 187 default: 188 pr_warn("Found unsupported SRAT entry (type = 0x%x)\n", 189 header->type); 190 break; 191 } 192} 193 194/* 195 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes 196 * up the NUMA heuristics which wants the local node to have a smaller 197 * distance than the others. 198 * Do some quick checks here and only use the SLIT if it passes. 199 */ 200static int __init slit_valid(struct acpi_table_slit *slit) 201{ 202 int i, j; 203 int d = slit->locality_count; 204 for (i = 0; i < d; i++) { 205 for (j = 0; j < d; j++) { 206 u8 val = slit->entry[d*i + j]; 207 if (i == j) { 208 if (val != LOCAL_DISTANCE) 209 return 0; 210 } else if (val <= LOCAL_DISTANCE) 211 return 0; 212 } 213 } 214 return 1; 215} 216 217void __init bad_srat(void) 218{ 219 pr_err("SRAT: SRAT not used.\n"); 220 acpi_numa = -1; 221} 222 223int __init srat_disabled(void) 224{ 225 return acpi_numa < 0; 226} 227 228#if defined(CONFIG_X86) || defined(CONFIG_ARM64) 229/* 230 * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for 231 * I/O localities since SRAT does not list them. I/O localities are 232 * not supported at this point. 233 */ 234void __init acpi_numa_slit_init(struct acpi_table_slit *slit) 235{ 236 int i, j; 237 238 for (i = 0; i < slit->locality_count; i++) { 239 const int from_node = pxm_to_node(i); 240 241 if (from_node == NUMA_NO_NODE) 242 continue; 243 244 for (j = 0; j < slit->locality_count; j++) { 245 const int to_node = pxm_to_node(j); 246 247 if (to_node == NUMA_NO_NODE) 248 continue; 249 250 numa_set_distance(from_node, to_node, 251 slit->entry[slit->locality_count * i + j]); 252 } 253 } 254} 255 256/* 257 * Default callback for parsing of the Proximity Domain <-> Memory 258 * Area mappings 259 */ 260int __init 261acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) 262{ 263 u64 start, end; 264 u32 hotpluggable; 265 int node, pxm; 266 267 if (srat_disabled()) 268 goto out_err; 269 if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) { 270 pr_err("SRAT: Unexpected header length: %d\n", 271 ma->header.length); 272 goto out_err_bad_srat; 273 } 274 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) 275 goto out_err; 276 hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; 277 if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) 278 goto out_err; 279 280 start = ma->base_address; 281 end = start + ma->length; 282 pxm = ma->proximity_domain; 283 if (acpi_srat_revision <= 1) 284 pxm &= 0xff; 285 286 node = acpi_map_pxm_to_node(pxm); 287 if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { 288 pr_err("SRAT: Too many proximity domains.\n"); 289 goto out_err_bad_srat; 290 } 291 292 if (numa_add_memblk(node, start, end) < 0) { 293 pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n", 294 node, (unsigned long long) start, 295 (unsigned long long) end - 1); 296 goto out_err_bad_srat; 297 } 298 299 node_set(node, numa_nodes_parsed); 300 301 pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", 302 node, pxm, 303 (unsigned long long) start, (unsigned long long) end - 1, 304 hotpluggable ? " hotplug" : "", 305 ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); 306 307 /* Mark hotplug range in memblock. */ 308 if (hotpluggable && memblock_mark_hotplug(start, ma->length)) 309 pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", 310 (unsigned long long)start, (unsigned long long)end - 1); 311 312 max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1)); 313 314 return 0; 315out_err_bad_srat: 316 bad_srat(); 317out_err: 318 return -EINVAL; 319} 320#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */ 321 322static int __init acpi_parse_slit(struct acpi_table_header *table) 323{ 324 struct acpi_table_slit *slit = (struct acpi_table_slit *)table; 325 326 if (!slit_valid(slit)) { 327 pr_info("SLIT table looks invalid. Not used.\n"); 328 return -EINVAL; 329 } 330 acpi_numa_slit_init(slit); 331 332 return 0; 333} 334 335void __init __weak 336acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) 337{ 338 pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id); 339} 340 341static int __init 342acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, 343 const unsigned long end) 344{ 345 struct acpi_srat_x2apic_cpu_affinity *processor_affinity; 346 347 processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header; 348 if (!processor_affinity) 349 return -EINVAL; 350 351 acpi_table_print_srat_entry(header); 352 353 /* let architecture-dependent part to do it */ 354 acpi_numa_x2apic_affinity_init(processor_affinity); 355 356 return 0; 357} 358 359static int __init 360acpi_parse_processor_affinity(struct acpi_subtable_header *header, 361 const unsigned long end) 362{ 363 struct acpi_srat_cpu_affinity *processor_affinity; 364 365 processor_affinity = (struct acpi_srat_cpu_affinity *)header; 366 if (!processor_affinity) 367 return -EINVAL; 368 369 acpi_table_print_srat_entry(header); 370 371 /* let architecture-dependent part to do it */ 372 acpi_numa_processor_affinity_init(processor_affinity); 373 374 return 0; 375} 376 377static int __init 378acpi_parse_gicc_affinity(struct acpi_subtable_header *header, 379 const unsigned long end) 380{ 381 struct acpi_srat_gicc_affinity *processor_affinity; 382 383 processor_affinity = (struct acpi_srat_gicc_affinity *)header; 384 if (!processor_affinity) 385 return -EINVAL; 386 387 acpi_table_print_srat_entry(header); 388 389 /* let architecture-dependent part to do it */ 390 acpi_numa_gicc_affinity_init(processor_affinity); 391 392 return 0; 393} 394 395static int __initdata parsed_numa_memblks; 396 397static int __init 398acpi_parse_memory_affinity(struct acpi_subtable_header * header, 399 const unsigned long end) 400{ 401 struct acpi_srat_mem_affinity *memory_affinity; 402 403 memory_affinity = (struct acpi_srat_mem_affinity *)header; 404 if (!memory_affinity) 405 return -EINVAL; 406 407 acpi_table_print_srat_entry(header); 408 409 /* let architecture-dependent part to do it */ 410 if (!acpi_numa_memory_affinity_init(memory_affinity)) 411 parsed_numa_memblks++; 412 return 0; 413} 414 415static int __init acpi_parse_srat(struct acpi_table_header *table) 416{ 417 struct acpi_table_srat *srat = (struct acpi_table_srat *)table; 418 419 acpi_srat_revision = srat->header.revision; 420 421 /* Real work done in acpi_table_parse_srat below. */ 422 423 return 0; 424} 425 426static int __init 427acpi_table_parse_srat(enum acpi_srat_type id, 428 acpi_tbl_entry_handler handler, unsigned int max_entries) 429{ 430 return acpi_table_parse_entries(ACPI_SIG_SRAT, 431 sizeof(struct acpi_table_srat), id, 432 handler, max_entries); 433} 434 435int __init acpi_numa_init(void) 436{ 437 int cnt = 0; 438 439 if (acpi_disabled) 440 return -EINVAL; 441 442 /* 443 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= 444 * SRAT cpu entries could have different order with that in MADT. 445 * So go over all cpu entries in SRAT to get apicid to node mapping. 446 */ 447 448 /* SRAT: System Resource Affinity Table */ 449 if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { 450 struct acpi_subtable_proc srat_proc[3]; 451 452 memset(srat_proc, 0, sizeof(srat_proc)); 453 srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY; 454 srat_proc[0].handler = acpi_parse_processor_affinity; 455 srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY; 456 srat_proc[1].handler = acpi_parse_x2apic_affinity; 457 srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY; 458 srat_proc[2].handler = acpi_parse_gicc_affinity; 459 460 acpi_table_parse_entries_array(ACPI_SIG_SRAT, 461 sizeof(struct acpi_table_srat), 462 srat_proc, ARRAY_SIZE(srat_proc), 0); 463 464 cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, 465 acpi_parse_memory_affinity, 0); 466 } 467 468 /* SLIT: System Locality Information Table */ 469 acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); 470 471 if (cnt < 0) 472 return cnt; 473 else if (!parsed_numa_memblks) 474 return -ENOENT; 475 return 0; 476} 477 478static int acpi_get_pxm(acpi_handle h) 479{ 480 unsigned long long pxm; 481 acpi_status status; 482 acpi_handle handle; 483 acpi_handle phandle = h; 484 485 do { 486 handle = phandle; 487 status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); 488 if (ACPI_SUCCESS(status)) 489 return pxm; 490 status = acpi_get_parent(handle, &phandle); 491 } while (ACPI_SUCCESS(status)); 492 return -1; 493} 494 495int acpi_get_node(acpi_handle handle) 496{ 497 int pxm; 498 499 pxm = acpi_get_pxm(handle); 500 501 return acpi_map_pxm_to_node(pxm); 502} 503EXPORT_SYMBOL(acpi_get_node);