/*
 * include/linux/topology.h
 *
 * Written by: Matthew Dobson, IBM Corporation
 *
 * Copyright (C) 2002, IBM Corp.
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send feedback to <colpatch@us.ibm.com>
 */
#ifndef _LINUX_TOPOLOGY_H
#define _LINUX_TOPOLOGY_H

#include <linux/arch_topology.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/bitops.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <asm/topology.h>

#ifndef nr_cpus_node
#define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node))
#endif

int arch_update_cpu_topology(void);

/* Conform to ACPI 2.0 SLIT distance definitions */
#define LOCAL_DISTANCE		10
#define REMOTE_DISTANCE		20
#define DISTANCE_BITS		8
#ifndef node_distance
#define node_distance(from,to)	((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
#endif
#ifndef RECLAIM_DISTANCE
/*
 * If the distance between nodes in a system is larger than RECLAIM_DISTANCE
 * (in whatever arch specific measurement units returned by node_distance())
 * and node_reclaim_mode is enabled then the VM will only call node_reclaim()
 * on nodes within this distance.
 */
#define RECLAIM_DISTANCE 30
#endif

/*
 * The following tunable allows platforms to override the default node
 * reclaim distance (RECLAIM_DISTANCE) if remote memory accesses are
 * sufficiently fast that the default value actually hurts
 * performance.
 *
 * AMD EPYC machines use this because even though the 2-hop distance
 * is 32 (3.2x slower than a local memory access) performance actually
 * *improves* if allowed to reclaim memory and load balance tasks
 * between NUMA nodes 2-hops apart.
 */
extern int __read_mostly node_reclaim_distance;

#ifndef PENALTY_FOR_NODE_WITH_CPUS
#define PENALTY_FOR_NODE_WITH_CPUS	(1)
#endif

#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DECLARE_PER_CPU(int, numa_node);

#ifndef numa_node_id
/* Returns the number of the current Node. */
static inline int numa_node_id(void)
{
	return raw_cpu_read(numa_node);
}
#endif

#ifndef cpu_to_node
static inline int cpu_to_node(int cpu)
{
	return per_cpu(numa_node, cpu);
}
#endif

#ifndef set_numa_node
static inline void set_numa_node(int node)
{
	this_cpu_write(numa_node, node);
}
#endif

#ifndef set_cpu_numa_node
static inline void set_cpu_numa_node(int cpu, int node)
{
	per_cpu(numa_node, cpu) = node;
}
#endif

#else	/* !CONFIG_USE_PERCPU_NUMA_NODE_ID */

/* Returns the number of the current Node. */
#ifndef numa_node_id
static inline int numa_node_id(void)
{
	return cpu_to_node(raw_smp_processor_id());
}
#endif

#endif	/* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */
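/*
 * Example (editor's sketch, not part of the original header): whichever
 * branch above is compiled in, callers see the same accessors. A common
 * pattern is allocating memory on the node of the running CPU.
 * alloc_near_me() is a hypothetical helper; kmalloc_node() is declared
 * in <linux/slab.h>:
 *
 *	void *alloc_near_me(size_t size)
 *	{
 *		return kmalloc_node(size, GFP_KERNEL, numa_node_id());
 *	}
 */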
#ifdef CONFIG_HAVE_MEMORYLESS_NODES

/*
 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
 * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined.
 * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem().
 */
DECLARE_PER_CPU(int, _numa_mem_);

#ifndef set_numa_mem
static inline void set_numa_mem(int node)
{
	this_cpu_write(_numa_mem_, node);
}
#endif

#ifndef numa_mem_id
/* Returns the number of the nearest Node with memory */
static inline int numa_mem_id(void)
{
	return raw_cpu_read(_numa_mem_);
}
#endif

#ifndef cpu_to_mem
static inline int cpu_to_mem(int cpu)
{
	return per_cpu(_numa_mem_, cpu);
}
#endif

#ifndef set_cpu_numa_mem
static inline void set_cpu_numa_mem(int cpu, int node)
{
	per_cpu(_numa_mem_, cpu) = node;
}
#endif

#else	/* !CONFIG_HAVE_MEMORYLESS_NODES */

#ifndef numa_mem_id
/* Returns the number of the nearest Node with memory */
static inline int numa_mem_id(void)
{
	return numa_node_id();
}
#endif

#ifndef cpu_to_mem
static inline int cpu_to_mem(int cpu)
{
	return cpu_to_node(cpu);
}
#endif

#endif	/* [!]CONFIG_HAVE_MEMORYLESS_NODES */

#if defined(topology_die_id) && defined(topology_die_cpumask)
#define TOPOLOGY_DIE_SYSFS
#endif
#if defined(topology_cluster_id) && defined(topology_cluster_cpumask)
#define TOPOLOGY_CLUSTER_SYSFS
#endif
#if defined(topology_book_id) && defined(topology_book_cpumask)
#define TOPOLOGY_BOOK_SYSFS
#endif
#if defined(topology_drawer_id) && defined(topology_drawer_cpumask)
#define TOPOLOGY_DRAWER_SYSFS
#endif

#ifndef topology_physical_package_id
#define topology_physical_package_id(cpu)	((void)(cpu), -1)
#endif
#ifndef topology_die_id
#define topology_die_id(cpu)			((void)(cpu), -1)
#endif
#ifndef topology_cluster_id
#define topology_cluster_id(cpu)		((void)(cpu), -1)
#endif
#ifndef topology_core_id
#define topology_core_id(cpu)			((void)(cpu), 0)
#endif
#ifndef topology_book_id
#define topology_book_id(cpu)			((void)(cpu), -1)
#endif
#ifndef topology_drawer_id
#define topology_drawer_id(cpu)			((void)(cpu), -1)
#endif
#ifndef topology_ppin
#define topology_ppin(cpu)			((void)(cpu), 0ull)
#endif
#ifndef topology_sibling_cpumask
#define topology_sibling_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_core_cpumask
#define topology_core_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_cluster_cpumask
#define topology_cluster_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_die_cpumask
#define topology_die_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_book_cpumask
#define topology_book_cpumask(cpu)		cpumask_of(cpu)
#endif
#ifndef topology_drawer_cpumask
#define topology_drawer_cpumask(cpu)		cpumask_of(cpu)
#endif
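/*
 * Example (editor's sketch, not part of the original header): thanks to the
 * fallbacks above, generic code may query any topology level on any
 * architecture. dump_cpu_topology() is a hypothetical debug helper; the
 * "%*pbl" format prints a cpumask as a bit list:
 *
 *	void dump_cpu_topology(int cpu)
 *	{
 *		pr_info("CPU%d: pkg %d core %d siblings %*pbl\n", cpu,
 *			topology_physical_package_id(cpu),
 *			topology_core_id(cpu),
 *			cpumask_pr_args(topology_sibling_cpumask(cpu)));
 *	}
 */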
#if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask)
static inline const struct cpumask *cpu_smt_mask(int cpu)
{
	return topology_sibling_cpumask(cpu);
}
#endif

#ifndef topology_is_primary_thread

static inline bool topology_is_primary_thread(unsigned int cpu)
{
	/*
	 * When disabling SMT, the primary thread of the SMT core will
	 * remain enabled/active. Architectures that have a special primary
	 * thread (e.g. x86) need to override this function. Otherwise the
	 * first thread in the SMT core can be made the primary thread.
	 *
	 * The sibling cpumask of an offline CPU always contains the CPU
	 * itself on architectures using the implementation of
	 * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology.
	 * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for
	 * building their topology have to check whether to use this default
	 * implementation or to override it.
	 */
	return cpu == cpumask_first(topology_sibling_cpumask(cpu));
}
#define topology_is_primary_thread topology_is_primary_thread

#endif

static inline const struct cpumask *cpu_node_mask(int cpu)
{
	return cpumask_of_node(cpu_to_node(cpu));
}

#ifdef CONFIG_NUMA
int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node);
extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops);
#else
static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
{
	return cpumask_nth_and(cpu, cpus, cpu_online_mask);
}

static inline const struct cpumask *
sched_numa_hop_mask(unsigned int node, unsigned int hops)
{
	return ERR_PTR(-EOPNOTSUPP);
}
#endif	/* CONFIG_NUMA */

/**
 * for_each_node_numadist() - iterate over nodes in increasing distance
 *			      order, starting from a given node
 * @node: the iteration variable and the starting node.
 * @unvisited: a nodemask to keep track of the unvisited nodes.
 *
 * This macro iterates over NUMA node IDs in increasing distance from the
 * starting @node and yields MAX_NUMNODES when all the nodes have been
 * visited.
 *
 * Note that by the time the loop completes, the @unvisited nodemask will
 * be fully cleared, unless the loop exits early.
 *
 * The difference between for_each_node() and for_each_node_numadist() is
 * that the former iterates over nodes in numerical order, whereas the
 * latter iterates over nodes in increasing order of distance.
 *
 * The complexity of this iterator is O(N^2), where N represents the
 * number of nodes, as each iteration involves scanning all nodes to
 * find the one with the shortest distance.
 *
 * Requires rcu_read_lock() to be held.
 */
#define for_each_node_numadist(node, unvisited)				\
	for (int __start = (node),					\
	     (node) = nearest_node_nodemask((__start), &(unvisited));	\
	     (node) < MAX_NUMNODES;					\
	     node_clear((node), (unvisited)),				\
	     (node) = nearest_node_nodemask((__start), &(unvisited)))

/**
 * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance
 *                          from a given node.
 * @mask: the iteration variable.
 * @node: the NUMA node to start the search from.
 *
 * Requires rcu_read_lock() to be held.
 *
 * Yields cpu_online_mask for @node == NUMA_NO_NODE.
 */
#define for_each_numa_hop_mask(mask, node)				\
	for (unsigned int __hops = 0;					\
	     mask = (node != NUMA_NO_NODE || __hops) ?			\
		     sched_numa_hop_mask(node, __hops) :		\
		     cpu_online_mask,					\
	     !IS_ERR_OR_NULL(mask);					\
	     __hops++)
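/*
 * Example (editor's sketch, not part of the original header): both iterators
 * above run under rcu_read_lock(), and do_something() is a hypothetical
 * callback. Visiting nodes in increasing distance from the local node:
 *
 *	nodemask_t unvisited = NODE_MASK_ALL;
 *	int node = numa_node_id();
 *
 *	rcu_read_lock();
 *	for_each_node_numadist(node, unvisited)
 *		do_something(node);
 *	rcu_read_unlock();
 *
 * Because each hop mask is a superset of the previous one, a consumer can
 * visit each online CPU exactly once, nearest hops first, by diffing
 * successive masks with for_each_cpu_andnot():
 *
 *	const struct cpumask *mask, *prev = cpu_none_mask;
 *	int cpu;
 *
 *	rcu_read_lock();
 *	for_each_numa_hop_mask(mask, node) {
 *		for_each_cpu_andnot(cpu, mask, prev)
 *			do_something(cpu);
 *		prev = mask;
 *	}
 *	rcu_read_unlock();
 */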
DECLARE_PER_CPU(unsigned long, cpu_scale);

static inline unsigned long topology_get_cpu_scale(int cpu)
{
	return per_cpu(cpu_scale, cpu);
}

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);

#endif /* _LINUX_TOPOLOGY_H */