Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched/topology: Add SD_ASYM_CPUCAPACITY flag detection

The SD_ASYM_CPUCAPACITY sched_domain flag is supposed to mark the
sched_domain in the hierarchy where all CPU capacities are visible for
any CPU's point of view on asymmetric CPU capacity systems. The
scheduler can then take capacity asymmetry into account when
balancing at this level. It also serves as an indicator for how wide
task placement heuristics have to search to consider all available CPU
capacities as asymmetric systems might often appear symmetric at
the smallest level(s) of the sched_domain hierarchy.

The flag has been around for a while but so far has only been set by
out-of-tree code in Android kernels. One solution is to let each
architecture provide the flag through a custom sched_domain topology
array and associated mask and flag functions. However,
SD_ASYM_CPUCAPACITY is special in the sense that it depends on the
capacity and presence of all CPUs in the system, i.e. when hotplugging
all CPUs out except those with one particular CPU capacity the flag
should disappear even if the sched_domains don't collapse. Similarly,
the flag is affected by cpusets where load-balancing is turned off.
Detecting when the flag should be set therefore depends not only on
topology information but also the cpuset configuration and hotplug
state. The arch code doesn't have easy access to the cpuset
configuration.

Instead, this patch implements the flag detection in generic code where
cpusets and hotplug state is already taken care of. All the arch is
responsible for is to implement arch_scale_cpu_capacity() and force a
full rebuild of the sched_domain hierarchy if capacities are updated,
e.g. later in the boot process when cpufreq has initialized.

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dietmar.eggemann@arm.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/1532093554-30504-2-git-send-email-morten.rasmussen@arm.com
[ Fixed 'CPU' capitalization. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Morten Rasmussen and committed by
Ingo Molnar
05484e09 882a78a9

+78 -9
+3 -3
include/linux/sched/topology.h
··· 23 23 #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ 24 24 #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ 25 25 #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ 26 - #define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ 27 - #define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ 26 + #define SD_ASYM_CPUCAPACITY 0x0040 /* Domain members have different CPU capacities */ 27 + #define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share CPU capacity */ 28 28 #define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ 29 - #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ 29 + #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share CPU pkg resources */ 30 30 #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ 31 31 #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ 32 32 #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
+75 -6
kernel/sched/topology.c
··· 1061 1061 * SD_SHARE_PKG_RESOURCES - describes shared caches 1062 1062 * SD_NUMA - describes NUMA topologies 1063 1063 * SD_SHARE_POWERDOMAIN - describes shared power domain 1064 - * SD_ASYM_CPUCAPACITY - describes mixed capacity topologies 1065 1064 * 1066 1065 * Odd one out, which beside describing the topology has a quirk also 1067 1066 * prescribes the desired behaviour that goes along with it: ··· 1072 1073 SD_SHARE_PKG_RESOURCES | \ 1073 1074 SD_NUMA | \ 1074 1075 SD_ASYM_PACKING | \ 1075 - SD_ASYM_CPUCAPACITY | \ 1076 1076 SD_SHARE_POWERDOMAIN) 1077 1077 1078 1078 static struct sched_domain * 1079 1079 sd_init(struct sched_domain_topology_level *tl, 1080 1080 const struct cpumask *cpu_map, 1081 - struct sched_domain *child, int cpu) 1081 + struct sched_domain *child, int dflags, int cpu) 1082 1082 { 1083 1083 struct sd_data *sdd = &tl->data; 1084 1084 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); ··· 1097 1099 if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS, 1098 1100 "wrong sd_flags in topology description\n")) 1099 1101 sd_flags &= ~TOPOLOGY_SD_FLAGS; 1102 + 1103 + /* Apply detected topology flags */ 1104 + sd_flags |= dflags; 1100 1105 1101 1106 *sd = (struct sched_domain){ 1102 1107 .min_interval = sd_weight, ··· 1605 1604 1606 1605 static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, 1607 1606 const struct cpumask *cpu_map, struct sched_domain_attr *attr, 1608 - struct sched_domain *child, int cpu) 1607 + struct sched_domain *child, int dflags, int cpu) 1609 1608 { 1610 - struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu); 1609 + struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu); 1611 1610 1612 1611 if (child) { 1613 1612 sd->level = child->level + 1; ··· 1634 1633 } 1635 1634 1636 1635 /* 1636 + * Find the sched_domain_topology_level where all CPU capacities are visible 1637 + * for all CPUs. 
1638 + */ 1639 + static struct sched_domain_topology_level 1640 + *asym_cpu_capacity_level(const struct cpumask *cpu_map) 1641 + { 1642 + int i, j, asym_level = 0; 1643 + bool asym = false; 1644 + struct sched_domain_topology_level *tl, *asym_tl = NULL; 1645 + unsigned long cap; 1646 + 1647 + /* Is there any asymmetry? */ 1648 + cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map)); 1649 + 1650 + for_each_cpu(i, cpu_map) { 1651 + if (arch_scale_cpu_capacity(NULL, i) != cap) { 1652 + asym = true; 1653 + break; 1654 + } 1655 + } 1656 + 1657 + if (!asym) 1658 + return NULL; 1659 + 1660 + /* 1661 + * Examine topology from all CPU's point of views to detect the lowest 1662 + * sched_domain_topology_level where a highest capacity CPU is visible 1663 + * to everyone. 1664 + */ 1665 + for_each_cpu(i, cpu_map) { 1666 + unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i); 1667 + int tl_id = 0; 1668 + 1669 + for_each_sd_topology(tl) { 1670 + if (tl_id < asym_level) 1671 + goto next_level; 1672 + 1673 + for_each_cpu_and(j, tl->mask(i), cpu_map) { 1674 + unsigned long capacity; 1675 + 1676 + capacity = arch_scale_cpu_capacity(NULL, j); 1677 + 1678 + if (capacity <= max_capacity) 1679 + continue; 1680 + 1681 + max_capacity = capacity; 1682 + asym_level = tl_id; 1683 + asym_tl = tl; 1684 + } 1685 + next_level: 1686 + tl_id++; 1687 + } 1688 + } 1689 + 1690 + return asym_tl; 1691 + } 1692 + 1693 + 1694 + /* 1637 1695 * Build sched domains for a given set of CPUs and attach the sched domains 1638 1696 * to the individual CPUs 1639 1697 */ ··· 1704 1644 struct s_data d; 1705 1645 struct rq *rq = NULL; 1706 1646 int i, ret = -ENOMEM; 1647 + struct sched_domain_topology_level *tl_asym; 1707 1648 1708 1649 alloc_state = __visit_domain_allocation_hell(&d, cpu_map); 1709 1650 if (alloc_state != sa_rootdomain) 1710 1651 goto error; 1652 + 1653 + tl_asym = asym_cpu_capacity_level(cpu_map); 1711 1654 1712 1655 /* Set up domains for CPUs specified by the cpu_map: */ 1713 1656 
for_each_cpu(i, cpu_map) { ··· 1718 1655 1719 1656 sd = NULL; 1720 1657 for_each_sd_topology(tl) { 1721 - sd = build_sched_domain(tl, cpu_map, attr, sd, i); 1658 + int dflags = 0; 1659 + 1660 + if (tl == tl_asym) 1661 + dflags |= SD_ASYM_CPUCAPACITY; 1662 + 1663 + sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i); 1664 + 1722 1665 if (tl == sched_domain_topology) 1723 1666 *per_cpu_ptr(d.sd, i) = sd; 1724 1667 if (tl->flags & SDTL_OVERLAP)