Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched: Add book scheduling domain

On top of the SMT and MC scheduling domains this adds the BOOK scheduling
domain. This is useful for NUMA like machines which do not have an interface
which tells which piece of memory is attached to which node or where the
hardware performs striping.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100831082844.253053798@de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

authored by

Heiko Carstens and committed by
Ingo Molnar
01a08546 f269893c

+82 -2
+1
include/linux/sched.h
···
 	SD_LV_NONE = 0,
 	SD_LV_SIBLING,
 	SD_LV_MC,
+	SD_LV_BOOK,
 	SD_LV_CPU,
 	SD_LV_NODE,
 	SD_LV_ALLNODES,
+6
include/linux/topology.h
···
 	.balance_interval	= 64,					\
 }

+#ifdef CONFIG_SCHED_BOOK
+#ifndef SD_BOOK_INIT
+#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
+#endif
+#endif /* CONFIG_SCHED_BOOK */
+
 #ifdef CONFIG_NUMA
 #ifndef SD_NODE_INIT
 #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
+75 -2
kernel/sched.c
···
 	cpumask_var_t		nodemask;
 	cpumask_var_t		this_sibling_map;
 	cpumask_var_t		this_core_map;
+	cpumask_var_t		this_book_map;
 	cpumask_var_t		send_covered;
 	cpumask_var_t		tmpmask;
 	struct sched_group	**sched_group_nodes;
···
 	sa_rootdomain,
 	sa_tmpmask,
 	sa_send_covered,
+	sa_this_book_map,
 	sa_this_core_map,
 	sa_this_sibling_map,
 	sa_nodemask,
···
 }
 #endif /* CONFIG_SCHED_MC */

+/*
+ * book sched-domains:
+ */
+#ifdef CONFIG_SCHED_BOOK
+static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
+
+static int
+cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *mask)
+{
+	int group = cpu;
+#ifdef CONFIG_SCHED_MC
+	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
+	group = cpumask_first(mask);
+#elif defined(CONFIG_SCHED_SMT)
+	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
+	group = cpumask_first(mask);
+#endif
+	if (sg)
+		*sg = &per_cpu(sched_group_book, group).sg;
+	return group;
+}
+#endif /* CONFIG_SCHED_BOOK */
+
 static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
···
 		  struct sched_group **sg, struct cpumask *mask)
 {
 	int group;
-#ifdef CONFIG_SCHED_MC
+#ifdef CONFIG_SCHED_BOOK
+	cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
+	group = cpumask_first(mask);
+#elif defined(CONFIG_SCHED_MC)
 	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
 	group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
···
 #ifdef CONFIG_SCHED_MC
 SD_INIT_FUNC(MC)
 #endif
+#ifdef CONFIG_SCHED_BOOK
+ SD_INIT_FUNC(BOOK)
+#endif

 static int default_relax_domain_level = -1;
···
 		free_cpumask_var(d->tmpmask); /* fall through */
 	case sa_send_covered:
 		free_cpumask_var(d->send_covered); /* fall through */
+	case sa_this_book_map:
+		free_cpumask_var(d->this_book_map); /* fall through */
 	case sa_this_core_map:
 		free_cpumask_var(d->this_core_map); /* fall through */
 	case sa_this_sibling_map:
···
 		return sa_nodemask;
 	if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
 		return sa_this_sibling_map;
-	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+	if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
 		return sa_this_core_map;
+	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+		return sa_this_book_map;
 	if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
 		return sa_send_covered;
 	d->rd = alloc_rootdomain();
···
 	if (parent)
 		parent->child = sd;
 	cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
+	return sd;
+}
+
+static struct sched_domain *__build_book_sched_domain(struct s_data *d,
+	const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+	struct sched_domain *parent, int i)
+{
+	struct sched_domain *sd = parent;
+#ifdef CONFIG_SCHED_BOOK
+	sd = &per_cpu(book_domains, i).sd;
+	SD_INIT(sd, BOOK);
+	set_domain_attribute(sd, attr);
+	cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
+	sd->parent = parent;
+	parent->child = sd;
+	cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
+#endif
 	return sd;
 }
···
 					    d->send_covered, d->tmpmask);
 		break;
 #endif
+#ifdef CONFIG_SCHED_BOOK
+	case SD_LV_BOOK: /* set up book groups */
+		cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
+		if (cpu == cpumask_first(d->this_book_map))
+			init_sched_build_groups(d->this_book_map, cpu_map,
+						&cpu_to_book_group,
+						d->send_covered, d->tmpmask);
+		break;
+#endif
 	case SD_LV_CPU: /* set up physical groups */
 		cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
 		if (!cpumask_empty(d->nodemask))
···
 		sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
 		sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
+		sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
 		sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
 		sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
 	}

 	for_each_cpu(i, cpu_map) {
 		build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
+		build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
 		build_sched_groups(&d, SD_LV_MC, cpu_map, i);
 	}
···
 		init_sched_groups_power(i, sd);
 	}
 #endif
+#ifdef CONFIG_SCHED_BOOK
+	for_each_cpu(i, cpu_map) {
+		sd = &per_cpu(book_domains, i).sd;
+		init_sched_groups_power(i, sd);
+	}
+#endif

 	for_each_cpu(i, cpu_map) {
 		sd = &per_cpu(phys_domains, i).sd;
···
 		sd = &per_cpu(cpu_domains, i).sd;
 #elif defined(CONFIG_SCHED_MC)
 		sd = &per_cpu(core_domains, i).sd;
+#elif defined(CONFIG_SCHED_BOOK)
+		sd = &per_cpu(book_domains, i).sd;
 #else
 		sd = &per_cpu(phys_domains, i).sd;
 #endif