Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
"Misc fixes: various scheduler metrics corner case fixes, a
sched_features deadlock fix, and a topology fix for certain NUMA
systems"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/fair: Fix kernel-doc notation warning
sched/fair: Fix load_balance redo for !imbalance
sched/fair: Fix scale_rt_capacity() for SMT
sched/fair: Fix vruntime_normalized() for remote non-migration wakeup
sched/pelt: Fix update_blocked_averages() for RT and DL classes
sched/topology: Set correct NUMA topology type
sched/debug: Fix potential deadlock when writing to sched_features

Changed files: +22 -15

kernel/sched/debug.c (+4 -2)
@@ -89,12 +89,12 @@

 static void sched_feat_disable(int i)
 {
-	static_key_disable(&sched_feat_keys[i]);
+	static_key_disable_cpuslocked(&sched_feat_keys[i]);
 }

 static void sched_feat_enable(int i)
 {
-	static_key_enable(&sched_feat_keys[i]);
+	static_key_enable_cpuslocked(&sched_feat_keys[i]);
 }
 #else
 static void sched_feat_disable(int i) { };
@@ -146,9 +146,11 @@

 	/* Ensure the static_key remains in a consistent state */
 	inode = file_inode(filp);
+	cpus_read_lock();
 	inode_lock(inode);
 	ret = sched_feat_set(cmp);
 	inode_unlock(inode);
+	cpus_read_unlock();
 	if (ret < 0)
 		return ret;


kernel/sched/fair.c (+17 -9)
@@ -3362,6 +3362,7 @@
  * attach_entity_load_avg - attach this entity to its cfs_rq load avg
  * @cfs_rq: cfs_rq to attach to
  * @se: sched_entity to attach
+ * @flags: migration hints
  *
  * Must call update_cfs_rq_load_avg() before this, since we rely on
  * cfs_rq->avg.last_update_time being current.
@@ -7264,6 +7263,7 @@
 {
 	struct rq *rq = cpu_rq(cpu);
 	struct cfs_rq *cfs_rq, *pos;
+	const struct sched_class *curr_class;
 	struct rq_flags rf;
 	bool done = true;

@@ -7301,8 +7299,10 @@
 		if (cfs_rq_has_blocked(cfs_rq))
 			done = false;
 	}
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
+
+	curr_class = rq->curr->sched_class;
+	update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
+	update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
 	update_irq_load_avg(rq, 0);
 	/* Don't need periodic decay once load/util_avg are null */
 	if (others_have_blocked(rq))
@@ -7369,13 +7365,16 @@
 {
 	struct rq *rq = cpu_rq(cpu);
 	struct cfs_rq *cfs_rq = &rq->cfs;
+	const struct sched_class *curr_class;
 	struct rq_flags rf;

 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
 	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
+
+	curr_class = rq->curr->sched_class;
+	update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
+	update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
 	update_irq_load_avg(rq, 0);
 #ifdef CONFIG_NO_HZ_COMMON
 	rq->last_blocked_load_update_tick = jiffies;
@@ -7489,10 +7482,10 @@
 	return load_idx;
 }

-static unsigned long scale_rt_capacity(int cpu)
+static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
+	unsigned long max = arch_scale_cpu_capacity(sd, cpu);
 	unsigned long used, free;
 	unsigned long irq;

@@ -7514,7 +7507,7 @@

 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
-	unsigned long capacity = scale_rt_capacity(cpu);
+	unsigned long capacity = scale_rt_capacity(sd, cpu);
 	struct sched_group *sdg = sd->groups;

 	cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu);
@@ -8276,7 +8269,7 @@
 force_balance:
 	/* Looks like there is an imbalance. Compute it */
 	calculate_imbalance(env, &sds);
-	return sds.busiest;
+	return env->imbalance ? sds.busiest : NULL;

 out_balanced:
 	env->imbalance = 0;
@@ -9645,7 +9638,8 @@
 	 * - A task which has been woken up by try_to_wake_up() and
 	 *   waiting for actually being woken up by sched_ttwu_pending().
 	 */
-	if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+	if (!se->sum_exec_runtime ||
+	    (p->state == TASK_WAKING && p->sched_remote_wakeup))
 		return true;

 	return false;

kernel/sched/topology.c (+1 -4)
@@ -1295,7 +1295,7 @@

 	n = sched_max_numa_distance;

-	if (sched_domains_numa_levels <= 1) {
+	if (sched_domains_numa_levels <= 2) {
 		sched_numa_topology_type = NUMA_DIRECT;
 		return;
 	}
@@ -1379,9 +1379,6 @@
 		if (!sched_debug())
 			break;
 	}
-
-	if (!level)
-		return;

 	/*
 	 * 'level' contains the number of unique distances