Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
sched, delay accounting: fix incorrect delay time when constantly waiting on runqueue
sched: CPU hotplug events must not destroy scheduler domains created by the cpusets
sched: rt-group: fix RR buglet
sched: rt-group: hierarchy aware throttle
sched: rt-group: fix hierarchy
sched: NULL pointer dereference while setting sched_rt_period_us
sched: fix defined-but-unused warning

+73 -29
+6
kernel/cpuset.c
··· 1890 1890 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 1891 1891 scan_for_empty_cpusets(&top_cpuset); 1892 1892 1893 + /* 1894 + * Scheduler destroys domains on hotplug events. 1895 + * Rebuild them based on the current settings. 1896 + */ 1897 + rebuild_sched_domains(); 1898 + 1893 1899 cgroup_unlock(); 1894 1900 } 1895 1901
+25 -2
kernel/sched.c
··· 1127 1127 return HRTIMER_NORESTART; 1128 1128 } 1129 1129 1130 + #ifdef CONFIG_SMP 1130 1131 static void hotplug_hrtick_disable(int cpu) 1131 1132 { 1132 1133 struct rq *rq = cpu_rq(cpu); ··· 1183 1182 { 1184 1183 hotcpu_notifier(hotplug_hrtick, 0); 1185 1184 } 1185 + #endif /* CONFIG_SMP */ 1186 1186 1187 1187 static void init_rq_hrtick(struct rq *rq) 1188 1188 { ··· 7238 7236 } 7239 7237 7240 7238 /* 7239 + * Free current domain masks. 7240 + * Called after all cpus are attached to NULL domain. 7241 + */ 7242 + static void free_sched_domains(void) 7243 + { 7244 + ndoms_cur = 0; 7245 + if (doms_cur != &fallback_doms) 7246 + kfree(doms_cur); 7247 + doms_cur = &fallback_doms; 7248 + } 7249 + 7250 + /* 7241 7251 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 7242 7252 * For now this just excludes isolated cpus, but could be used to 7243 7253 * exclude other special cases in the future. ··· 7396 7382 get_online_cpus(); 7397 7383 mutex_lock(&sched_domains_mutex); 7398 7384 detach_destroy_domains(&cpu_online_map); 7385 + free_sched_domains(); 7399 7386 err = arch_init_sched_domains(&cpu_online_map); 7400 7387 mutex_unlock(&sched_domains_mutex); 7401 7388 put_online_cpus(); ··· 7482 7467 case CPU_DOWN_PREPARE: 7483 7468 case CPU_DOWN_PREPARE_FROZEN: 7484 7469 detach_destroy_domains(&cpu_online_map); 7470 + free_sched_domains(); 7485 7471 return NOTIFY_OK; 7486 7472 7487 7473 case CPU_UP_CANCELED: ··· 7501 7485 return NOTIFY_DONE; 7502 7486 } 7503 7487 7488 + #ifndef CONFIG_CPUSETS 7489 + /* 7490 + * Create default domain partitioning if cpusets are disabled. 7491 + * Otherwise we let cpusets rebuild the domains based on the 7492 + * current setup. 
7493 + */ 7494 + 7504 7495 /* The hotplug lock is already held by cpu_up/cpu_down */ 7505 7496 arch_init_sched_domains(&cpu_online_map); 7497 + #endif 7506 7498 7507 7499 return NOTIFY_OK; 7508 7500 } ··· 7650 7626 else 7651 7627 rt_se->rt_rq = parent->my_q; 7652 7628 7653 - rt_se->rt_rq = &rq->rt; 7654 7629 rt_se->my_q = rt_rq; 7655 7630 rt_se->parent = parent; 7656 7631 INIT_LIST_HEAD(&rt_se->run_list); ··· 8371 8348 #ifdef CONFIG_CGROUP_SCHED 8372 8349 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) 8373 8350 { 8374 - struct task_group *tgi, *parent = tg->parent; 8351 + struct task_group *tgi, *parent = tg ? tg->parent : NULL; 8375 8352 unsigned long total = 0; 8376 8353 8377 8354 if (!parent) {
+36 -27
kernel/sched_rt.c
··· 449 449 #endif 450 450 } 451 451 452 - static void enqueue_rt_entity(struct sched_rt_entity *rt_se) 452 + static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) 453 453 { 454 454 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 455 455 struct rt_prio_array *array = &rt_rq->active; 456 456 struct rt_rq *group_rq = group_rt_rq(rt_se); 457 457 458 - if (group_rq && rt_rq_throttled(group_rq)) 458 + /* 459 + * Don't enqueue the group if its throttled, or when empty. 460 + * The latter is a consequence of the former when a child group 461 + * get throttled and the current group doesn't have any other 462 + * active members. 463 + */ 464 + if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 459 465 return; 460 466 461 467 list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); ··· 470 464 inc_rt_tasks(rt_se, rt_rq); 471 465 } 472 466 473 - static void dequeue_rt_entity(struct sched_rt_entity *rt_se) 467 + static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) 474 468 { 475 469 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 476 470 struct rt_prio_array *array = &rt_rq->active; ··· 486 480 * Because the prio of an upper entry depends on the lower 487 481 * entries, we must remove entries top - down. 
488 482 */ 489 - static void dequeue_rt_stack(struct task_struct *p) 483 + static void dequeue_rt_stack(struct sched_rt_entity *rt_se) 490 484 { 491 - struct sched_rt_entity *rt_se, *back = NULL; 485 + struct sched_rt_entity *back = NULL; 492 486 493 - rt_se = &p->rt; 494 487 for_each_sched_rt_entity(rt_se) { 495 488 rt_se->back = back; 496 489 back = rt_se; ··· 497 492 498 493 for (rt_se = back; rt_se; rt_se = rt_se->back) { 499 494 if (on_rt_rq(rt_se)) 500 - dequeue_rt_entity(rt_se); 495 + __dequeue_rt_entity(rt_se); 496 + } 497 + } 498 + 499 + static void enqueue_rt_entity(struct sched_rt_entity *rt_se) 500 + { 501 + dequeue_rt_stack(rt_se); 502 + for_each_sched_rt_entity(rt_se) 503 + __enqueue_rt_entity(rt_se); 504 + } 505 + 506 + static void dequeue_rt_entity(struct sched_rt_entity *rt_se) 507 + { 508 + dequeue_rt_stack(rt_se); 509 + 510 + for_each_sched_rt_entity(rt_se) { 511 + struct rt_rq *rt_rq = group_rt_rq(rt_se); 512 + 513 + if (rt_rq && rt_rq->rt_nr_running) 514 + __enqueue_rt_entity(rt_se); 501 515 } 502 516 } 503 517 ··· 530 506 if (wakeup) 531 507 rt_se->timeout = 0; 532 508 533 - dequeue_rt_stack(p); 534 - 535 - /* 536 - * enqueue everybody, bottom - up. 537 - */ 538 - for_each_sched_rt_entity(rt_se) 539 - enqueue_rt_entity(rt_se); 509 + enqueue_rt_entity(rt_se); 540 510 } 541 511 542 512 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) 543 513 { 544 514 struct sched_rt_entity *rt_se = &p->rt; 545 - struct rt_rq *rt_rq; 546 515 547 516 update_curr_rt(rq); 548 - 549 - dequeue_rt_stack(p); 550 - 551 - /* 552 - * re-enqueue all non-empty rt_rq entities. 
553 - */ 554 - for_each_sched_rt_entity(rt_se) { 555 - rt_rq = group_rt_rq(rt_se); 556 - if (rt_rq && rt_rq->rt_nr_running) 557 - enqueue_rt_entity(rt_se); 558 - } 517 + dequeue_rt_entity(rt_se); 559 518 } 560 519 561 520 /* ··· 549 542 void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) 550 543 { 551 544 struct rt_prio_array *array = &rt_rq->active; 545 + struct list_head *queue = array->queue + rt_se_prio(rt_se); 552 546 553 - list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); 547 + if (on_rt_rq(rt_se)) 548 + list_move_tail(&rt_se->run_list, queue); 554 549 } 555 550 556 551 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+6
kernel/sched_stats.h
··· 198 198 /* 199 199 * Called when a process ceases being the active-running process, either 200 200 * voluntarily or involuntarily. Now we can calculate how long we ran. 201 + * Also, if the process is still in the TASK_RUNNING state, call 202 + * sched_info_queued() to mark that it has now again started waiting on 203 + * the runqueue. 201 204 */ 202 205 static inline void sched_info_depart(struct task_struct *t) 203 206 { ··· 209 206 210 207 t->sched_info.cpu_time += delta; 211 208 rq_sched_info_depart(task_rq(t), delta); 209 + 210 + if (t->state == TASK_RUNNING) 211 + sched_info_queued(t); 212 212 } 213 213 214 214 /*