Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
sched, delay accounting: fix incorrect delay time when constantly waiting on runqueue
sched: CPU hotplug events must not destroy scheduler domains created by the cpusets
sched: rt-group: fix RR buglet
sched: rt-group: hierarchy aware throttle
sched: rt-group: fix hierarchy
sched: NULL pointer dereference while setting sched_rt_period_us
sched: fix defined-but-unused warning

+73 -29
+6
kernel/cpuset.c
··· 1890 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 1891 scan_for_empty_cpusets(&top_cpuset); 1892 1893 cgroup_unlock(); 1894 } 1895
··· 1890 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 1891 scan_for_empty_cpusets(&top_cpuset); 1892 1893 + /* 1894 + * Scheduler destroys domains on hotplug events. 1895 + * Rebuild them based on the current settings. 1896 + */ 1897 + rebuild_sched_domains(); 1898 + 1899 cgroup_unlock(); 1900 } 1901
+25 -2
kernel/sched.c
··· 1127 return HRTIMER_NORESTART; 1128 } 1129 1130 static void hotplug_hrtick_disable(int cpu) 1131 { 1132 struct rq *rq = cpu_rq(cpu); ··· 1183 { 1184 hotcpu_notifier(hotplug_hrtick, 0); 1185 } 1186 1187 static void init_rq_hrtick(struct rq *rq) 1188 { ··· 7238 } 7239 7240 /* 7241 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 7242 * For now this just excludes isolated cpus, but could be used to 7243 * exclude other special cases in the future. ··· 7396 get_online_cpus(); 7397 mutex_lock(&sched_domains_mutex); 7398 detach_destroy_domains(&cpu_online_map); 7399 err = arch_init_sched_domains(&cpu_online_map); 7400 mutex_unlock(&sched_domains_mutex); 7401 put_online_cpus(); ··· 7482 case CPU_DOWN_PREPARE: 7483 case CPU_DOWN_PREPARE_FROZEN: 7484 detach_destroy_domains(&cpu_online_map); 7485 return NOTIFY_OK; 7486 7487 case CPU_UP_CANCELED: ··· 7501 return NOTIFY_DONE; 7502 } 7503 7504 /* The hotplug lock is already held by cpu_up/cpu_down */ 7505 arch_init_sched_domains(&cpu_online_map); 7506 7507 return NOTIFY_OK; 7508 } ··· 7650 else 7651 rt_se->rt_rq = parent->my_q; 7652 7653 - rt_se->rt_rq = &rq->rt; 7654 rt_se->my_q = rt_rq; 7655 rt_se->parent = parent; 7656 INIT_LIST_HEAD(&rt_se->run_list); ··· 8371 #ifdef CONFIG_CGROUP_SCHED 8372 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) 8373 { 8374 - struct task_group *tgi, *parent = tg->parent; 8375 unsigned long total = 0; 8376 8377 if (!parent) {
··· 1127 return HRTIMER_NORESTART; 1128 } 1129 1130 + #ifdef CONFIG_SMP 1131 static void hotplug_hrtick_disable(int cpu) 1132 { 1133 struct rq *rq = cpu_rq(cpu); ··· 1182 { 1183 hotcpu_notifier(hotplug_hrtick, 0); 1184 } 1185 + #endif /* CONFIG_SMP */ 1186 1187 static void init_rq_hrtick(struct rq *rq) 1188 { ··· 7236 } 7237 7238 /* 7239 + * Free current domain masks. 7240 + * Called after all cpus are attached to NULL domain. 7241 + */ 7242 + static void free_sched_domains(void) 7243 + { 7244 + ndoms_cur = 0; 7245 + if (doms_cur != &fallback_doms) 7246 + kfree(doms_cur); 7247 + doms_cur = &fallback_doms; 7248 + } 7249 + 7250 + /* 7251 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 7252 * For now this just excludes isolated cpus, but could be used to 7253 * exclude other special cases in the future. ··· 7382 get_online_cpus(); 7383 mutex_lock(&sched_domains_mutex); 7384 detach_destroy_domains(&cpu_online_map); 7385 + free_sched_domains(); 7386 err = arch_init_sched_domains(&cpu_online_map); 7387 mutex_unlock(&sched_domains_mutex); 7388 put_online_cpus(); ··· 7467 case CPU_DOWN_PREPARE: 7468 case CPU_DOWN_PREPARE_FROZEN: 7469 detach_destroy_domains(&cpu_online_map); 7470 + free_sched_domains(); 7471 return NOTIFY_OK; 7472 7473 case CPU_UP_CANCELED: ··· 7485 return NOTIFY_DONE; 7486 } 7487 7488 + #ifndef CONFIG_CPUSETS 7489 + /* 7490 + * Create default domain partitioning if cpusets are disabled. 7491 + * Otherwise we let cpusets rebuild the domains based on the 7492 + * current setup. 
7493 + */ 7494 + 7495 /* The hotplug lock is already held by cpu_up/cpu_down */ 7496 arch_init_sched_domains(&cpu_online_map); 7497 + #endif 7498 7499 return NOTIFY_OK; 7500 } ··· 7626 else 7627 rt_se->rt_rq = parent->my_q; 7628 7629 rt_se->my_q = rt_rq; 7630 rt_se->parent = parent; 7631 INIT_LIST_HEAD(&rt_se->run_list); ··· 8348 #ifdef CONFIG_CGROUP_SCHED 8349 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) 8350 { 8351 + struct task_group *tgi, *parent = tg ? tg->parent : NULL; 8352 unsigned long total = 0; 8353 8354 if (!parent) {
+36 -27
kernel/sched_rt.c
··· 449 #endif 450 } 451 452 - static void enqueue_rt_entity(struct sched_rt_entity *rt_se) 453 { 454 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 455 struct rt_prio_array *array = &rt_rq->active; 456 struct rt_rq *group_rq = group_rt_rq(rt_se); 457 458 - if (group_rq && rt_rq_throttled(group_rq)) 459 return; 460 461 list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); ··· 470 inc_rt_tasks(rt_se, rt_rq); 471 } 472 473 - static void dequeue_rt_entity(struct sched_rt_entity *rt_se) 474 { 475 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 476 struct rt_prio_array *array = &rt_rq->active; ··· 486 * Because the prio of an upper entry depends on the lower 487 * entries, we must remove entries top - down. 488 */ 489 - static void dequeue_rt_stack(struct task_struct *p) 490 { 491 - struct sched_rt_entity *rt_se, *back = NULL; 492 493 - rt_se = &p->rt; 494 for_each_sched_rt_entity(rt_se) { 495 rt_se->back = back; 496 back = rt_se; ··· 497 498 for (rt_se = back; rt_se; rt_se = rt_se->back) { 499 if (on_rt_rq(rt_se)) 500 - dequeue_rt_entity(rt_se); 501 } 502 } 503 ··· 530 if (wakeup) 531 rt_se->timeout = 0; 532 533 - dequeue_rt_stack(p); 534 - 535 - /* 536 - * enqueue everybody, bottom - up. 537 - */ 538 - for_each_sched_rt_entity(rt_se) 539 - enqueue_rt_entity(rt_se); 540 } 541 542 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) 543 { 544 struct sched_rt_entity *rt_se = &p->rt; 545 - struct rt_rq *rt_rq; 546 547 update_curr_rt(rq); 548 - 549 - dequeue_rt_stack(p); 550 - 551 - /* 552 - * re-enqueue all non-empty rt_rq entities. 
553 - */ 554 - for_each_sched_rt_entity(rt_se) { 555 - rt_rq = group_rt_rq(rt_se); 556 - if (rt_rq && rt_rq->rt_nr_running) 557 - enqueue_rt_entity(rt_se); 558 - } 559 } 560 561 /* ··· 549 void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) 550 { 551 struct rt_prio_array *array = &rt_rq->active; 552 553 - list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); 554 } 555 556 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
··· 449 #endif 450 } 451 452 + static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) 453 { 454 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 455 struct rt_prio_array *array = &rt_rq->active; 456 struct rt_rq *group_rq = group_rt_rq(rt_se); 457 458 + /* 459 + * Don't enqueue the group if its throttled, or when empty. 460 + * The latter is a consequence of the former when a child group 461 + * get throttled and the current group doesn't have any other 462 + * active members. 463 + */ 464 + if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 465 return; 466 467 list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); ··· 464 inc_rt_tasks(rt_se, rt_rq); 465 } 466 467 + static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) 468 { 469 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 470 struct rt_prio_array *array = &rt_rq->active; ··· 480 * Because the prio of an upper entry depends on the lower 481 * entries, we must remove entries top - down. 482 */ 483 + static void dequeue_rt_stack(struct sched_rt_entity *rt_se) 484 { 485 + struct sched_rt_entity *back = NULL; 486 487 for_each_sched_rt_entity(rt_se) { 488 rt_se->back = back; 489 back = rt_se; ··· 492 493 for (rt_se = back; rt_se; rt_se = rt_se->back) { 494 if (on_rt_rq(rt_se)) 495 + __dequeue_rt_entity(rt_se); 496 + } 497 + } 498 + 499 + static void enqueue_rt_entity(struct sched_rt_entity *rt_se) 500 + { 501 + dequeue_rt_stack(rt_se); 502 + for_each_sched_rt_entity(rt_se) 503 + __enqueue_rt_entity(rt_se); 504 + } 505 + 506 + static void dequeue_rt_entity(struct sched_rt_entity *rt_se) 507 + { 508 + dequeue_rt_stack(rt_se); 509 + 510 + for_each_sched_rt_entity(rt_se) { 511 + struct rt_rq *rt_rq = group_rt_rq(rt_se); 512 + 513 + if (rt_rq && rt_rq->rt_nr_running) 514 + __enqueue_rt_entity(rt_se); 515 } 516 } 517 ··· 506 if (wakeup) 507 rt_se->timeout = 0; 508 509 + enqueue_rt_entity(rt_se); 510 } 511 512 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) 
513 { 514 struct sched_rt_entity *rt_se = &p->rt; 515 516 update_curr_rt(rq); 517 + dequeue_rt_entity(rt_se); 518 } 519 520 /* ··· 542 void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) 543 { 544 struct rt_prio_array *array = &rt_rq->active; 545 + struct list_head *queue = array->queue + rt_se_prio(rt_se); 546 547 + if (on_rt_rq(rt_se)) 548 + list_move_tail(&rt_se->run_list, queue); 549 } 550 551 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+6
kernel/sched_stats.h
··· 198 /* 199 * Called when a process ceases being the active-running process, either 200 * voluntarily or involuntarily. Now we can calculate how long we ran. 201 */ 202 static inline void sched_info_depart(struct task_struct *t) 203 { ··· 209 210 t->sched_info.cpu_time += delta; 211 rq_sched_info_depart(task_rq(t), delta); 212 } 213 214 /*
··· 198 /* 199 * Called when a process ceases being the active-running process, either 200 * voluntarily or involuntarily. Now we can calculate how long we ran. 201 + * Also, if the process is still in the TASK_RUNNING state, call 202 + * sched_info_queued() to mark that it has now again started waiting on 203 + * the runqueue. 204 */ 205 static inline void sched_info_depart(struct task_struct *t) 206 { ··· 206 207 t->sched_info.cpu_time += delta; 208 rq_sched_info_depart(task_rq(t), delta); 209 + 210 + if (t->state == TASK_RUNNING) 211 + sched_info_queued(t); 212 } 213 214 /*