Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched

* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  sched: fix startup penalty calculation
  sched: simplify bonus calculation #2
  sched: simplify bonus calculation #1
  sched: tidy up and simplify the bonus balance
  sched: optimize task_tick_rt() a bit
  sched: simplify can_migrate_task()
  sched: remove HZ dependency from the granularity default
  sched: CONFIG_SCHED_GROUP_FAIR=y fixlet

 3 files changed, 28 insertions(+), 26 deletions(-)
 kernel/sched.c | +1 -7
···
 	if (task_running(rq, p))
 		return 0;
 
-	/*
-	 * Aggressive migration if too many balance attempts have failed:
-	 */
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
-		return 1;
-
 	return 1;
 }
···
 	if (sysctl_sched_granularity > gran_limit)
 		sysctl_sched_granularity = gran_limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 8;
+	sysctl_sched_runtime_limit = sysctl_sched_granularity * 5;
 	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
 }
···
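Illustrative sketch (not part of this merge): a standalone userspace program showing how the derived limits follow from the HZ-free 10 msec base, with the runtime limit now at 5x the (CPU-scaled) granularity instead of 8x and the wakeup granularity at half of it. The scale_granularity() helper and the 1 + log2(ncpus) factor are assumptions taken from the comment in kernel/sched_fair.c below; the upper gran_limit clamp from sched_init_granularity() is omitted.

#include <stdio.h>

/*
 * Scale the base granularity by 1 + log2(ncpus), i.e. 2x on 2-way,
 * 3x on 4-way, 4x on 8-way systems (per the sched_fair.c comment).
 */
static unsigned int scale_granularity(unsigned int base_ns, unsigned int ncpus)
{
	unsigned int factor = 1;

	while (ncpus > 1) {
		ncpus >>= 1;
		factor++;
	}
	return base_ns * factor;
}

int main(void)
{
	unsigned int base = 10000000U;	/* the new HZ-free 10 msec default */
	unsigned int ncpus;

	for (ncpus = 1; ncpus <= 16; ncpus *= 2) {
		unsigned int gran = scale_granularity(base, ncpus);

		/* runtime limit is now 5x the granularity, wakeup gran half of it */
		printf("%2u CPUs: gran=%u ns  runtime_limit=%u ns  wakeup_gran=%u ns\n",
		       ncpus, gran, gran * 5, gran / 2);
	}
	return 0;
}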
 kernel/sched_fair.c | +19 -16
···
 
 /*
  * Preemption granularity:
- * (default: 2 msec, units: nanoseconds)
+ * (default: 10 msec, units: nanoseconds)
  *
  * NOTE: this granularity value is not the same as the concept of
  * 'timeslice length' - timeslices in CFS will typically be somewhat
···
  * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way
  * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
  */
-unsigned int sysctl_sched_granularity __read_mostly = 2000000000ULL/HZ;
+unsigned int sysctl_sched_granularity __read_mostly = 10000000UL;
 
 /*
  * SCHED_BATCH wake-up granularity.
- * (default: 10 msec, units: nanoseconds)
+ * (default: 25 msec, units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
-							10000000000ULL/HZ;
+unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
···
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000000ULL/HZ;
+unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL;
 
 unsigned int sysctl_sched_stat_granularity __read_mostly;
 
 /*
- * Initialized in sched_init_granularity():
+ * Initialized in sched_init_granularity() [to 5 times the base granularity]:
  */
 unsigned int sysctl_sched_runtime_limit __read_mostly;
···
 	delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
 
 	if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) {
-		delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec);
-		delta = calc_delta_mine(delta, curr->load.weight, lw);
-		delta = min((u64)delta, cfs_rq->sleeper_bonus);
+		delta = min((u64)delta_mine, cfs_rq->sleeper_bonus);
+		delta = min(delta, (unsigned long)(
+			(long)sysctl_sched_runtime_limit - curr->wait_runtime));
 		cfs_rq->sleeper_bonus -= delta;
 		delta_mine -= delta;
 	}
···
 	unsigned long load = cfs_rq->load.weight, delta_fair;
 	long prev_runtime;
 
+	/*
+	 * Do not boost sleepers if there's too much bonus 'in flight'
+	 * already:
+	 */
+	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
+		return;
+
 	if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG)
 		load = rq_of(cfs_rq)->cpu_load[2];
···
 
 	prev_runtime = se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta_fair);
+	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 	delta_fair = se->wait_runtime - prev_runtime;
 
 	/*
 	 * Track the amount of bonus we've given to sleepers:
 	 */
 	cfs_rq->sleeper_bonus += delta_fair;
-	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
-		cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit;
-
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
···
 	 * -granularity/2, so initialize the task with that:
 	 */
 	if (sysctl_sched_features & SCHED_FEAT_START_DEBIT)
-		p->se.wait_runtime = -(sysctl_sched_granularity / 2);
+		p->se.wait_runtime = -((long)sysctl_sched_granularity / 2);
 
 	__enqueue_entity(cfs_rq, se);
 }
···
  */
 static void set_curr_task_fair(struct rq *rq)
 {
-	struct sched_entity *se = &rq->curr.se;
+	struct sched_entity *se = &rq->curr->se;
 
 	for_each_sched_entity(se)
 		set_next_entity(cfs_rq_of(se), se);
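Illustrative sketch (not part of this merge): a toy userspace model of the new bonus payback clamp, where the amount deducted is bounded both by the outstanding sleeper_bonus and by sysctl_sched_runtime_limit - curr->wait_runtime, mirroring the two min() steps in the hunk above. The clamp_bonus() helper and the simplified signed types are inventions for this sketch, not kernel code.

#include <stdio.h>

/*
 * Bound the repaid bonus by (a) the outstanding sleeper_bonus and
 * (b) the headroom left below the runtime limit.
 */
static long long clamp_bonus(long long delta_mine, long long sleeper_bonus,
			     long long runtime_limit, long long wait_runtime)
{
	long long delta = delta_mine < sleeper_bonus ? delta_mine : sleeper_bonus;
	long long headroom = runtime_limit - wait_runtime;

	if (delta > headroom)
		delta = headroom;
	return delta;
}

int main(void)
{
	/* plenty of headroom: the repayment is capped by delta_mine alone */
	printf("%lld\n", clamp_bonus(2000000, 5000000, 50000000, 0));
	/* close to the runtime limit: the repayment shrinks to the headroom */
	printf("%lld\n", clamp_bonus(2000000, 5000000, 50000000, 49500000));
	return 0;
}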
 kernel/sched_rt.c | +8 -3
···
 		return;
 
 	p->time_slice = static_prio_timeslice(p->static_prio);
-	set_tsk_need_resched(p);
 
-	/* put it at the end of the queue: */
-	requeue_task_rt(rq, p);
+	/*
+	 * Requeue to the end of queue if we are not the only element
+	 * on the queue:
+	 */
+	if (p->run_list.prev != p->run_list.next) {
+		requeue_task_rt(rq, p);
+		set_tsk_need_resched(p);
+	}
 }
 
 static struct sched_class rt_sched_class __read_mostly = {
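Illustrative sketch (not part of this merge): a userspace demonstration of the "only element on the queue" test added to task_tick_rt(). On a circular doubly linked list, a node whose prev and next both point at the queue head is the sole entry, so requeueing it and forcing a reschedule would be a no-op. The minimal list helpers and should_requeue() below are stand-ins for this example, not the kernel's list.h.

#include <stdio.h>

struct list_head {
	struct list_head *next, *prev;
};

static void list_init(struct list_head *head)
{
	head->next = head->prev = head;
}

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
	entry->prev = head->prev;
	entry->next = head;
	head->prev->next = entry;
	head->prev = entry;
}

static int should_requeue(struct list_head *run_list)
{
	/* sole element: both links point back at the queue head */
	return run_list->prev != run_list->next;
}

int main(void)
{
	struct list_head queue, a, b;

	list_init(&queue);
	list_add_tail(&a, &queue);
	printf("one task queued:  requeue? %d\n", should_requeue(&a));

	list_add_tail(&b, &queue);
	printf("two tasks queued: requeue? %d\n", should_requeue(&a));
	return 0;
}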