Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched

* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
sched: fix ideal_runtime calculations for reniced tasks
sched: improve prev_sum_exec_runtime setting
sched: simplify __check_preempt_curr_fair()
sched: fix xtensa build warning
sched: debug: fix sum_exec_runtime clearing
sched: debug: fix cfs_rq->wait_runtime accounting
sched: fix niced_granularity() shift
sched: fix MC/HT scheduler optimization, without breaking the FUZZ logic.

+38 -37
+6 -9
kernel/sched.c
···
 /*
  * Shift right and round:
  */
-#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
 
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
···
          * Check whether we'd overflow the 64-bit multiplication:
          */
         if (unlikely(tmp > WMULT_CONST))
-                tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+                tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
                         WMULT_SHIFT/2);
         else
-                tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
+                tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
 
         return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
···
 
 static void set_load_weight(struct task_struct *p)
 {
-        task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
         p->se.wait_runtime = 0;
 
         if (task_has_rt_policy(p)) {
···
          * a think about bumping its value to force at least one task to be
          * moved
          */
-        if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) {
+        if (*imbalance < busiest_load_per_task) {
                 unsigned long tmp, pwr_now, pwr_move;
                 unsigned int imbn;
···
                 pwr_move /= SCHED_LOAD_SCALE;
 
                 /* Move if we gain throughput */
-                if (pwr_move <= pwr_now)
-                        goto out_balanced;
-
-                *imbalance = busiest_load_per_task;
+                if (pwr_move > pwr_now)
+                        *imbalance = busiest_load_per_task;
         }
 
         return busiest;
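
For reference, SRR() is simply a rounding right shift: adding 1UL << (y - 1), i.e. half of 2^y, before shifting makes calc_delta_mine() round to nearest rather than truncate. The rename from RSR avoids clashing with xtensa's pre-existing RSR macro, which is what the "sched: fix xtensa build warning" commit above addresses. A minimal userspace sketch of the rounding behaviour (not kernel code, values are illustrative):

#include <stdio.h>

/* Same shift-right-and-round helper as the SRR() macro introduced above. */
#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))

int main(void)
{
        unsigned long x = 7;

        /* Plain shift truncates: 7 / 4 = 1.75 becomes 1. */
        printf("7 >> 2    = %lu\n", x >> 2);

        /* SRR rounds to nearest: (7 + 2) >> 2 becomes 2. */
        printf("SRR(7, 2) = %lu\n", SRR(x, 2));

        return 0;
}
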
+1
kernel/sched_debug.c
···
         p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0;
 #endif
         p->se.sum_exec_runtime = 0;
+        p->se.prev_sum_exec_runtime = 0;
 }
+31 -28
kernel/sched_fair.c
···
         update_load_add(&cfs_rq->load, se->load.weight);
         cfs_rq->nr_running++;
         se->on_rq = 1;
+
+        schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static inline void
···
         update_load_sub(&cfs_rq->load, se->load.weight);
         cfs_rq->nr_running--;
         se->on_rq = 0;
+
+        schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
···
         /*
          * It will always fit into 'long':
          */
-        return (long) (tmp >> WMULT_SHIFT);
+        return (long) (tmp >> (WMULT_SHIFT-NICE_0_SHIFT));
 }
 
 static inline void
···
 
         prev_runtime = se->wait_runtime;
         __add_wait_runtime(cfs_rq, se, delta_fair);
-        schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
         delta_fair = se->wait_runtime - prev_runtime;
 
         /*
···
                 if (tsk->state & TASK_UNINTERRUPTIBLE)
                         se->block_start = rq_of(cfs_rq)->clock;
         }
-        cfs_rq->wait_runtime -= se->wait_runtime;
 #endif
         }
         __dequeue_entity(cfs_rq, se);
···
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static int
+static void
 __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
                           struct sched_entity *curr, unsigned long granularity)
 {
         s64 __delta = curr->fair_key - se->fair_key;
+        unsigned long ideal_runtime, delta_exec;
+
+        /*
+         * ideal_runtime is compared against sum_exec_runtime, which is
+         * walltime, hence do not scale.
+         */
+        ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running,
+                        (unsigned long)sysctl_sched_min_granularity);
+
+        /*
+         * If we executed more than what the latency constraint suggests,
+         * reduce the rescheduling granularity. This way the total latency
+         * of how much a task is not scheduled converges to
+         * sysctl_sched_latency:
+         */
+        delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+        if (delta_exec > ideal_runtime)
+                granularity = 0;
 
         /*
          * Take scheduling granularity into account - do not
          * preempt the current task unless the best task has
          * a larger than sched_granularity fairness advantage:
+         *
+         * scale granularity as key space is in fair_clock.
          */
-        if (__delta > niced_granularity(curr, granularity)) {
+        if (__delta > niced_granularity(curr, granularity))
                 resched_task(rq_of(cfs_rq)->curr);
-                return 1;
-        }
-        return 0;
 }
 
 static inline void
···
         update_stats_wait_end(cfs_rq, se);
         update_stats_curr_start(cfs_rq, se);
         set_cfs_rq_curr(cfs_rq, se);
+        se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
···
 
 static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-        unsigned long gran, ideal_runtime, delta_exec;
         struct sched_entity *next;
 
         /*
···
         if (next == curr)
                 return;
 
-        gran = sched_granularity(cfs_rq);
-        ideal_runtime = niced_granularity(curr,
-                max(sysctl_sched_latency / cfs_rq->nr_running,
-                        (unsigned long)sysctl_sched_min_granularity));
-        /*
-         * If we executed more than what the latency constraint suggests,
-         * reduce the rescheduling granularity. This way the total latency
-         * of how much a task is not scheduled converges to
-         * sysctl_sched_latency:
-         */
-        delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-        if (delta_exec > ideal_runtime)
-                gran = 0;
-
-        if (__check_preempt_curr_fair(cfs_rq, next, curr, gran))
-                curr->prev_sum_exec_runtime = curr->sum_exec_runtime;
+        __check_preempt_curr_fair(cfs_rq, next, curr,
+                        sched_granularity(cfs_rq));
 }
 
 /**************************************************
···
          * The statistical average of wait_runtime is about
          * -granularity/2, so initialize the task with that:
          */
-        if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) {
+        if (sysctl_sched_features & SCHED_FEAT_START_DEBIT)
                 se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
-                schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
-        }
 
         __enqueue_entity(cfs_rq, se);
 }
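
To make the reworked preemption path concrete: __check_preempt_curr_fair() now derives a wall-clock slice, ideal_runtime, from sysctl_sched_latency and the number of runnable tasks, and zeroes the granularity (forcing preemption on any fairness deficit) once the current task has run past that slice. Because ideal_runtime is compared against sum_exec_runtime, which is wall time, it is deliberately not scaled through niced_granularity(); only the fair_key comparison is, which is the point of the "fix ideal_runtime calculations for reniced tasks" change. A standalone sketch of that calculation, using made-up sysctl values rather than kernel defaults:

#include <stdio.h>

#define max(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        /* Illustrative values in nanoseconds, not the kernel defaults. */
        unsigned long sysctl_sched_latency         = 20000000;  /* 20 ms */
        unsigned long sysctl_sched_min_granularity =  2000000;  /*  2 ms */
        unsigned long nr_running = 4;

        /* Wall-clock slice before the current task should give way. */
        unsigned long ideal_runtime = max(sysctl_sched_latency / nr_running,
                                          sysctl_sched_min_granularity);

        /* sum_exec_runtime - prev_sum_exec_runtime since it was picked. */
        unsigned long delta_exec = 6000000;                     /*  6 ms */

        printf("ideal_runtime = %lu ns\n", ideal_runtime);
        if (delta_exec > ideal_runtime)
                printf("granularity = 0 -> preempt on any fairness deficit\n");

        return 0;
}
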