Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched

* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  sched: fix ideal_runtime calculations for reniced tasks
  sched: improve prev_sum_exec_runtime setting
  sched: simplify __check_preempt_curr_fair()
  sched: fix xtensa build warning
  sched: debug: fix sum_exec_runtime clearing
  sched: debug: fix cfs_rq->wait_runtime accounting
  sched: fix niced_granularity() shift
  sched: fix MC/HT scheduler optimization, without breaking the FUZZ logic.

 kernel/sched.c       |   15 ++++++---------
 kernel/sched_debug.c |    1 +
 kernel/sched_fair.c  |   59 +++++++++++++++++++++++++++++++----------------------------
 3 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -668,7 +668,7 @@
 /*
  * Shift right and round:
  */
-#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
 
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
@@ -684,10 +684,10 @@
          * Check whether we'd overflow the 64-bit multiplication:
          */
         if (unlikely(tmp > WMULT_CONST))
-                tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+                tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
                         WMULT_SHIFT/2);
         else
-                tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
+                tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
 
         return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
@@ -858,7 +858,6 @@
 
 static void set_load_weight(struct task_struct *p)
 {
-        task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
         p->se.wait_runtime = 0;
 
         if (task_has_rt_policy(p)) {
@@ -2512,6 +2511,6 @@
          * a think about bumping its value to force at least one task to be
          * moved
          */
-        if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) {
+        if (*imbalance < busiest_load_per_task) {
                 unsigned long tmp, pwr_now, pwr_move;
                 unsigned int imbn;
@@ -2564,10 +2563,8 @@
                 pwr_move /= SCHED_LOAD_SCALE;
 
                 /* Move if we gain throughput */
-                if (pwr_move <= pwr_now)
-                        goto out_balanced;
-
-                *imbalance = busiest_load_per_task;
+                if (pwr_move > pwr_now)
+                        *imbalance = busiest_load_per_task;
         }
 
         return busiest;
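The RSR() to SRR() change above is just a rename of the "shift right and round" helper; the macro divides by 2^y and rounds to nearest instead of truncating, which is why calc_delta_mine() uses it (twice, in WMULT_SHIFT/2 halves, when the full product would overflow 64 bits). A tiny user-space sketch of the rounding behaviour, illustrative only and not part of the patch:

#include <stdio.h>

/* Same definition as the kernel's SRR(): shift right by y bits, rounding to nearest. */
#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))

int main(void)
{
        /* 1000 / 32 = 31.25: both truncation and rounding give 31. */
        printf("1000 >> 5 = %lu, SRR(1000, 5) = %lu\n", 1000UL >> 5, SRR(1000UL, 5));

        /* 1017 / 32 = 31.78...: truncation gives 31, SRR() rounds up to 32. */
        printf("1017 >> 5 = %lu, SRR(1017, 5) = %lu\n", 1017UL >> 5, SRR(1017UL, 5));

        return 0;
}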
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -283,4 +283,5 @@
         p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0;
 #endif
         p->se.sum_exec_runtime = 0;
+        p->se.prev_sum_exec_runtime = 0;
 }
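Clearing p->se.prev_sum_exec_runtime together with sum_exec_runtime matters because the scheduler computes delta_exec = sum_exec_runtime - prev_sum_exec_runtime as an unsigned quantity (see the sched_fair.c hunks below); resetting only one of the two would make the next delta wrap around to a huge value. A toy demonstration with made-up numbers, not kernel code:

#include <stdio.h>

int main(void)
{
        /* Stand-ins for se->sum_exec_runtime / se->prev_sum_exec_runtime (u64, nanoseconds). */
        unsigned long long sum_exec_runtime      = 5000000ULL;
        unsigned long long prev_sum_exec_runtime = 4000000ULL;
        unsigned long long delta_exec;

        /* Reset only one of the counters ... */
        sum_exec_runtime = 0;

        /* ... and the unsigned subtraction wraps around. */
        delta_exec = sum_exec_runtime - prev_sum_exec_runtime;
        printf("reset one counter:   delta_exec = %llu\n", delta_exec);

        /* Resetting both keeps the delta sane (0 here). */
        prev_sum_exec_runtime = 0;
        delta_exec = sum_exec_runtime - prev_sum_exec_runtime;
        printf("reset both counters: delta_exec = %llu\n", delta_exec);

        return 0;
}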
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -194,6 +194,8 @@
         update_load_add(&cfs_rq->load, se->load.weight);
         cfs_rq->nr_running++;
         se->on_rq = 1;
+
+        schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static inline void
@@ -205,6 +207,8 @@
         update_load_sub(&cfs_rq->load, se->load.weight);
         cfs_rq->nr_running--;
         se->on_rq = 0;
+
+        schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
@@ -291,7 +295,7 @@
         /*
          * It will always fit into 'long':
          */
-        return (long) (tmp >> WMULT_SHIFT);
+        return (long) (tmp >> (WMULT_SHIFT-NICE_0_SHIFT));
 }
 
 static inline void
@@ -574,7 +578,6 @@
 
         prev_runtime = se->wait_runtime;
         __add_wait_runtime(cfs_rq, se, delta_fair);
-        schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
         delta_fair = se->wait_runtime - prev_runtime;
 
         /*
@@ -662,7 +665,6 @@
                         if (tsk->state & TASK_UNINTERRUPTIBLE)
                                 se->block_start = rq_of(cfs_rq)->clock;
                 }
-                cfs_rq->wait_runtime -= se->wait_runtime;
 #endif
         }
         __dequeue_entity(cfs_rq, se);
@@ -671,22 +673,39 @@
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static int
+static void
 __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
                           struct sched_entity *curr, unsigned long granularity)
 {
         s64 __delta = curr->fair_key - se->fair_key;
+        unsigned long ideal_runtime, delta_exec;
+
+        /*
+         * ideal_runtime is compared against sum_exec_runtime, which is
+         * walltime, hence do not scale.
+         */
+        ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running,
+                        (unsigned long)sysctl_sched_min_granularity);
+
+        /*
+         * If we executed more than what the latency constraint suggests,
+         * reduce the rescheduling granularity. This way the total latency
+         * of how much a task is not scheduled converges to
+         * sysctl_sched_latency:
+         */
+        delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+        if (delta_exec > ideal_runtime)
+                granularity = 0;
 
         /*
          * Take scheduling granularity into account - do not
          * preempt the current task unless the best task has
          * a larger than sched_granularity fairness advantage:
+         *
+         * scale granularity as key space is in fair_clock.
          */
-        if (__delta > niced_granularity(curr, granularity)) {
+        if (__delta > niced_granularity(curr, granularity))
                 resched_task(rq_of(cfs_rq)->curr);
-                return 1;
-        }
-        return 0;
 }
 
 static inline void
@@ -702,6 +721,7 @@
         update_stats_wait_end(cfs_rq, se);
         update_stats_curr_start(cfs_rq, se);
         set_cfs_rq_curr(cfs_rq, se);
+        se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
@@ -731,7 +751,6 @@
 
 static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-        unsigned long gran, ideal_runtime, delta_exec;
         struct sched_entity *next;
 
         /*
@@ -748,22 +767,8 @@
         if (next == curr)
                 return;
 
-        gran = sched_granularity(cfs_rq);
-        ideal_runtime = niced_granularity(curr,
-                max(sysctl_sched_latency / cfs_rq->nr_running,
-                        (unsigned long)sysctl_sched_min_granularity));
-        /*
-         * If we executed more than what the latency constraint suggests,
-         * reduce the rescheduling granularity. This way the total latency
-         * of how much a task is not scheduled converges to
-         * sysctl_sched_latency:
-         */
-        delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-        if (delta_exec > ideal_runtime)
-                gran = 0;
-
-        if (__check_preempt_curr_fair(cfs_rq, next, curr, gran))
-                curr->prev_sum_exec_runtime = curr->sum_exec_runtime;
+        __check_preempt_curr_fair(cfs_rq, next, curr,
+                        sched_granularity(cfs_rq));
 }
 
 /**************************************************
@@ -1121,10 +1126,8 @@
          * The statistical average of wait_runtime is about
          * -granularity/2, so initialize the task with that:
          */
-        if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) {
+        if (sysctl_sched_features & SCHED_FEAT_START_DEBIT)
                 se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
-                schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
-        }
 
         __enqueue_entity(cfs_rq, se);
 }
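Taken together, the sched_fair.c changes move the "ideal runtime" check into __check_preempt_curr_fair() itself: each task's walltime slice is sysctl_sched_latency divided by the number of runnable tasks, clamped from below by sysctl_sched_min_granularity, and once the current task has used up that slice the preemption granularity is forced to zero so any fairer task preempts it. A standalone sketch of that decision follows; the helper name and the sysctl values are illustrative, not taken from the patch:

#include <stdio.h>

#define max(a, b) ((a) > (b) ? (a) : (b))

/* Illustrative values in nanoseconds; the real sysctls are tunable. */
static unsigned long sysctl_sched_latency         = 20000000UL;        /* 20 ms */
static unsigned long sysctl_sched_min_granularity =  2000000UL;        /*  2 ms */

/*
 * Mirrors the decision added to __check_preempt_curr_fair(): once the
 * current task has consumed its share of the latency period, drop the
 * preemption granularity to zero so a fairer task can preempt it.
 */
static int should_drop_granularity(unsigned long nr_running,
                                   unsigned long delta_exec)
{
        unsigned long ideal_runtime;

        /* latency period split across runnable tasks, never below the minimum */
        ideal_runtime = max(sysctl_sched_latency / nr_running,
                            sysctl_sched_min_granularity);

        return delta_exec > ideal_runtime;
}

int main(void)
{
        printf("%d\n", should_drop_granularity(5, 6000000UL));   /* 1: 6 ms exceeds the 4 ms slice */
        printf("%d\n", should_drop_granularity(20, 1500000UL));  /* 0: 1.5 ms is under the 2 ms minimum */
        return 0;
}

With 5 runnable tasks the 20 ms latency period gives a 4 ms slice, so 6 ms of runtime triggers the drop; with 20 tasks the 1 ms share is clamped up to the 2 ms minimum, so 1.5 ms of runtime does not.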