Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix race in cpupri introduced by cpumask_var changes
  sched: Fix latencytop and sleep profiling vs group scheduling

2 files changed: +33 -14

kernel/sched_cpupri.c: +14 -1
@@ -81,8 +81,21 @@
         if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
             continue;
 
-        if (lowest_mask)
+        if (lowest_mask) {
             cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+
+            /*
+             * We have to ensure that we have at least one bit
+             * still set in the array, since the map could have
+             * been concurrently emptied between the first and
+             * second reads of vec->mask. If we hit this
+             * condition, simply act as though we never hit this
+             * priority level and continue on.
+             */
+            if (cpumask_any(lowest_mask) >= nr_cpu_ids)
+                continue;
+        }
+
         return 1;
     }
 
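
For context, the race closed above is a check-then-copy window: vec->mask can be
emptied by a concurrent update between the cpumask_any_and() test and the
cpumask_and() copy, so lowest_mask may come out empty even though the priority
level looked usable a moment earlier. The sketch below is only a user-space
analogue of that pattern (a plain atomic 64-bit bitmask with made-up names such
as vec_mask and find_lowest, not the kernel cpumask API); it shows the two reads
and the re-validation step the patch adds:

/*
 * Simplified analogue of the cpupri check-then-copy race: the shared mask
 * may be cleared between the two reads, so the local copy is re-checked.
 * Illustrative user-space code, not kernel code.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t vec_mask = 0xf0;        /* CPUs at this priority */

static int find_lowest(uint64_t allowed, uint64_t *lowest)
{
        /* First read: does any allowed CPU sit at this priority level? */
        if ((atomic_load(&vec_mask) & allowed) == 0)
                return 0;                       /* level looks empty, skip it */

        /*
         * Second read: vec_mask may have been emptied in between, so the
         * intersection handed back must be validated before it is trusted.
         */
        *lowest = atomic_load(&vec_mask) & allowed;
        if (*lowest == 0)
                return 0;                       /* lost the race, skip level */

        return 1;
}

int main(void)
{
        uint64_t lowest;

        if (find_lowest(0x30, &lowest))
                printf("usable mask: %#llx\n", (unsigned long long)lowest);
        else
                printf("priority level empty, continue with next one\n");
        return 0;
}

Re-checking the copied mask keeps the lookup lock-free: a level that is emptied
mid-read is simply treated as if it had never matched and the search moves on to
the next priority level, which is exactly what the added continue does.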

kernel/sched_fair.c: +19 -13
@@ -611,9 +611,13 @@
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHEDSTATS
+    struct task_struct *tsk = NULL;
+
+    if (entity_is_task(se))
+        tsk = task_of(se);
+
     if (se->sleep_start) {
         u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
-        struct task_struct *tsk = task_of(se);
 
         if ((s64)delta < 0)
             delta = 0;
@@ -628,11 +632,11 @@
         se->sleep_start = 0;
         se->sum_sleep_runtime += delta;
 
-        account_scheduler_latency(tsk, delta >> 10, 1);
+        if (tsk)
+            account_scheduler_latency(tsk, delta >> 10, 1);
     }
     if (se->block_start) {
         u64 delta = rq_of(cfs_rq)->clock - se->block_start;
-        struct task_struct *tsk = task_of(se);
 
         if ((s64)delta < 0)
             delta = 0;
@@ -643,17 +647,19 @@
         se->block_start = 0;
         se->sum_sleep_runtime += delta;
 
-        /*
-         * Blocking time is in units of nanosecs, so shift by 20 to
-         * get a milliseconds-range estimation of the amount of
-         * time that the task spent sleeping:
-         */
-        if (unlikely(prof_on == SLEEP_PROFILING)) {
-
-            profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
-                delta >> 20);
+        if (tsk) {
+            /*
+             * Blocking time is in units of nanosecs, so shift by
+             * 20 to get a milliseconds-range estimation of the
+             * amount of time that the task spent sleeping:
+             */
+            if (unlikely(prof_on == SLEEP_PROFILING)) {
+                profile_hits(SLEEP_PROFILING,
+                        (void *)get_wchan(tsk),
+                        delta >> 20);
+            }
+            account_scheduler_latency(tsk, delta >> 10, 0);
         }
-        account_scheduler_latency(tsk, delta >> 10, 0);
     }
 #endif
 }
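
The underlying problem the second patch addresses: with group scheduling a
sched_entity does not have to belong to a task at all, it can represent an
entire task group, and task_of() is just a container_of() back-cast from the
embedded entity to its task_struct. Calling it on a group entity therefore
hands latencytop and the sleep profiler a pointer into unrelated memory. The
sketch below is a user-space illustration of that pattern (struct entity,
struct task, account_sleep and the is_task flag are made-up stand-ins for
sched_entity, task_struct, enqueue_sleeper and entity_is_task(); not kernel
code):

/*
 * Why task_of() needs an entity_is_task() guard: container_of() only makes
 * sense when the entity really is embedded in a task structure.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct entity {
        int is_task;            /* stand-in for entity_is_task(se) */
        long sum_sleep;
};

struct task {
        char comm[16];
        struct entity se;
};

/* Only valid when the entity is embedded in a struct task. */
static struct task *task_of(struct entity *se)
{
        return container_of(se, struct task, se);
}

static void account_sleep(struct entity *se, long delta)
{
        se->sum_sleep += delta;         /* per-entity stats stay unconditional */

        /*
         * A group entity is not inside a struct task, so task_of() would
         * compute a bogus pointer; only report for real tasks.
         */
        if (se->is_task)
                printf("charge %ld to task %s\n", delta, task_of(se)->comm);
}

int main(void)
{
        struct task t = { .comm = "worker", .se = { .is_task = 1 } };
        struct entity group_se = { .is_task = 0 };      /* no owning task */

        account_sleep(&t.se, 100);
        account_sleep(&group_se, 50);   /* stats updated, reporting skipped */
        return 0;
}

This mirrors the shape of the fix: the task pointer is resolved once, only for
entities that really are tasks, the sleep/block statistics keep accumulating
for every entity, and latencytop and the sleep profiler are only ever fed
genuine task_struct pointers.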