[PATCH] sched: clean up fallout of recent changes

Clean up some of the impact of recent (and not so recent) scheduler
changes:

- turning macros into nice inline functions (one such conversion is shown below)
- sanitizing and unifying variable definitions
- whitespace, style consistency, 80-lines, comment correctness, spelling
and curly braces police
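
As an example of the macro-to-inline conversions, here is the task_hot()
change as it appears in the kernel/sched.c hunks below. The old, untyped
macro:

  #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
                               < (long long) (sd)->cache_hot_time)

becomes a plain, type-checked inline function:

  /*
   * Is this task likely cache-hot:
   */
  static inline int
  task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
  {
          return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
  }

Behaviour is unchanged; the arguments just get real types and the long-long
casts live in one obvious place.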

Due to the macro hell and variable placement simplifications, there's even 26
bytes of .text saved:

   text    data     bss     dec     hex filename
  25510    4153     192   29855    749f sched.o.before
  25484    4153     192   29829    7485 sched.o.after
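
A good example of the "macro hell" is EXPIRED_STARVING(); the patch (see the
diff below) turns it into a small helper that is actually readable:

  static inline int expired_starving(runqueue_t *rq)
  {
          if (rq->curr->static_prio > rq->best_expired_prio)
                  return 1;
          if (!STARVATION_LIMIT || !rq->expired_timestamp)
                  return 0;
          if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
                  return 1;
          return 0;
  }

(The old macro's ">= STARVATION_LIMIT * nr_running + 1" test is equivalent to
the "> STARVATION_LIMIT * nr_running" form used here.)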

[akpm@osdl.org: build fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by Ingo Molnar and committed by Linus Torvalds 48f24c4d 829035fd

+195 -167
kernel/sched.c
··· 184 return static_prio_timeslice(p->static_prio); 185 } 186 187 - #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ 188 - < (long long) (sd)->cache_hot_time) 189 - 190 /* 191 * These are the runqueue data structures: 192 */ ··· 275 * The domain tree of any CPU may only be accessed from within 276 * preempt-disabled sections. 277 */ 278 - #define for_each_domain(cpu, domain) \ 279 - for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) 280 281 #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) 282 #define this_rq() (&__get_cpu_var(runqueues)) ··· 1036 req->task = p; 1037 req->dest_cpu = dest_cpu; 1038 list_add(&req->list, &rq->migration_queue); 1039 return 1; 1040 } 1041 ··· 1133 runqueue_t *rq = cpu_rq(cpu); 1134 unsigned long n = rq->nr_running; 1135 1136 - return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; 1137 } 1138 1139 /* ··· 1492 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1493 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); 1494 } 1495 - 1496 EXPORT_SYMBOL(wake_up_process); 1497 1498 int fastcall wake_up_state(task_t *p, unsigned int state) ··· 1864 #ifdef CONFIG_SMP 1865 1866 /* 1867 * double_rq_lock - safely lock two runqueues 1868 * 1869 * Note this does not disable interrupts like task_rq_lock, ··· 2035 } 2036 2037 #define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) 2038 /* 2039 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted 2040 * load from busiest to this_rq, as part of a balancing operation within ··· 2048 struct sched_domain *sd, enum idle_type idle, 2049 int *all_pinned) 2050 { 2051 prio_array_t *array, *dst_array; 2052 struct list_head *head, *curr; 2053 - int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; 2054 - int busiest_best_prio_seen; 2055 - int skip_for_load; /* skip the task based on weighted load issues */ 2056 long rem_load_move; 2057 task_t *tmp; 2058 ··· 2061 rem_load_move = max_load_move; 2062 pinned = 1; 2063 this_best_prio = rq_best_prio(this_rq); 2064 - busiest_best_prio = rq_best_prio(busiest); 2065 /* 2066 * Enable handling of the case where there is more than one task 2067 * with the best priority. If the current running task is one 2068 - * of those with prio==busiest_best_prio we know it won't be moved 2069 * and therefore it's safe to override the skip (based on load) of 2070 * any task we find with that prio. 2071 */ 2072 - busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; 2073 2074 /* 2075 * We first consider expired tasks. Those will likely not be ··· 2116 */ 2117 skip_for_load = tmp->load_weight > rem_load_move; 2118 if (skip_for_load && idx < this_best_prio) 2119 - skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; 2120 if (skip_for_load || 2121 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { 2122 - busiest_best_prio_seen |= idx == busiest_best_prio; 2123 if (curr != head) 2124 goto skip_queue; 2125 idx++; ··· 2163 2164 /* 2165 * find_busiest_group finds and returns the busiest CPU group within the 2166 - * domain. It calculates and returns the amount of weighted load which should be 2167 - * moved to restore balance via the imbalance parameter. 
2168 */ 2169 static struct sched_group * 2170 find_busiest_group(struct sched_domain *sd, int this_cpu, ··· 2286 * capacity but still has some space to pick up some load 2287 * from other group and save more power 2288 */ 2289 - if (sum_nr_running <= group_capacity - 1) 2290 if (sum_nr_running > leader_nr_running || 2291 (sum_nr_running == leader_nr_running && 2292 first_cpu(group->cpumask) > ··· 2294 group_leader = group; 2295 leader_nr_running = sum_nr_running; 2296 } 2297 - 2298 group_next: 2299 #endif 2300 group = group->next; ··· 2349 * moved 2350 */ 2351 if (*imbalance < busiest_load_per_task) { 2352 - unsigned long pwr_now, pwr_move; 2353 - unsigned long tmp; 2354 unsigned int imbn; 2355 2356 small_imbalance: ··· 2421 /* 2422 * find_busiest_queue - find the busiest runqueue among the cpus in group. 2423 */ 2424 - static runqueue_t *find_busiest_queue(struct sched_group *group, 2425 - enum idle_type idle, unsigned long imbalance) 2426 { 2427 unsigned long max_load = 0; 2428 - runqueue_t *busiest = NULL, *rqi; 2429 int i; 2430 2431 for_each_cpu_mask(i, group->cpumask) { 2432 - rqi = cpu_rq(i); 2433 2434 - if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance) 2435 continue; 2436 2437 - if (rqi->raw_weighted_load > max_load) { 2438 - max_load = rqi->raw_weighted_load; 2439 - busiest = rqi; 2440 } 2441 } 2442 ··· 2450 */ 2451 #define MAX_PINNED_INTERVAL 512 2452 2453 - #define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0) 2454 /* 2455 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2456 * tasks if there is an imbalance. ··· 2464 static int load_balance(int this_cpu, runqueue_t *this_rq, 2465 struct sched_domain *sd, enum idle_type idle) 2466 { 2467 struct sched_group *group; 2468 - runqueue_t *busiest; 2469 unsigned long imbalance; 2470 - int nr_moved, all_pinned = 0; 2471 - int active_balance = 0; 2472 - int sd_idle = 0; 2473 2474 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2475 !sched_smt_power_savings) ··· 2501 */ 2502 double_rq_lock(this_rq, busiest); 2503 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2504 - minus_1_or_zero(busiest->nr_running), 2505 - imbalance, sd, idle, &all_pinned); 2506 double_rq_unlock(this_rq, busiest); 2507 2508 /* All tasks on this runqueue were pinned by CPU affinity */ ··· 2575 (sd->balance_interval < sd->max_interval)) 2576 sd->balance_interval *= 2; 2577 2578 - if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2579 return -1; 2580 return 0; 2581 } ··· 2588 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). 2589 * this_rq is locked. 2590 */ 2591 - static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, 2592 - struct sched_domain *sd) 2593 { 2594 struct sched_group *group; 2595 runqueue_t *busiest = NULL; ··· 2638 2639 out_balanced: 2640 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2641 - if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2642 return -1; 2643 sd->nr_balance_failed = 0; 2644 return 0; 2645 } 2646 ··· 2656 2657 for_each_domain(this_cpu, sd) { 2658 if (sd->flags & SD_BALANCE_NEWIDLE) { 2659 - if (load_balance_newidle(this_cpu, this_rq, sd)) { 2660 - /* We've pulled tasks over so stop searching */ 2661 break; 2662 - } 2663 } 2664 } 2665 } ··· 2677 runqueue_t *target_rq; 2678 int target_cpu = busiest_rq->push_cpu; 2679 2680 if (busiest_rq->nr_running <= 1) 2681 - /* no task to move */ 2682 return; 2683 2684 target_rq = cpu_rq(target_cpu); ··· 2696 /* Search for an sd spanning us and the target CPU. 
*/ 2697 for_each_domain(target_cpu, sd) { 2698 if ((sd->flags & SD_LOAD_BALANCE) && 2699 - cpu_isset(busiest_cpu, sd->span)) 2700 break; 2701 } 2702 2703 - if (unlikely(sd == NULL)) 2704 - goto out; 2705 2706 - schedstat_inc(sd, alb_cnt); 2707 - 2708 - if (move_tasks(target_rq, target_cpu, busiest_rq, 1, 2709 - RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL)) 2710 - schedstat_inc(sd, alb_pushed); 2711 - else 2712 - schedstat_inc(sd, alb_failed); 2713 - out: 2714 spin_unlock(&target_rq->lock); 2715 } 2716 ··· 2722 * Balancing parameters are set up in arch_init_sched_domains. 2723 */ 2724 2725 - /* Don't have all balancing operations going off at once */ 2726 - #define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) 2727 - 2728 - static void rebalance_tick(int this_cpu, runqueue_t *this_rq, 2729 - enum idle_type idle) 2730 { 2731 - unsigned long old_load, this_load; 2732 - unsigned long j = jiffies + CPU_OFFSET(this_cpu); 2733 struct sched_domain *sd; 2734 - int i; 2735 2736 this_load = this_rq->raw_weighted_load; 2737 - /* Update our load */ 2738 - for (i = 0; i < 3; i++) { 2739 - unsigned long new_load = this_load; 2740 - int scale = 1 << i; 2741 old_load = this_rq->cpu_load[i]; 2742 /* 2743 * Round up the averaging division if load is increasing. This 2744 * prevents us from getting stuck on 9 if the load is 10, for ··· 2754 } 2755 2756 for_each_domain(this_cpu, sd) { 2757 - unsigned long interval; 2758 - 2759 if (!(sd->flags & SD_LOAD_BALANCE)) 2760 continue; 2761 ··· 2794 static inline int wake_priority_sleeper(runqueue_t *rq) 2795 { 2796 int ret = 0; 2797 #ifdef CONFIG_SCHED_SMT 2798 spin_lock(&rq->lock); 2799 /* ··· 2818 * This is called on clock ticks and on context switches. 2819 * Bank in p->sched_time the ns elapsed since the last tick or switch. 2820 */ 2821 - static inline void update_cpu_clock(task_t *p, runqueue_t *rq, 2822 - unsigned long long now) 2823 { 2824 - unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); 2825 - p->sched_time += now - last; 2826 } 2827 2828 /* 2829 * Return current->sched_time plus any more ns on the sched_clock 2830 * that have not yet been banked. 2831 */ 2832 - unsigned long long current_sched_time(const task_t *tsk) 2833 { 2834 unsigned long long ns; 2835 unsigned long flags; 2836 local_irq_save(flags); 2837 - ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); 2838 - ns = tsk->sched_time + (sched_clock() - ns); 2839 local_irq_restore(flags); 2840 return ns; 2841 } 2842 ··· 2851 * increasing number of running tasks. We also ignore the interactivity 2852 * if a better static_prio task has expired: 2853 */ 2854 - #define EXPIRED_STARVING(rq) \ 2855 - ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ 2856 - (jiffies - (rq)->expired_timestamp >= \ 2857 - STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ 2858 - ((rq)->curr->static_prio > (rq)->best_expired_prio)) 2859 2860 /* 2861 * Account user cpu time to a process. 
··· 2944 */ 2945 void scheduler_tick(void) 2946 { 2947 int cpu = smp_processor_id(); 2948 runqueue_t *rq = this_rq(); 2949 task_t *p = current; 2950 - unsigned long long now = sched_clock(); 2951 2952 update_cpu_clock(p, rq, now); 2953 ··· 2997 2998 if (!rq->expired_timestamp) 2999 rq->expired_timestamp = jiffies; 3000 - if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { 3001 enqueue_task(p, rq->expired); 3002 if (p->static_prio < rq->best_expired_prio) 3003 rq->best_expired_prio = p->static_prio; ··· 3156 static inline void wake_sleeping_dependent(int this_cpu) 3157 { 3158 } 3159 - 3160 - static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, 3161 - task_t *p) 3162 { 3163 return 0; 3164 } ··· 3211 */ 3212 asmlinkage void __sched schedule(void) 3213 { 3214 - long *switch_count; 3215 - task_t *prev, *next; 3216 - runqueue_t *rq; 3217 - prio_array_t *array; 3218 struct list_head *queue; 3219 unsigned long long now; 3220 unsigned long run_time; 3221 int cpu, idx, new_prio; 3222 3223 /* 3224 * Test if we are atomic. Since do_exit() needs to call into ··· 3371 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3372 goto need_resched; 3373 } 3374 - 3375 EXPORT_SYMBOL(schedule); 3376 3377 #ifdef CONFIG_PREEMPT ··· 3415 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3416 goto need_resched; 3417 } 3418 - 3419 EXPORT_SYMBOL(preempt_schedule); 3420 3421 /* ··· 3463 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, 3464 void *key) 3465 { 3466 - task_t *p = curr->private; 3467 - return try_to_wake_up(p, mode, sync); 3468 } 3469 - 3470 EXPORT_SYMBOL(default_wake_function); 3471 3472 /* ··· 3482 struct list_head *tmp, *next; 3483 3484 list_for_each_safe(tmp, next, &q->task_list) { 3485 - wait_queue_t *curr; 3486 - unsigned flags; 3487 - curr = list_entry(tmp, wait_queue_t, task_list); 3488 - flags = curr->flags; 3489 if (curr->func(curr, mode, sync, key) && 3490 - (flags & WQ_FLAG_EXCLUSIVE) && 3491 - !--nr_exclusive) 3492 break; 3493 } 3494 } ··· 3507 __wake_up_common(q, mode, nr_exclusive, 0, key); 3508 spin_unlock_irqrestore(&q->lock, flags); 3509 } 3510 - 3511 EXPORT_SYMBOL(__wake_up); 3512 3513 /* ··· 3575 void fastcall __sched wait_for_completion(struct completion *x) 3576 { 3577 might_sleep(); 3578 spin_lock_irq(&x->wait.lock); 3579 if (!x->done) { 3580 DECLARE_WAITQUEUE(wait, current); ··· 3720 schedule(); 3721 SLEEP_ON_TAIL 3722 } 3723 - 3724 EXPORT_SYMBOL(interruptible_sleep_on); 3725 3726 long fastcall __sched ··· 3735 3736 return timeout; 3737 } 3738 - 3739 EXPORT_SYMBOL(interruptible_sleep_on_timeout); 3740 3741 void fastcall __sched sleep_on(wait_queue_head_t *q) ··· 3747 schedule(); 3748 SLEEP_ON_TAIL 3749 } 3750 - 3751 EXPORT_SYMBOL(sleep_on); 3752 3753 long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) ··· 3819 3820 void set_user_nice(task_t *p, long nice) 3821 { 3822 unsigned long flags; 3823 prio_array_t *array; 3824 runqueue_t *rq; 3825 - int old_prio, delta; 3826 3827 if (TASK_NICE(p) == nice || nice < -20 || nice > 19) 3828 return; ··· 3877 { 3878 /* convert nice value [19,-20] to rlimit style value [1,40] */ 3879 int nice_rlim = 20 - nice; 3880 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || 3881 capable(CAP_SYS_NICE)); 3882 } ··· 3893 */ 3894 asmlinkage long sys_nice(int increment) 3895 { 3896 - int retval; 3897 - long nice; 3898 3899 /* 3900 * Setpriority might change our priority at the same moment. 
··· 3978 static void __setscheduler(struct task_struct *p, int policy, int prio) 3979 { 3980 BUG_ON(p->array); 3981 p->policy = policy; 3982 p->rt_priority = prio; 3983 p->normal_prio = normal_prio(p); ··· 4002 int sched_setscheduler(struct task_struct *p, int policy, 4003 struct sched_param *param) 4004 { 4005 - int retval; 4006 - int oldprio, oldpolicy = -1; 4007 prio_array_t *array; 4008 unsigned long flags; 4009 runqueue_t *rq; ··· 4504 set_current_state(TASK_RUNNING); 4505 sys_sched_yield(); 4506 } 4507 - 4508 EXPORT_SYMBOL(yield); 4509 4510 /* ··· 4521 schedule(); 4522 atomic_dec(&rq->nr_iowait); 4523 } 4524 - 4525 EXPORT_SYMBOL(io_schedule); 4526 4527 long __sched io_schedule_timeout(long timeout) ··· 4622 4623 static inline struct task_struct *eldest_child(struct task_struct *p) 4624 { 4625 - if (list_empty(&p->children)) return NULL; 4626 return list_entry(p->children.next,struct task_struct,sibling); 4627 } 4628 4629 static inline struct task_struct *older_sibling(struct task_struct *p) 4630 { 4631 - if (p->sibling.prev==&p->parent->children) return NULL; 4632 return list_entry(p->sibling.prev,struct task_struct,sibling); 4633 } 4634 4635 static inline struct task_struct *younger_sibling(struct task_struct *p) 4636 { 4637 - if (p->sibling.next==&p->parent->children) return NULL; 4638 return list_entry(p->sibling.next,struct task_struct,sibling); 4639 } 4640 ··· 4796 int set_cpus_allowed(task_t *p, cpumask_t new_mask) 4797 { 4798 unsigned long flags; 4799 - int ret = 0; 4800 migration_req_t req; 4801 runqueue_t *rq; 4802 4803 rq = task_rq_lock(p, &flags); 4804 if (!cpus_intersects(new_mask, cpu_online_map)) { ··· 4821 } 4822 out: 4823 task_rq_unlock(rq, &flags); 4824 return ret; 4825 } 4826 - 4827 EXPORT_SYMBOL_GPL(set_cpus_allowed); 4828 4829 /* ··· 4884 */ 4885 static int migration_thread(void *data) 4886 { 4887 - runqueue_t *rq; 4888 int cpu = (long)data; 4889 4890 rq = cpu_rq(cpu); 4891 BUG_ON(rq->migration_thread != current); ··· 4942 4943 #ifdef CONFIG_HOTPLUG_CPU 4944 /* Figure out where task on dead CPU should go, use force if neccessary. */ 4945 - static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) 4946 { 4947 runqueue_t *rq; 4948 unsigned long flags; ··· 4952 restart: 4953 /* On same node? */ 4954 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 4955 - cpus_and(mask, mask, tsk->cpus_allowed); 4956 dest_cpu = any_online_cpu(mask); 4957 4958 /* On any allowed CPU? */ 4959 if (dest_cpu == NR_CPUS) 4960 - dest_cpu = any_online_cpu(tsk->cpus_allowed); 4961 4962 /* No more Mr. Nice Guy. */ 4963 if (dest_cpu == NR_CPUS) { 4964 - rq = task_rq_lock(tsk, &flags); 4965 - cpus_setall(tsk->cpus_allowed); 4966 - dest_cpu = any_online_cpu(tsk->cpus_allowed); 4967 task_rq_unlock(rq, &flags); 4968 4969 /* ··· 4971 * kernel threads (both mm NULL), since they never 4972 * leave kernel. 4973 */ 4974 - if (tsk->mm && printk_ratelimit()) 4975 printk(KERN_INFO "process %d (%s) no " 4976 "longer affine to cpu%d\n", 4977 - tsk->pid, tsk->comm, dead_cpu); 4978 } 4979 - if (!__migrate_task(tsk, dead_cpu, dest_cpu)) 4980 goto restart; 4981 } 4982 ··· 5003 /* Run through task list and migrate tasks from the dead cpu. 
*/ 5004 static void migrate_live_tasks(int src_cpu) 5005 { 5006 - struct task_struct *tsk, *t; 5007 5008 write_lock_irq(&tasklist_lock); 5009 5010 - do_each_thread(t, tsk) { 5011 - if (tsk == current) 5012 continue; 5013 5014 - if (task_cpu(tsk) == src_cpu) 5015 - move_task_off_dead_cpu(src_cpu, tsk); 5016 - } while_each_thread(t, tsk); 5017 5018 write_unlock_irq(&tasklist_lock); 5019 } 5020 5021 /* Schedules idle task to be the next runnable task on current CPU. 5022 * It does so by boosting its priority to highest possible and adding it to 5023 - * the _front_ of runqueue. Used by CPU offline code. 5024 */ 5025 void sched_idle_next(void) 5026 { 5027 - int cpu = smp_processor_id(); 5028 - runqueue_t *rq = this_rq(); 5029 struct task_struct *p = rq->idle; 5030 unsigned long flags; 5031 5032 /* cpu has to be offline */ 5033 - BUG_ON(cpu_online(cpu)); 5034 5035 - /* Strictly not necessary since rest of the CPUs are stopped by now 5036 - * and interrupts disabled on current cpu. 5037 */ 5038 spin_lock_irqsave(&rq->lock, flags); 5039 5040 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); 5041 - /* Add idle task to _front_ of it's priority queue */ 5042 __activate_idle_task(p, rq); 5043 5044 spin_unlock_irqrestore(&rq->lock, flags); 5045 } 5046 5047 - /* Ensures that the idle task is using init_mm right before its cpu goes 5048 * offline. 5049 */ 5050 void idle_task_exit(void) ··· 5061 mmdrop(mm); 5062 } 5063 5064 - static void migrate_dead(unsigned int dead_cpu, task_t *tsk) 5065 { 5066 struct runqueue *rq = cpu_rq(dead_cpu); 5067 5068 /* Must be exiting, otherwise would be on tasklist. */ 5069 - BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); 5070 5071 /* Cannot have done final schedule yet: would have vanished. */ 5072 - BUG_ON(tsk->flags & PF_DEAD); 5073 5074 - get_task_struct(tsk); 5075 5076 /* 5077 * Drop lock around migration; if someone else moves it, ··· 5079 * fine. 5080 */ 5081 spin_unlock_irq(&rq->lock); 5082 - move_task_off_dead_cpu(dead_cpu, tsk); 5083 spin_lock_irq(&rq->lock); 5084 5085 - put_task_struct(tsk); 5086 } 5087 5088 /* release_task() removes task from tasklist, so we won't find dead tasks. */ 5089 static void migrate_dead_tasks(unsigned int dead_cpu) 5090 { 5091 - unsigned arr, i; 5092 struct runqueue *rq = cpu_rq(dead_cpu); 5093 5094 for (arr = 0; arr < 2; arr++) { 5095 for (i = 0; i < MAX_PRIO; i++) { 5096 struct list_head *list = &rq->arrays[arr].queue[i]; 5097 while (!list_empty(list)) 5098 migrate_dead(dead_cpu, 5099 list_entry(list->next, task_t, ··· 5108 * migration_call - callback that gets triggered when a CPU is added. 5109 * Here we can start up the necessary migration thread for the new CPU. 5110 */ 5111 - static int __cpuinit migration_call(struct notifier_block *nfb, 5112 - unsigned long action, 5113 - void *hcpu) 5114 { 5115 - int cpu = (long)hcpu; 5116 struct task_struct *p; 5117 struct runqueue *rq; 5118 unsigned long flags; 5119 ··· 5129 task_rq_unlock(rq, &flags); 5130 cpu_rq(cpu)->migration_thread = p; 5131 break; 5132 case CPU_ONLINE: 5133 /* Strictly unneccessary, as first user will wake it. 
*/ 5134 wake_up_process(cpu_rq(cpu)->migration_thread); 5135 break; 5136 #ifdef CONFIG_HOTPLUG_CPU 5137 case CPU_UP_CANCELED: 5138 if (!cpu_rq(cpu)->migration_thread) ··· 5145 kthread_stop(cpu_rq(cpu)->migration_thread); 5146 cpu_rq(cpu)->migration_thread = NULL; 5147 break; 5148 case CPU_DEAD: 5149 migrate_live_tasks(cpu); 5150 rq = cpu_rq(cpu); ··· 5190 int __init migration_init(void) 5191 { 5192 void *cpu = (void *)(long)smp_processor_id(); 5193 - /* Start one for boot CPU. */ 5194 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); 5195 migration_call(&migration_notifier, CPU_ONLINE, cpu); 5196 register_cpu_notifier(&migration_notifier); 5197 return 0; 5198 } 5199 #endif ··· 5291 } while (sd); 5292 } 5293 #else 5294 - #define sched_domain_debug(sd, cpu) {} 5295 #endif 5296 5297 static int sd_degenerate(struct sched_domain *sd) ··· 5317 return 1; 5318 } 5319 5320 - static int sd_parent_degenerate(struct sched_domain *sd, 5321 - struct sched_domain *parent) 5322 { 5323 unsigned long cflags = sd->flags, pflags = parent->flags; 5324 ··· 5613 /* 5614 * Measure the cache-cost of one task migration. Returns in units of nsec. 5615 */ 5616 - static unsigned long long measure_one(void *cache, unsigned long size, 5617 - int source, int target) 5618 { 5619 cpumask_t mask, saved_mask; 5620 unsigned long long t0, t1, t2, t3, cost; ··· 5964 */ 5965 static cpumask_t sched_domain_node_span(int node) 5966 { 5967 - int i; 5968 - cpumask_t span, nodemask; 5969 DECLARE_BITMAP(used_nodes, MAX_NUMNODES); 5970 5971 cpus_clear(span); 5972 bitmap_zero(used_nodes, MAX_NUMNODES); ··· 5977 5978 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { 5979 int next_node = find_next_best_node(node, used_nodes); 5980 nodemask = node_to_cpumask(next_node); 5981 cpus_or(span, span, nodemask); 5982 } ··· 5987 #endif 5988 5989 int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 5990 /* 5991 - * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we 5992 - * can switch it on easily if needed. 
5993 */ 5994 #ifdef CONFIG_SCHED_SMT 5995 static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 5996 static struct sched_group sched_group_cpus[NR_CPUS]; 5997 static int cpu_to_cpu_group(int cpu) 5998 { 5999 return cpu; 6000 } 6001 #endif 6002 6003 #ifdef CONFIG_SCHED_MC 6004 static DEFINE_PER_CPU(struct sched_domain, core_domains); 6005 static struct sched_group *sched_group_core_bycpu[NR_CPUS]; ··· 6023 6024 static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6025 static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; 6026 static int cpu_to_phys_group(int cpu) 6027 { 6028 - #if defined(CONFIG_SCHED_MC) 6029 cpumask_t mask = cpu_coregroup_map(cpu); 6030 return first_cpu(mask); 6031 #elif defined(CONFIG_SCHED_SMT) ··· 6572 int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) 6573 { 6574 int err = 0; 6575 #ifdef CONFIG_SCHED_SMT 6576 if (smt_capable()) 6577 err = sysfs_create_file(&cls->kset.kobj, ··· 6592 { 6593 return sprintf(page, "%u\n", sched_mc_power_savings); 6594 } 6595 - static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6596 { 6597 return sched_power_savings_store(buf, count, 0); 6598 } ··· 6606 { 6607 return sprintf(page, "%u\n", sched_smt_power_savings); 6608 } 6609 - static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6610 { 6611 return sched_power_savings_store(buf, count, 1); 6612 } ··· 6669 { 6670 /* Linker adds these: start and end of __sched functions */ 6671 extern char __sched_text_start[], __sched_text_end[]; 6672 return in_lock_functions(addr) || 6673 (addr >= (unsigned long)__sched_text_start 6674 && addr < (unsigned long)__sched_text_end); ··· 6677 6678 void __init sched_init(void) 6679 { 6680 - runqueue_t *rq; 6681 int i, j, k; 6682 6683 for_each_possible_cpu(i) { 6684 prio_array_t *array; 6685 6686 rq = cpu_rq(i); 6687 spin_lock_init(&rq->lock); ··· 6732 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6733 void __might_sleep(char *file, int line) 6734 { 6735 - #if defined(in_atomic) 6736 static unsigned long prev_jiffy; /* ratelimiting */ 6737 6738 if ((in_atomic() || irqs_disabled()) &&
··· 184 return static_prio_timeslice(p->static_prio); 185 } 186 187 /* 188 * These are the runqueue data structures: 189 */ ··· 278 * The domain tree of any CPU may only be accessed from within 279 * preempt-disabled sections. 280 */ 281 + #define for_each_domain(cpu, __sd) \ 282 + for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) 283 284 #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) 285 #define this_rq() (&__get_cpu_var(runqueues)) ··· 1039 req->task = p; 1040 req->dest_cpu = dest_cpu; 1041 list_add(&req->list, &rq->migration_queue); 1042 + 1043 return 1; 1044 } 1045 ··· 1135 runqueue_t *rq = cpu_rq(cpu); 1136 unsigned long n = rq->nr_running; 1137 1138 + return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; 1139 } 1140 1141 /* ··· 1494 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1495 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); 1496 } 1497 EXPORT_SYMBOL(wake_up_process); 1498 1499 int fastcall wake_up_state(task_t *p, unsigned int state) ··· 1867 #ifdef CONFIG_SMP 1868 1869 /* 1870 + * Is this task likely cache-hot: 1871 + */ 1872 + static inline int 1873 + task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd) 1874 + { 1875 + return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time; 1876 + } 1877 + 1878 + /* 1879 * double_rq_lock - safely lock two runqueues 1880 * 1881 * Note this does not disable interrupts like task_rq_lock, ··· 2029 } 2030 2031 #define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) 2032 + 2033 /* 2034 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted 2035 * load from busiest to this_rq, as part of a balancing operation within ··· 2041 struct sched_domain *sd, enum idle_type idle, 2042 int *all_pinned) 2043 { 2044 + int idx, pulled = 0, pinned = 0, this_best_prio, best_prio, 2045 + best_prio_seen, skip_for_load; 2046 prio_array_t *array, *dst_array; 2047 struct list_head *head, *curr; 2048 long rem_load_move; 2049 task_t *tmp; 2050 ··· 2055 rem_load_move = max_load_move; 2056 pinned = 1; 2057 this_best_prio = rq_best_prio(this_rq); 2058 + best_prio = rq_best_prio(busiest); 2059 /* 2060 * Enable handling of the case where there is more than one task 2061 * with the best priority. If the current running task is one 2062 + * of those with prio==best_prio we know it won't be moved 2063 * and therefore it's safe to override the skip (based on load) of 2064 * any task we find with that prio. 2065 */ 2066 + best_prio_seen = best_prio == busiest->curr->prio; 2067 2068 /* 2069 * We first consider expired tasks. Those will likely not be ··· 2110 */ 2111 skip_for_load = tmp->load_weight > rem_load_move; 2112 if (skip_for_load && idx < this_best_prio) 2113 + skip_for_load = !best_prio_seen && idx == best_prio; 2114 if (skip_for_load || 2115 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { 2116 + 2117 + best_prio_seen |= idx == best_prio; 2118 if (curr != head) 2119 goto skip_queue; 2120 idx++; ··· 2156 2157 /* 2158 * find_busiest_group finds and returns the busiest CPU group within the 2159 + * domain. It calculates and returns the amount of weighted load which 2160 + * should be moved to restore balance via the imbalance parameter. 
2161 */ 2162 static struct sched_group * 2163 find_busiest_group(struct sched_domain *sd, int this_cpu, ··· 2279 * capacity but still has some space to pick up some load 2280 * from other group and save more power 2281 */ 2282 + if (sum_nr_running <= group_capacity - 1) { 2283 if (sum_nr_running > leader_nr_running || 2284 (sum_nr_running == leader_nr_running && 2285 first_cpu(group->cpumask) > ··· 2287 group_leader = group; 2288 leader_nr_running = sum_nr_running; 2289 } 2290 + } 2291 group_next: 2292 #endif 2293 group = group->next; ··· 2342 * moved 2343 */ 2344 if (*imbalance < busiest_load_per_task) { 2345 + unsigned long tmp, pwr_now, pwr_move; 2346 unsigned int imbn; 2347 2348 small_imbalance: ··· 2415 /* 2416 * find_busiest_queue - find the busiest runqueue among the cpus in group. 2417 */ 2418 + static runqueue_t * 2419 + find_busiest_queue(struct sched_group *group, enum idle_type idle, 2420 + unsigned long imbalance) 2421 { 2422 + runqueue_t *busiest = NULL, *rq; 2423 unsigned long max_load = 0; 2424 int i; 2425 2426 for_each_cpu_mask(i, group->cpumask) { 2427 + rq = cpu_rq(i); 2428 2429 + if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance) 2430 continue; 2431 2432 + if (rq->raw_weighted_load > max_load) { 2433 + max_load = rq->raw_weighted_load; 2434 + busiest = rq; 2435 } 2436 } 2437 ··· 2443 */ 2444 #define MAX_PINNED_INTERVAL 512 2445 2446 + static inline unsigned long minus_1_or_zero(unsigned long n) 2447 + { 2448 + return n > 0 ? n - 1 : 0; 2449 + } 2450 + 2451 /* 2452 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2453 * tasks if there is an imbalance. ··· 2453 static int load_balance(int this_cpu, runqueue_t *this_rq, 2454 struct sched_domain *sd, enum idle_type idle) 2455 { 2456 + int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 2457 struct sched_group *group; 2458 unsigned long imbalance; 2459 + runqueue_t *busiest; 2460 2461 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2462 !sched_smt_power_savings) ··· 2492 */ 2493 double_rq_lock(this_rq, busiest); 2494 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2495 + minus_1_or_zero(busiest->nr_running), 2496 + imbalance, sd, idle, &all_pinned); 2497 double_rq_unlock(this_rq, busiest); 2498 2499 /* All tasks on this runqueue were pinned by CPU affinity */ ··· 2566 (sd->balance_interval < sd->max_interval)) 2567 sd->balance_interval *= 2; 2568 2569 + if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2570 + !sched_smt_power_savings) 2571 return -1; 2572 return 0; 2573 } ··· 2578 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). 2579 * this_rq is locked. 2580 */ 2581 + static int 2582 + load_balance_newidle(int this_cpu, runqueue_t *this_rq, struct sched_domain *sd) 2583 { 2584 struct sched_group *group; 2585 runqueue_t *busiest = NULL; ··· 2628 2629 out_balanced: 2630 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2631 + if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2632 + !sched_smt_power_savings) 2633 return -1; 2634 sd->nr_balance_failed = 0; 2635 + 2636 return 0; 2637 } 2638 ··· 2644 2645 for_each_domain(this_cpu, sd) { 2646 if (sd->flags & SD_BALANCE_NEWIDLE) { 2647 + /* If we've pulled tasks over stop searching: */ 2648 + if (load_balance_newidle(this_cpu, this_rq, sd)) 2649 break; 2650 } 2651 } 2652 } ··· 2666 runqueue_t *target_rq; 2667 int target_cpu = busiest_rq->push_cpu; 2668 2669 + /* Is there any task to move? 
*/ 2670 if (busiest_rq->nr_running <= 1) 2671 return; 2672 2673 target_rq = cpu_rq(target_cpu); ··· 2685 /* Search for an sd spanning us and the target CPU. */ 2686 for_each_domain(target_cpu, sd) { 2687 if ((sd->flags & SD_LOAD_BALANCE) && 2688 + cpu_isset(busiest_cpu, sd->span)) 2689 break; 2690 } 2691 2692 + if (likely(sd)) { 2693 + schedstat_inc(sd, alb_cnt); 2694 2695 + if (move_tasks(target_rq, target_cpu, busiest_rq, 1, 2696 + RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, 2697 + NULL)) 2698 + schedstat_inc(sd, alb_pushed); 2699 + else 2700 + schedstat_inc(sd, alb_failed); 2701 + } 2702 spin_unlock(&target_rq->lock); 2703 } 2704 ··· 2712 * Balancing parameters are set up in arch_init_sched_domains. 2713 */ 2714 2715 + /* Don't have all balancing operations going off at once: */ 2716 + static inline unsigned long cpu_offset(int cpu) 2717 { 2718 + return jiffies + cpu * HZ / NR_CPUS; 2719 + } 2720 + 2721 + static void 2722 + rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idle) 2723 + { 2724 + unsigned long this_load, interval, j = cpu_offset(this_cpu); 2725 struct sched_domain *sd; 2726 + int i, scale; 2727 2728 this_load = this_rq->raw_weighted_load; 2729 + 2730 + /* Update our load: */ 2731 + for (i = 0, scale = 1; i < 3; i++, scale <<= 1) { 2732 + unsigned long old_load, new_load; 2733 + 2734 old_load = this_rq->cpu_load[i]; 2735 + new_load = this_load; 2736 /* 2737 * Round up the averaging division if load is increasing. This 2738 * prevents us from getting stuck on 9 if the load is 10, for ··· 2740 } 2741 2742 for_each_domain(this_cpu, sd) { 2743 if (!(sd->flags & SD_LOAD_BALANCE)) 2744 continue; 2745 ··· 2782 static inline int wake_priority_sleeper(runqueue_t *rq) 2783 { 2784 int ret = 0; 2785 + 2786 #ifdef CONFIG_SCHED_SMT 2787 spin_lock(&rq->lock); 2788 /* ··· 2805 * This is called on clock ticks and on context switches. 2806 * Bank in p->sched_time the ns elapsed since the last tick or switch. 2807 */ 2808 + static inline void 2809 + update_cpu_clock(task_t *p, runqueue_t *rq, unsigned long long now) 2810 { 2811 + p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick); 2812 } 2813 2814 /* 2815 * Return current->sched_time plus any more ns on the sched_clock 2816 * that have not yet been banked. 2817 */ 2818 + unsigned long long current_sched_time(const task_t *p) 2819 { 2820 unsigned long long ns; 2821 unsigned long flags; 2822 + 2823 local_irq_save(flags); 2824 + ns = max(p->timestamp, task_rq(p)->timestamp_last_tick); 2825 + ns = p->sched_time + sched_clock() - ns; 2826 local_irq_restore(flags); 2827 + 2828 return ns; 2829 } 2830 ··· 2837 * increasing number of running tasks. We also ignore the interactivity 2838 * if a better static_prio task has expired: 2839 */ 2840 + static inline int expired_starving(runqueue_t *rq) 2841 + { 2842 + if (rq->curr->static_prio > rq->best_expired_prio) 2843 + return 1; 2844 + if (!STARVATION_LIMIT || !rq->expired_timestamp) 2845 + return 0; 2846 + if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running) 2847 + return 1; 2848 + return 0; 2849 + } 2850 2851 /* 2852 * Account user cpu time to a process. 
··· 2925 */ 2926 void scheduler_tick(void) 2927 { 2928 + unsigned long long now = sched_clock(); 2929 int cpu = smp_processor_id(); 2930 runqueue_t *rq = this_rq(); 2931 task_t *p = current; 2932 2933 update_cpu_clock(p, rq, now); 2934 ··· 2978 2979 if (!rq->expired_timestamp) 2980 rq->expired_timestamp = jiffies; 2981 + if (!TASK_INTERACTIVE(p) || expired_starving(rq)) { 2982 enqueue_task(p, rq->expired); 2983 if (p->static_prio < rq->best_expired_prio) 2984 rq->best_expired_prio = p->static_prio; ··· 3137 static inline void wake_sleeping_dependent(int this_cpu) 3138 { 3139 } 3140 + static inline int 3141 + dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) 3142 { 3143 return 0; 3144 } ··· 3193 */ 3194 asmlinkage void __sched schedule(void) 3195 { 3196 struct list_head *queue; 3197 unsigned long long now; 3198 unsigned long run_time; 3199 int cpu, idx, new_prio; 3200 + task_t *prev, *next; 3201 + prio_array_t *array; 3202 + long *switch_count; 3203 + runqueue_t *rq; 3204 3205 /* 3206 * Test if we are atomic. Since do_exit() needs to call into ··· 3353 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3354 goto need_resched; 3355 } 3356 EXPORT_SYMBOL(schedule); 3357 3358 #ifdef CONFIG_PREEMPT ··· 3398 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3399 goto need_resched; 3400 } 3401 EXPORT_SYMBOL(preempt_schedule); 3402 3403 /* ··· 3447 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, 3448 void *key) 3449 { 3450 + return try_to_wake_up(curr->private, mode, sync); 3451 } 3452 EXPORT_SYMBOL(default_wake_function); 3453 3454 /* ··· 3468 struct list_head *tmp, *next; 3469 3470 list_for_each_safe(tmp, next, &q->task_list) { 3471 + wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); 3472 + unsigned flags = curr->flags; 3473 + 3474 if (curr->func(curr, mode, sync, key) && 3475 + (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) 3476 break; 3477 } 3478 } ··· 3495 __wake_up_common(q, mode, nr_exclusive, 0, key); 3496 spin_unlock_irqrestore(&q->lock, flags); 3497 } 3498 EXPORT_SYMBOL(__wake_up); 3499 3500 /* ··· 3564 void fastcall __sched wait_for_completion(struct completion *x) 3565 { 3566 might_sleep(); 3567 + 3568 spin_lock_irq(&x->wait.lock); 3569 if (!x->done) { 3570 DECLARE_WAITQUEUE(wait, current); ··· 3708 schedule(); 3709 SLEEP_ON_TAIL 3710 } 3711 EXPORT_SYMBOL(interruptible_sleep_on); 3712 3713 long fastcall __sched ··· 3724 3725 return timeout; 3726 } 3727 EXPORT_SYMBOL(interruptible_sleep_on_timeout); 3728 3729 void fastcall __sched sleep_on(wait_queue_head_t *q) ··· 3737 schedule(); 3738 SLEEP_ON_TAIL 3739 } 3740 EXPORT_SYMBOL(sleep_on); 3741 3742 long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) ··· 3810 3811 void set_user_nice(task_t *p, long nice) 3812 { 3813 + int old_prio, delta; 3814 unsigned long flags; 3815 prio_array_t *array; 3816 runqueue_t *rq; 3817 3818 if (TASK_NICE(p) == nice || nice < -20 || nice > 19) 3819 return; ··· 3868 { 3869 /* convert nice value [19,-20] to rlimit style value [1,40] */ 3870 int nice_rlim = 20 - nice; 3871 + 3872 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || 3873 capable(CAP_SYS_NICE)); 3874 } ··· 3883 */ 3884 asmlinkage long sys_nice(int increment) 3885 { 3886 + long nice, retval; 3887 3888 /* 3889 * Setpriority might change our priority at the same moment. 
··· 3969 static void __setscheduler(struct task_struct *p, int policy, int prio) 3970 { 3971 BUG_ON(p->array); 3972 + 3973 p->policy = policy; 3974 p->rt_priority = prio; 3975 p->normal_prio = normal_prio(p); ··· 3992 int sched_setscheduler(struct task_struct *p, int policy, 3993 struct sched_param *param) 3994 { 3995 + int retval, oldprio, oldpolicy = -1; 3996 prio_array_t *array; 3997 unsigned long flags; 3998 runqueue_t *rq; ··· 4495 set_current_state(TASK_RUNNING); 4496 sys_sched_yield(); 4497 } 4498 EXPORT_SYMBOL(yield); 4499 4500 /* ··· 4513 schedule(); 4514 atomic_dec(&rq->nr_iowait); 4515 } 4516 EXPORT_SYMBOL(io_schedule); 4517 4518 long __sched io_schedule_timeout(long timeout) ··· 4615 4616 static inline struct task_struct *eldest_child(struct task_struct *p) 4617 { 4618 + if (list_empty(&p->children)) 4619 + return NULL; 4620 return list_entry(p->children.next,struct task_struct,sibling); 4621 } 4622 4623 static inline struct task_struct *older_sibling(struct task_struct *p) 4624 { 4625 + if (p->sibling.prev==&p->parent->children) 4626 + return NULL; 4627 return list_entry(p->sibling.prev,struct task_struct,sibling); 4628 } 4629 4630 static inline struct task_struct *younger_sibling(struct task_struct *p) 4631 { 4632 + if (p->sibling.next==&p->parent->children) 4633 + return NULL; 4634 return list_entry(p->sibling.next,struct task_struct,sibling); 4635 } 4636 ··· 4786 int set_cpus_allowed(task_t *p, cpumask_t new_mask) 4787 { 4788 unsigned long flags; 4789 migration_req_t req; 4790 runqueue_t *rq; 4791 + int ret = 0; 4792 4793 rq = task_rq_lock(p, &flags); 4794 if (!cpus_intersects(new_mask, cpu_online_map)) { ··· 4811 } 4812 out: 4813 task_rq_unlock(rq, &flags); 4814 + 4815 return ret; 4816 } 4817 EXPORT_SYMBOL_GPL(set_cpus_allowed); 4818 4819 /* ··· 4874 */ 4875 static int migration_thread(void *data) 4876 { 4877 int cpu = (long)data; 4878 + runqueue_t *rq; 4879 4880 rq = cpu_rq(cpu); 4881 BUG_ON(rq->migration_thread != current); ··· 4932 4933 #ifdef CONFIG_HOTPLUG_CPU 4934 /* Figure out where task on dead CPU should go, use force if neccessary. */ 4935 + static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) 4936 { 4937 runqueue_t *rq; 4938 unsigned long flags; ··· 4942 restart: 4943 /* On same node? */ 4944 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 4945 + cpus_and(mask, mask, p->cpus_allowed); 4946 dest_cpu = any_online_cpu(mask); 4947 4948 /* On any allowed CPU? */ 4949 if (dest_cpu == NR_CPUS) 4950 + dest_cpu = any_online_cpu(p->cpus_allowed); 4951 4952 /* No more Mr. Nice Guy. */ 4953 if (dest_cpu == NR_CPUS) { 4954 + rq = task_rq_lock(p, &flags); 4955 + cpus_setall(p->cpus_allowed); 4956 + dest_cpu = any_online_cpu(p->cpus_allowed); 4957 task_rq_unlock(rq, &flags); 4958 4959 /* ··· 4961 * kernel threads (both mm NULL), since they never 4962 * leave kernel. 4963 */ 4964 + if (p->mm && printk_ratelimit()) 4965 printk(KERN_INFO "process %d (%s) no " 4966 "longer affine to cpu%d\n", 4967 + p->pid, p->comm, dead_cpu); 4968 } 4969 + if (!__migrate_task(p, dead_cpu, dest_cpu)) 4970 goto restart; 4971 } 4972 ··· 4993 /* Run through task list and migrate tasks from the dead cpu. 
*/ 4994 static void migrate_live_tasks(int src_cpu) 4995 { 4996 + struct task_struct *p, *t; 4997 4998 write_lock_irq(&tasklist_lock); 4999 5000 + do_each_thread(t, p) { 5001 + if (p == current) 5002 continue; 5003 5004 + if (task_cpu(p) == src_cpu) 5005 + move_task_off_dead_cpu(src_cpu, p); 5006 + } while_each_thread(t, p); 5007 5008 write_unlock_irq(&tasklist_lock); 5009 } 5010 5011 /* Schedules idle task to be the next runnable task on current CPU. 5012 * It does so by boosting its priority to highest possible and adding it to 5013 + * the _front_ of the runqueue. Used by CPU offline code. 5014 */ 5015 void sched_idle_next(void) 5016 { 5017 + int this_cpu = smp_processor_id(); 5018 + runqueue_t *rq = cpu_rq(this_cpu); 5019 struct task_struct *p = rq->idle; 5020 unsigned long flags; 5021 5022 /* cpu has to be offline */ 5023 + BUG_ON(cpu_online(this_cpu)); 5024 5025 + /* 5026 + * Strictly not necessary since rest of the CPUs are stopped by now 5027 + * and interrupts disabled on the current cpu. 5028 */ 5029 spin_lock_irqsave(&rq->lock, flags); 5030 5031 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); 5032 + 5033 + /* Add idle task to the _front_ of its priority queue: */ 5034 __activate_idle_task(p, rq); 5035 5036 spin_unlock_irqrestore(&rq->lock, flags); 5037 } 5038 5039 + /* 5040 + * Ensures that the idle task is using init_mm right before its cpu goes 5041 * offline. 5042 */ 5043 void idle_task_exit(void) ··· 5048 mmdrop(mm); 5049 } 5050 5051 + static void migrate_dead(unsigned int dead_cpu, task_t *p) 5052 { 5053 struct runqueue *rq = cpu_rq(dead_cpu); 5054 5055 /* Must be exiting, otherwise would be on tasklist. */ 5056 + BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); 5057 5058 /* Cannot have done final schedule yet: would have vanished. */ 5059 + BUG_ON(p->flags & PF_DEAD); 5060 5061 + get_task_struct(p); 5062 5063 /* 5064 * Drop lock around migration; if someone else moves it, ··· 5066 * fine. 5067 */ 5068 spin_unlock_irq(&rq->lock); 5069 + move_task_off_dead_cpu(dead_cpu, p); 5070 spin_lock_irq(&rq->lock); 5071 5072 + put_task_struct(p); 5073 } 5074 5075 /* release_task() removes task from tasklist, so we won't find dead tasks. */ 5076 static void migrate_dead_tasks(unsigned int dead_cpu) 5077 { 5078 struct runqueue *rq = cpu_rq(dead_cpu); 5079 + unsigned int arr, i; 5080 5081 for (arr = 0; arr < 2; arr++) { 5082 for (i = 0; i < MAX_PRIO; i++) { 5083 struct list_head *list = &rq->arrays[arr].queue[i]; 5084 + 5085 while (!list_empty(list)) 5086 migrate_dead(dead_cpu, 5087 list_entry(list->next, task_t, ··· 5094 * migration_call - callback that gets triggered when a CPU is added. 5095 * Here we can start up the necessary migration thread for the new CPU. 5096 */ 5097 + static int __cpuinit 5098 + migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) 5099 { 5100 struct task_struct *p; 5101 + int cpu = (long)hcpu; 5102 struct runqueue *rq; 5103 unsigned long flags; 5104 ··· 5116 task_rq_unlock(rq, &flags); 5117 cpu_rq(cpu)->migration_thread = p; 5118 break; 5119 + 5120 case CPU_ONLINE: 5121 /* Strictly unneccessary, as first user will wake it. 
*/ 5122 wake_up_process(cpu_rq(cpu)->migration_thread); 5123 break; 5124 + 5125 #ifdef CONFIG_HOTPLUG_CPU 5126 case CPU_UP_CANCELED: 5127 if (!cpu_rq(cpu)->migration_thread) ··· 5130 kthread_stop(cpu_rq(cpu)->migration_thread); 5131 cpu_rq(cpu)->migration_thread = NULL; 5132 break; 5133 + 5134 case CPU_DEAD: 5135 migrate_live_tasks(cpu); 5136 rq = cpu_rq(cpu); ··· 5174 int __init migration_init(void) 5175 { 5176 void *cpu = (void *)(long)smp_processor_id(); 5177 + 5178 + /* Start one for the boot CPU: */ 5179 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); 5180 migration_call(&migration_notifier, CPU_ONLINE, cpu); 5181 register_cpu_notifier(&migration_notifier); 5182 + 5183 return 0; 5184 } 5185 #endif ··· 5273 } while (sd); 5274 } 5275 #else 5276 + # define sched_domain_debug(sd, cpu) do { } while (0) 5277 #endif 5278 5279 static int sd_degenerate(struct sched_domain *sd) ··· 5299 return 1; 5300 } 5301 5302 + static int 5303 + sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) 5304 { 5305 unsigned long cflags = sd->flags, pflags = parent->flags; 5306 ··· 5595 /* 5596 * Measure the cache-cost of one task migration. Returns in units of nsec. 5597 */ 5598 + static unsigned long long 5599 + measure_one(void *cache, unsigned long size, int source, int target) 5600 { 5601 cpumask_t mask, saved_mask; 5602 unsigned long long t0, t1, t2, t3, cost; ··· 5946 */ 5947 static cpumask_t sched_domain_node_span(int node) 5948 { 5949 DECLARE_BITMAP(used_nodes, MAX_NUMNODES); 5950 + cpumask_t span, nodemask; 5951 + int i; 5952 5953 cpus_clear(span); 5954 bitmap_zero(used_nodes, MAX_NUMNODES); ··· 5959 5960 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { 5961 int next_node = find_next_best_node(node, used_nodes); 5962 + 5963 nodemask = node_to_cpumask(next_node); 5964 cpus_or(span, span, nodemask); 5965 } ··· 5968 #endif 5969 5970 int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 5971 + 5972 /* 5973 + * SMT sched-domains: 5974 */ 5975 #ifdef CONFIG_SCHED_SMT 5976 static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 5977 static struct sched_group sched_group_cpus[NR_CPUS]; 5978 + 5979 static int cpu_to_cpu_group(int cpu) 5980 { 5981 return cpu; 5982 } 5983 #endif 5984 5985 + /* 5986 + * multi-core sched-domains: 5987 + */ 5988 #ifdef CONFIG_SCHED_MC 5989 static DEFINE_PER_CPU(struct sched_domain, core_domains); 5990 static struct sched_group *sched_group_core_bycpu[NR_CPUS]; ··· 6000 6001 static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6002 static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; 6003 + 6004 static int cpu_to_phys_group(int cpu) 6005 { 6006 + #ifdef CONFIG_SCHED_MC 6007 cpumask_t mask = cpu_coregroup_map(cpu); 6008 return first_cpu(mask); 6009 #elif defined(CONFIG_SCHED_SMT) ··· 6548 int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) 6549 { 6550 int err = 0; 6551 + 6552 #ifdef CONFIG_SCHED_SMT 6553 if (smt_capable()) 6554 err = sysfs_create_file(&cls->kset.kobj, ··· 6567 { 6568 return sprintf(page, "%u\n", sched_mc_power_savings); 6569 } 6570 + static ssize_t sched_mc_power_savings_store(struct sys_device *dev, 6571 + const char *buf, size_t count) 6572 { 6573 return sched_power_savings_store(buf, count, 0); 6574 } ··· 6580 { 6581 return sprintf(page, "%u\n", sched_smt_power_savings); 6582 } 6583 + static ssize_t sched_smt_power_savings_store(struct sys_device *dev, 6584 + const char *buf, size_t count) 6585 { 6586 return sched_power_savings_store(buf, count, 1); 6587 } ··· 6642 { 6643 /* Linker adds these: start 
and end of __sched functions */ 6644 extern char __sched_text_start[], __sched_text_end[]; 6645 + 6646 return in_lock_functions(addr) || 6647 (addr >= (unsigned long)__sched_text_start 6648 && addr < (unsigned long)__sched_text_end); ··· 6649 6650 void __init sched_init(void) 6651 { 6652 int i, j, k; 6653 6654 for_each_possible_cpu(i) { 6655 prio_array_t *array; 6656 + runqueue_t *rq; 6657 6658 rq = cpu_rq(i); 6659 spin_lock_init(&rq->lock); ··· 6704 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6705 void __might_sleep(char *file, int line) 6706 { 6707 + #ifdef in_atomic 6708 static unsigned long prev_jiffy; /* ratelimiting */ 6709 6710 if ((in_atomic() || irqs_disabled()) &&