Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge remote-tracking branch 'tip/smp/hotplug' into next.2012.09.25b

The conflicts between kernel/rcutree.h and kernel/rcutree_plugin.h
were due to adjacent insertions and deletions, which were resolved
by simply accepting the changes on both branches.

+764 -814
+97 -169
drivers/infiniband/hw/ehca/ehca_irq.c
··· 42 42 */ 43 43 44 44 #include <linux/slab.h> 45 + #include <linux/smpboot.h> 45 46 46 47 #include "ehca_classes.h" 47 48 #include "ehca_irq.h" ··· 653 652 ehca_process_eq((struct ehca_shca*)data, 1); 654 653 } 655 654 656 - static inline int find_next_online_cpu(struct ehca_comp_pool *pool) 655 + static int find_next_online_cpu(struct ehca_comp_pool *pool) 657 656 { 658 657 int cpu; 659 658 unsigned long flags; ··· 663 662 ehca_dmp(cpu_online_mask, cpumask_size(), ""); 664 663 665 664 spin_lock_irqsave(&pool->last_cpu_lock, flags); 666 - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); 667 - if (cpu >= nr_cpu_ids) 668 - cpu = cpumask_first(cpu_online_mask); 669 - pool->last_cpu = cpu; 665 + do { 666 + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); 667 + if (cpu >= nr_cpu_ids) 668 + cpu = cpumask_first(cpu_online_mask); 669 + pool->last_cpu = cpu; 670 + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); 670 671 spin_unlock_irqrestore(&pool->last_cpu_lock, flags); 671 672 672 673 return cpu; 673 674 } 674 675 675 676 static void __queue_comp_task(struct ehca_cq *__cq, 676 - struct ehca_cpu_comp_task *cct) 677 + struct ehca_cpu_comp_task *cct, 678 + struct task_struct *thread) 677 679 { 678 680 unsigned long flags; 679 681 ··· 687 683 __cq->nr_callbacks++; 688 684 list_add_tail(&__cq->entry, &cct->cq_list); 689 685 cct->cq_jobs++; 690 - wake_up(&cct->wait_queue); 686 + wake_up_process(thread); 691 687 } else 692 688 __cq->nr_callbacks++; 693 689 ··· 699 695 { 700 696 int cpu_id; 701 697 struct ehca_cpu_comp_task *cct; 698 + struct task_struct *thread; 702 699 int cq_jobs; 703 700 unsigned long flags; 704 701 ··· 707 702 BUG_ON(!cpu_online(cpu_id)); 708 703 709 704 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 710 - BUG_ON(!cct); 705 + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); 706 + BUG_ON(!cct || !thread); 711 707 712 708 spin_lock_irqsave(&cct->task_lock, flags); 713 709 cq_jobs = cct->cq_jobs; ··· 716 710 if (cq_jobs > 0) { 717 711 cpu_id = find_next_online_cpu(pool); 718 712 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 719 - BUG_ON(!cct); 713 + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); 714 + BUG_ON(!cct || !thread); 720 715 } 721 - 722 - __queue_comp_task(__cq, cct); 716 + __queue_comp_task(__cq, cct, thread); 723 717 } 724 718 725 719 static void run_comp_task(struct ehca_cpu_comp_task *cct) 726 720 { 727 721 struct ehca_cq *cq; 728 - unsigned long flags; 729 - 730 - spin_lock_irqsave(&cct->task_lock, flags); 731 722 732 723 while (!list_empty(&cct->cq_list)) { 733 724 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 734 - spin_unlock_irqrestore(&cct->task_lock, flags); 725 + spin_unlock_irq(&cct->task_lock); 735 726 736 727 comp_event_callback(cq); 737 728 if (atomic_dec_and_test(&cq->nr_events)) 738 729 wake_up(&cq->wait_completion); 739 730 740 - spin_lock_irqsave(&cct->task_lock, flags); 731 + spin_lock_irq(&cct->task_lock); 741 732 spin_lock(&cq->task_lock); 742 733 cq->nr_callbacks--; 743 734 if (!cq->nr_callbacks) { ··· 743 740 } 744 741 spin_unlock(&cq->task_lock); 745 742 } 746 - 747 - spin_unlock_irqrestore(&cct->task_lock, flags); 748 743 } 749 744 750 - static int comp_task(void *__cct) 751 - { 752 - struct ehca_cpu_comp_task *cct = __cct; 753 - int cql_empty; 754 - DECLARE_WAITQUEUE(wait, current); 755 - 756 - set_current_state(TASK_INTERRUPTIBLE); 757 - while (!kthread_should_stop()) { 758 - add_wait_queue(&cct->wait_queue, &wait); 759 - 760 - spin_lock_irq(&cct->task_lock); 761 - cql_empty = list_empty(&cct->cq_list); 762 - spin_unlock_irq(&cct->task_lock); 763 - if (cql_empty) 764 - schedule(); 765 - else 766 - __set_current_state(TASK_RUNNING); 767 - 768 - remove_wait_queue(&cct->wait_queue, &wait); 769 - 770 - spin_lock_irq(&cct->task_lock); 771 - cql_empty = list_empty(&cct->cq_list); 772 - spin_unlock_irq(&cct->task_lock); 773 - if (!cql_empty) 774 - run_comp_task(__cct); 775 - 776 - set_current_state(TASK_INTERRUPTIBLE); 777 - } 778 - __set_current_state(TASK_RUNNING); 779 - 780 - return 0; 781 - } 782 - 783 - static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, 784 - int cpu) 785 - { 786 - struct ehca_cpu_comp_task *cct; 787 - 788 - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 789 - spin_lock_init(&cct->task_lock); 790 - INIT_LIST_HEAD(&cct->cq_list); 791 - init_waitqueue_head(&cct->wait_queue); 792 - cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu), 793 - "ehca_comp/%d", cpu); 794 - 795 - return cct->task; 796 - } 797 - 798 - static void destroy_comp_task(struct ehca_comp_pool *pool, 799 - int cpu) 800 - { 801 - struct ehca_cpu_comp_task *cct; 802 - struct task_struct *task; 803 - unsigned long flags_cct; 804 - 805 - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 806 - 807 - spin_lock_irqsave(&cct->task_lock, flags_cct); 808 - 809 - task = cct->task; 810 - cct->task = NULL; 811 - cct->cq_jobs = 0; 812 - 813 - spin_unlock_irqrestore(&cct->task_lock, flags_cct); 814 - 815 - if (task) 816 - kthread_stop(task); 817 - } 818 - 819 - static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) 745 + static void comp_task_park(unsigned int cpu) 820 746 { 821 747 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 748 + struct ehca_cpu_comp_task *target; 749 + struct task_struct *thread; 750 + struct ehca_cq *cq, *tmp; 822 751 LIST_HEAD(list); 823 - struct ehca_cq *cq; 824 - unsigned long flags_cct; 825 752 826 - spin_lock_irqsave(&cct->task_lock, flags_cct); 827 - 753 + spin_lock_irq(&cct->task_lock); 754 + cct->cq_jobs = 0; 755 + cct->active = 0; 828 756 list_splice_init(&cct->cq_list, &list); 757 + spin_unlock_irq(&cct->task_lock); 829 758 830 - while (!list_empty(&list)) { 831 - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 832 - 759 + cpu = find_next_online_cpu(pool); 760 + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 761 + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); 762 + spin_lock_irq(&target->task_lock); 763 + list_for_each_entry_safe(cq, tmp, &list, entry) { 833 764 list_del(&cq->entry); 834 - __queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks)); 765 + __queue_comp_task(cq, target, thread); 835 766 } 836 - 837 - spin_unlock_irqrestore(&cct->task_lock, flags_cct); 838 - 767 + spin_unlock_irq(&target->task_lock); 839 768 } 840 769 841 - static int __cpuinit comp_pool_callback(struct notifier_block *nfb, 842 - unsigned long action, 843 - void *hcpu) 770 + static void comp_task_stop(unsigned int cpu, bool online) 844 771 { 845 - unsigned int cpu = (unsigned long)hcpu; 846 - struct ehca_cpu_comp_task *cct; 772 + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 847 773 848 - switch (action) { 849 - case CPU_UP_PREPARE: 850 - case CPU_UP_PREPARE_FROZEN: 851 - ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); 852 - if (!create_comp_task(pool, cpu)) { 853 - ehca_gen_err("Can't create comp_task for cpu: %x", cpu); 854 - return notifier_from_errno(-ENOMEM); 855 - } 856 - break; 857 - case CPU_UP_CANCELED: 858 - case CPU_UP_CANCELED_FROZEN: 859 - ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); 860 - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 861 - kthread_bind(cct->task, cpumask_any(cpu_online_mask)); 862 - destroy_comp_task(pool, cpu); 863 - break; 864 - case CPU_ONLINE: 865 - case CPU_ONLINE_FROZEN: 866 - ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); 867 - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 868 - kthread_bind(cct->task, cpu); 869 - wake_up_process(cct->task); 870 - break; 871 - case CPU_DOWN_PREPARE: 872 - case CPU_DOWN_PREPARE_FROZEN: 873 - ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); 874 - break; 875 - case CPU_DOWN_FAILED: 876 - case CPU_DOWN_FAILED_FROZEN: 877 - ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); 878 - break; 879 - case CPU_DEAD: 880 - case CPU_DEAD_FROZEN: 881 - ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); 882 - destroy_comp_task(pool, cpu); 883 - take_over_work(pool, cpu); 884 - break; 885 - } 886 - 887 - return NOTIFY_OK; 774 + spin_lock_irq(&cct->task_lock); 775 + cct->cq_jobs = 0; 776 + cct->active = 0; 777 + WARN_ON(!list_empty(&cct->cq_list)); 778 + spin_unlock_irq(&cct->task_lock); 888 779 } 889 780 890 - static struct notifier_block comp_pool_callback_nb __cpuinitdata = { 891 - .notifier_call = comp_pool_callback, 892 - .priority = 0, 781 + static int comp_task_should_run(unsigned int cpu) 782 + { 783 + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 784 + 785 + return cct->cq_jobs; 786 + } 787 + 788 + static void comp_task(unsigned int cpu) 789 + { 790 + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); 791 + int cql_empty; 792 + 793 + spin_lock_irq(&cct->task_lock); 794 + cql_empty = list_empty(&cct->cq_list); 795 + if (!cql_empty) { 796 + __set_current_state(TASK_RUNNING); 797 + run_comp_task(cct); 798 + } 799 + spin_unlock_irq(&cct->task_lock); 800 + } 801 + 802 + static struct smp_hotplug_thread comp_pool_threads = { 803 + .thread_should_run = comp_task_should_run, 804 + .thread_fn = comp_task, 805 + .thread_comm = "ehca_comp/%u", 806 + .cleanup = comp_task_stop, 807 + .park = comp_task_park, 893 808 }; 894 809 895 810 int ehca_create_comp_pool(void) 896 811 { 897 - int cpu; 898 - struct task_struct *task; 812 + int cpu, ret = -ENOMEM; 899 813 900 814 if (!ehca_scaling_code) 901 815 return 0; ··· 825 905 pool->last_cpu = cpumask_any(cpu_online_mask); 826 906 827 907 pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); 828 - if (pool->cpu_comp_tasks == NULL) { 829 - kfree(pool); 830 - return -EINVAL; 908 + if (!pool->cpu_comp_tasks) 909 + goto out_pool; 910 + 911 + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); 912 + if (!pool->cpu_comp_threads) 913 + goto out_tasks; 914 + 915 + for_each_present_cpu(cpu) { 916 + struct ehca_cpu_comp_task *cct; 917 + 918 + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 919 + spin_lock_init(&cct->task_lock); 920 + INIT_LIST_HEAD(&cct->cq_list); 831 921 } 832 922 833 - for_each_online_cpu(cpu) { 834 - task = create_comp_task(pool, cpu); 835 - if (task) { 836 - kthread_bind(task, cpu); 837 - wake_up_process(task); 838 - } 839 - } 923 + comp_pool_threads.store = pool->cpu_comp_threads; 924 + ret = smpboot_register_percpu_thread(&comp_pool_threads); 925 + if (ret) 926 + goto out_threads; 840 927 841 - register_hotcpu_notifier(&comp_pool_callback_nb); 928 + pr_info("eHCA scaling code enabled\n"); 929 + return ret; 842 930 843 - printk(KERN_INFO "eHCA scaling code enabled\n"); 844 - 845 - return 0; 931 + out_threads: 932 + free_percpu(pool->cpu_comp_threads); 933 + out_tasks: 934 + free_percpu(pool->cpu_comp_tasks); 935 + out_pool: 936 + kfree(pool); 937 + return ret; 846 938 } 847 939 848 940 void ehca_destroy_comp_pool(void) 849 941 { 850 - int i; 851 - 852 942 if (!ehca_scaling_code) 853 943 return; 854 944 855 - unregister_hotcpu_notifier(&comp_pool_callback_nb); 945 + smpboot_unregister_percpu_thread(&comp_pool_threads); 856 946 857 - for_each_online_cpu(i) 858 - destroy_comp_task(pool, i); 859 - 947 + free_percpu(pool->cpu_comp_threads); 860 948 free_percpu(pool->cpu_comp_tasks); 861 949 kfree(pool); 862 950 }
+3 -3
drivers/infiniband/hw/ehca/ehca_irq.h
··· 58 58 void ehca_process_eq(struct ehca_shca *shca, int is_irq); 59 59 60 60 struct ehca_cpu_comp_task { 61 - wait_queue_head_t wait_queue; 62 61 struct list_head cq_list; 63 - struct task_struct *task; 64 62 spinlock_t task_lock; 65 63 int cq_jobs; 64 + int active; 66 65 }; 67 66 68 67 struct ehca_comp_pool { 69 - struct ehca_cpu_comp_task *cpu_comp_tasks; 68 + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; 69 + struct task_struct * __percpu *cpu_comp_threads; 70 70 int last_cpu; 71 71 spinlock_t last_cpu_lock; 72 72 };
+10 -1
include/linux/kthread.h
··· 14 14 kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) 15 15 16 16 17 + struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), 18 + void *data, 19 + unsigned int cpu, 20 + const char *namefmt); 21 + 17 22 /** 18 23 * kthread_run - create and wake a thread. 19 24 * @threadfn: the function to run until signal_pending(current). ··· 39 34 40 35 void kthread_bind(struct task_struct *k, unsigned int cpu); 41 36 int kthread_stop(struct task_struct *k); 42 - int kthread_should_stop(void); 37 + bool kthread_should_stop(void); 38 + bool kthread_should_park(void); 43 39 bool kthread_freezable_should_stop(bool *was_frozen); 44 40 void *kthread_data(struct task_struct *k); 41 + int kthread_park(struct task_struct *k); 42 + void kthread_unpark(struct task_struct *k); 43 + void kthread_parkme(void); 45 44 46 45 int kthreadd(void *unused); 47 46 extern struct task_struct *kthreadd_task;
+43
include/linux/smpboot.h
··· 1 + #ifndef _LINUX_SMPBOOT_H 2 + #define _LINUX_SMPBOOT_H 3 + 4 + #include <linux/types.h> 5 + 6 + struct task_struct; 7 + /* Cookie handed to the thread_fn*/ 8 + struct smpboot_thread_data; 9 + 10 + /** 11 + * struct smp_hotplug_thread - CPU hotplug related thread descriptor 12 + * @store: Pointer to per cpu storage for the task pointers 13 + * @list: List head for core management 14 + * @thread_should_run: Check whether the thread should run or not. Called with 15 + * preemption disabled. 16 + * @thread_fn: The associated thread function 17 + * @setup: Optional setup function, called when the thread gets 18 + * operational the first time 19 + * @cleanup: Optional cleanup function, called when the thread 20 + * should stop (module exit) 21 + * @park: Optional park function, called when the thread is 22 + * parked (cpu offline) 23 + * @unpark: Optional unpark function, called when the thread is 24 + * unparked (cpu online) 25 + * @thread_comm: The base name of the thread 26 + */ 27 + struct smp_hotplug_thread { 28 + struct task_struct __percpu **store; 29 + struct list_head list; 30 + int (*thread_should_run)(unsigned int cpu); 31 + void (*thread_fn)(unsigned int cpu); 32 + void (*setup)(unsigned int cpu); 33 + void (*cleanup)(unsigned int cpu, bool online); 34 + void (*park)(unsigned int cpu); 35 + void (*unpark)(unsigned int cpu); 36 + const char *thread_comm; 37 + }; 38 + 39 + int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); 40 + void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); 41 + int smpboot_thread_schedule(void); 42 + 43 + #endif
+1 -2
kernel/Makefile
··· 10 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 11 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 12 12 notifier.o ksysfs.o cred.o \ 13 - async.o range.o groups.o lglock.o 13 + async.o range.o groups.o lglock.o smpboot.o 14 14 15 15 ifdef CONFIG_FUNCTION_TRACER 16 16 # Do not trace debug files and internal ftrace files ··· 46 46 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o 47 47 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 48 48 obj-$(CONFIG_SMP) += smp.o 49 - obj-$(CONFIG_SMP) += smpboot.o 50 49 ifneq ($(CONFIG_SMP),y) 51 50 obj-y += up.o 52 51 endif
+9 -1
kernel/cpu.c
··· 280 280 __func__, cpu); 281 281 goto out_release; 282 282 } 283 + smpboot_park_threads(cpu); 283 284 284 285 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); 285 286 if (err) { 286 287 /* CPU didn't die: tell everyone. Can't complain. */ 288 + smpboot_unpark_threads(cpu); 287 289 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); 288 - 289 290 goto out_release; 290 291 } 291 292 BUG_ON(cpu_online(cpu)); ··· 355 354 goto out; 356 355 } 357 356 357 + ret = smpboot_create_threads(cpu); 358 + if (ret) 359 + goto out; 360 + 358 361 ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); 359 362 if (ret) { 360 363 nr_calls--; ··· 372 367 if (ret != 0) 373 368 goto out_notify; 374 369 BUG_ON(!cpu_online(cpu)); 370 + 371 + /* Wake the per cpu threads */ 372 + smpboot_unpark_threads(cpu); 375 373 376 374 /* Now call notifier in preparation. */ 377 375 cpu_notify(CPU_ONLINE | mod, hcpu);
+166 -19
kernel/kthread.c
··· 37 37 }; 38 38 39 39 struct kthread { 40 - int should_stop; 40 + unsigned long flags; 41 + unsigned int cpu; 41 42 void *data; 43 + struct completion parked; 42 44 struct completion exited; 45 + }; 46 + 47 + enum KTHREAD_BITS { 48 + KTHREAD_IS_PER_CPU = 0, 49 + KTHREAD_SHOULD_STOP, 50 + KTHREAD_SHOULD_PARK, 51 + KTHREAD_IS_PARKED, 43 52 }; 44 53 45 54 #define to_kthread(tsk) \ ··· 61 52 * and this will return true. You should then return, and your return 62 53 * value will be passed through to kthread_stop(). 63 54 */ 64 - int kthread_should_stop(void) 55 + bool kthread_should_stop(void) 65 56 { 66 - return to_kthread(current)->should_stop; 57 + return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags); 67 58 } 68 59 EXPORT_SYMBOL(kthread_should_stop); 60 + 61 + /** 62 + * kthread_should_park - should this kthread park now? 63 + * 64 + * When someone calls kthread_park() on your kthread, it will be woken 65 + * and this will return true. You should then do the necessary 66 + * cleanup and call kthread_parkme() 67 + * 68 + * Similar to kthread_should_stop(), but this keeps the thread alive 69 + * and in a park position. kthread_unpark() "restarts" the thread and 70 + * calls the thread function again. 71 + */ 72 + bool kthread_should_park(void) 73 + { 74 + return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags); 75 + } 69 76 70 77 /** 71 78 * kthread_freezable_should_stop - should this freezable kthread return now? ··· 121 96 return to_kthread(task)->data; 122 97 } 123 98 99 + static void __kthread_parkme(struct kthread *self) 100 + { 101 + __set_current_state(TASK_INTERRUPTIBLE); 102 + while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { 103 + if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) 104 + complete(&self->parked); 105 + schedule(); 106 + __set_current_state(TASK_INTERRUPTIBLE); 107 + } 108 + clear_bit(KTHREAD_IS_PARKED, &self->flags); 109 + __set_current_state(TASK_RUNNING); 110 + } 111 + 112 + void kthread_parkme(void) 113 + { 114 + __kthread_parkme(to_kthread(current)); 115 + } 116 + 124 117 static int kthread(void *_create) 125 118 { 126 119 /* Copy data: it's on kthread's stack */ ··· 148 105 struct kthread self; 149 106 int ret; 150 107 151 - self.should_stop = 0; 108 + self.flags = 0; 152 109 self.data = data; 153 110 init_completion(&self.exited); 111 + init_completion(&self.parked); 154 112 current->vfork_done = &self.exited; 155 113 156 114 /* OK, tell user we're spawned, wait for stop or wakeup */ ··· 161 117 schedule(); 162 118 163 119 ret = -EINTR; 164 - if (!self.should_stop) 165 - ret = threadfn(data); 166 120 121 + if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) { 122 + __kthread_parkme(&self); 123 + ret = threadfn(data); 124 + } 167 125 /* we can't just return, we must preserve "self" on stack */ 168 126 do_exit(ret); 169 127 } ··· 218 172 * Returns a task_struct or ERR_PTR(-ENOMEM). 219 173 */ 220 174 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), 221 - void *data, 222 - int node, 175 + void *data, int node, 223 176 const char namefmt[], 224 177 ...) 225 178 { ··· 255 210 } 256 211 EXPORT_SYMBOL(kthread_create_on_node); 257 212 213 + static void __kthread_bind(struct task_struct *p, unsigned int cpu) 214 + { 215 + /* It's safe because the task is inactive. */ 216 + do_set_cpus_allowed(p, cpumask_of(cpu)); 217 + p->flags |= PF_THREAD_BOUND; 218 + } 219 + 258 220 /** 259 221 * kthread_bind - bind a just-created kthread to a cpu. 260 222 * @p: thread created by kthread_create(). ··· 278 226 WARN_ON(1); 279 227 return; 280 228 } 281 - 282 - /* It's safe because the task is inactive. */ 283 - do_set_cpus_allowed(p, cpumask_of(cpu)); 284 - p->flags |= PF_THREAD_BOUND; 229 + __kthread_bind(p, cpu); 285 230 } 286 231 EXPORT_SYMBOL(kthread_bind); 232 + 233 + /** 234 + * kthread_create_on_cpu - Create a cpu bound kthread 235 + * @threadfn: the function to run until signal_pending(current). 236 + * @data: data ptr for @threadfn. 237 + * @cpu: The cpu on which the thread should be bound, 238 + * @namefmt: printf-style name for the thread. Format is restricted 239 + * to "name.*%u". Code fills in cpu number. 240 + * 241 + * Description: This helper function creates and names a kernel thread 242 + * The thread will be woken and put into park mode. 243 + */ 244 + struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), 245 + void *data, unsigned int cpu, 246 + const char *namefmt) 247 + { 248 + struct task_struct *p; 249 + 250 + p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt, 251 + cpu); 252 + if (IS_ERR(p)) 253 + return p; 254 + set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); 255 + to_kthread(p)->cpu = cpu; 256 + /* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */ 257 + kthread_park(p); 258 + return p; 259 + } 260 + 261 + static struct kthread *task_get_live_kthread(struct task_struct *k) 262 + { 263 + struct kthread *kthread; 264 + 265 + get_task_struct(k); 266 + kthread = to_kthread(k); 267 + /* It might have exited */ 268 + barrier(); 269 + if (k->vfork_done != NULL) 270 + return kthread; 271 + return NULL; 272 + } 273 + 274 + /** 275 + * kthread_unpark - unpark a thread created by kthread_create(). 276 + * @k: thread created by kthread_create(). 277 + * 278 + * Sets kthread_should_park() for @k to return false, wakes it, and 279 + * waits for it to return. If the thread is marked percpu then its 280 + * bound to the cpu again. 281 + */ 282 + void kthread_unpark(struct task_struct *k) 283 + { 284 + struct kthread *kthread = task_get_live_kthread(k); 285 + 286 + if (kthread) { 287 + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 288 + /* 289 + * We clear the IS_PARKED bit here as we don't wait 290 + * until the task has left the park code. So if we'd 291 + * park before that happens we'd see the IS_PARKED bit 292 + * which might be about to be cleared. 293 + */ 294 + if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { 295 + if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) 296 + __kthread_bind(k, kthread->cpu); 297 + wake_up_process(k); 298 + } 299 + } 300 + put_task_struct(k); 301 + } 302 + 303 + /** 304 + * kthread_park - park a thread created by kthread_create(). 305 + * @k: thread created by kthread_create(). 306 + * 307 + * Sets kthread_should_park() for @k to return true, wakes it, and 308 + * waits for it to return. This can also be called after kthread_create() 309 + * instead of calling wake_up_process(): the thread will park without 310 + * calling threadfn(). 311 + * 312 + * Returns 0 if the thread is parked, -ENOSYS if the thread exited. 313 + * If called by the kthread itself just the park bit is set. 314 + */ 315 + int kthread_park(struct task_struct *k) 316 + { 317 + struct kthread *kthread = task_get_live_kthread(k); 318 + int ret = -ENOSYS; 319 + 320 + if (kthread) { 321 + if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) { 322 + set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 323 + if (k != current) { 324 + wake_up_process(k); 325 + wait_for_completion(&kthread->parked); 326 + } 327 + } 328 + ret = 0; 329 + } 330 + put_task_struct(k); 331 + return ret; 332 + } 287 333 288 334 /** 289 335 * kthread_stop - stop a thread created by kthread_create(). ··· 400 250 */ 401 251 int kthread_stop(struct task_struct *k) 402 252 { 403 - struct kthread *kthread; 253 + struct kthread *kthread = task_get_live_kthread(k); 404 254 int ret; 405 255 406 256 trace_sched_kthread_stop(k); 407 - get_task_struct(k); 408 - 409 - kthread = to_kthread(k); 410 - barrier(); /* it might have exited */ 411 - if (k->vfork_done != NULL) { 412 - kthread->should_stop = 1; 257 + if (kthread) { 258 + set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); 259 + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 413 260 wake_up_process(k); 414 261 wait_for_completion(&kthread->exited); 415 262 }
+4 -8
kernel/rcutree.c
··· 134 134 */ 135 135 static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); 136 136 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 137 - DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); 138 137 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 139 138 DEFINE_PER_CPU(char, rcu_cpu_has_work); 140 139 141 140 #endif /* #ifdef CONFIG_RCU_BOOST */ 142 141 143 - static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 142 + static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 144 143 static void invoke_rcu_core(void); 145 144 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 146 145 ··· 1542 1543 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 1543 1544 1544 1545 /* Adjust any no-longer-needed kthreads. */ 1545 - rcu_stop_cpu_kthread(cpu); 1546 - rcu_node_kthread_setaffinity(rnp, -1); 1546 + rcu_boost_kthread_setaffinity(rnp, -1); 1547 1547 1548 1548 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ 1549 1549 ··· 2570 2572 break; 2571 2573 case CPU_ONLINE: 2572 2574 case CPU_DOWN_FAILED: 2573 - rcu_node_kthread_setaffinity(rnp, -1); 2574 - rcu_cpu_kthread_setrt(cpu, 1); 2575 + rcu_boost_kthread_setaffinity(rnp, -1); 2575 2576 break; 2576 2577 case CPU_DOWN_PREPARE: 2577 - rcu_node_kthread_setaffinity(rnp, cpu); 2578 - rcu_cpu_kthread_setrt(cpu, 0); 2578 + rcu_boost_kthread_setaffinity(rnp, cpu); 2579 2579 break; 2580 2580 case CPU_DYING: 2581 2581 case CPU_DYING_FROZEN:
+1 -14
kernel/rcutree.h
··· 196 196 /* Refused to boost: not sure why, though. */ 197 197 /* This can happen due to race conditions. */ 198 198 #endif /* #ifdef CONFIG_RCU_BOOST */ 199 - struct task_struct *node_kthread_task; 200 - /* kthread that takes care of this rcu_node */ 201 - /* structure, for example, awakening the */ 202 - /* per-CPU kthreads as needed. */ 203 - unsigned int node_kthread_status; 204 - /* State of node_kthread_task for tracing. */ 205 199 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; 206 200 } ____cacheline_internodealigned_in_smp; 207 201 ··· 459 465 #ifdef CONFIG_HOTPLUG_CPU 460 466 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 461 467 unsigned long flags); 462 - static void rcu_stop_cpu_kthread(int cpu); 463 468 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 464 469 static void rcu_print_detail_task_stall(struct rcu_state *rsp); 465 470 static int rcu_print_task_stall(struct rcu_node *rnp); ··· 481 488 static bool rcu_is_callbacks_kthread(void); 482 489 #ifdef CONFIG_RCU_BOOST 483 490 static void rcu_preempt_do_callbacks(void); 484 - static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 485 - cpumask_var_t cm); 486 491 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 487 - struct rcu_node *rnp, 488 - int rnp_index); 489 - static void invoke_rcu_node_kthread(struct rcu_node *rnp); 490 - static void rcu_yield(void (*f)(unsigned long), unsigned long arg); 492 + struct rcu_node *rnp); 491 493 #endif /* #ifdef CONFIG_RCU_BOOST */ 492 - static void rcu_cpu_kthread_setrt(int cpu, int to_rt); 493 494 static void __cpuinit rcu_prepare_kthreads(int cpu); 494 495 static void rcu_prepare_for_idle_init(int cpu); 495 496 static void rcu_cleanup_after_idle(int cpu);
+73 -334
kernel/rcutree_plugin.h
··· 26 26 27 27 #include <linux/delay.h> 28 28 #include <linux/oom.h> 29 + #include <linux/smpboot.h> 29 30 30 31 #define RCU_KTHREAD_PRIO 1 31 32 ··· 1091 1090 1092 1091 #endif /* #else #ifdef CONFIG_RCU_TRACE */ 1093 1092 1093 + static void rcu_wake_cond(struct task_struct *t, int status) 1094 + { 1095 + /* 1096 + * If the thread is yielding, only wake it when this 1097 + * is invoked from idle 1098 + */ 1099 + if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) 1100 + wake_up_process(t); 1101 + } 1102 + 1094 1103 /* 1095 1104 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1096 1105 * or ->boost_tasks, advancing the pointer to the next task in the ··· 1173 1162 } 1174 1163 1175 1164 /* 1176 - * Timer handler to initiate waking up of boost kthreads that 1177 - * have yielded the CPU due to excessive numbers of tasks to 1178 - * boost. We wake up the per-rcu_node kthread, which in turn 1179 - * will wake up the booster kthread. 1180 - */ 1181 - static void rcu_boost_kthread_timer(unsigned long arg) 1182 - { 1183 - invoke_rcu_node_kthread((struct rcu_node *)arg); 1184 - } 1185 - 1186 - /* 1187 1165 * Priority-boosting kthread. One per leaf rcu_node and one for the 1188 1166 * root rcu_node. 1189 1167 */ ··· 1195 1195 else 1196 1196 spincnt = 0; 1197 1197 if (spincnt > 10) { 1198 + rnp->boost_kthread_status = RCU_KTHREAD_YIELDING; 1198 1199 trace_rcu_utilization("End boost kthread@rcu_yield"); 1199 - rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); 1200 + schedule_timeout_interruptible(2); 1200 1201 trace_rcu_utilization("Start boost kthread@rcu_yield"); 1201 1202 spincnt = 0; 1202 1203 } ··· 1235 1234 rnp->boost_tasks = rnp->gp_tasks; 1236 1235 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1237 1236 t = rnp->boost_kthread_task; 1238 - if (t != NULL) 1239 - wake_up_process(t); 1237 + if (t) 1238 + rcu_wake_cond(t, rnp->boost_kthread_status); 1240 1239 } else { 1241 1240 rcu_initiate_boost_trace(rnp); 1242 1241 raw_spin_unlock_irqrestore(&rnp->lock, flags); ··· 1253 1252 local_irq_save(flags); 1254 1253 __this_cpu_write(rcu_cpu_has_work, 1); 1255 1254 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && 1256 - current != __this_cpu_read(rcu_cpu_kthread_task)) 1257 - wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); 1255 + current != __this_cpu_read(rcu_cpu_kthread_task)) { 1256 + rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), 1257 + __this_cpu_read(rcu_cpu_kthread_status)); 1258 + } 1258 1259 local_irq_restore(flags); 1259 1260 } 1260 1261 ··· 1267 1264 static bool rcu_is_callbacks_kthread(void) 1268 1265 { 1269 1266 return __get_cpu_var(rcu_cpu_kthread_task) == current; 1270 - } 1271 - 1272 - /* 1273 - * Set the affinity of the boost kthread. The CPU-hotplug locks are 1274 - * held, so no one should be messing with the existence of the boost 1275 - * kthread. 1276 - */ 1277 - static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 1278 - cpumask_var_t cm) 1279 - { 1280 - struct task_struct *t; 1281 - 1282 - t = rnp->boost_kthread_task; 1283 - if (t != NULL) 1284 - set_cpus_allowed_ptr(rnp->boost_kthread_task, cm); 1285 1267 } 1286 1268 1287 1269 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) ··· 1285 1297 * Returns zero if all is well, a negated errno otherwise. 1286 1298 */ 1287 1299 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1288 - struct rcu_node *rnp, 1289 - int rnp_index) 1300 + struct rcu_node *rnp) 1290 1301 { 1302 + int rnp_index = rnp - &rsp->node[0]; 1291 1303 unsigned long flags; 1292 1304 struct sched_param sp; 1293 1305 struct task_struct *t; 1294 1306 1295 1307 if (&rcu_preempt_state != rsp) 1296 1308 return 0; 1309 + 1310 + if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0) 1311 + return 0; 1312 + 1297 1313 rsp->boost = 1; 1298 1314 if (rnp->boost_kthread_task != NULL) 1299 1315 return 0; ··· 1314 1322 return 0; 1315 1323 } 1316 1324 1317 - #ifdef CONFIG_HOTPLUG_CPU 1318 - 1319 - /* 1320 - * Stop the RCU's per-CPU kthread when its CPU goes offline,. 1321 - */ 1322 - static void rcu_stop_cpu_kthread(int cpu) 1323 - { 1324 - struct task_struct *t; 1325 - 1326 - /* Stop the CPU's kthread. */ 1327 - t = per_cpu(rcu_cpu_kthread_task, cpu); 1328 - if (t != NULL) { 1329 - per_cpu(rcu_cpu_kthread_task, cpu) = NULL; 1330 - kthread_stop(t); 1331 - } 1332 - } 1333 - 1334 - #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1335 - 1336 1325 static void rcu_kthread_do_work(void) 1337 1326 { 1338 1327 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); ··· 1321 1348 rcu_preempt_do_callbacks(); 1322 1349 } 1323 1350 1324 - /* 1325 - * Wake up the specified per-rcu_node-structure kthread. 1326 - * Because the per-rcu_node kthreads are immortal, we don't need 1327 - * to do anything to keep them alive. 1328 - */ 1329 - static void invoke_rcu_node_kthread(struct rcu_node *rnp) 1330 - { 1331 - struct task_struct *t; 1332 - 1333 - t = rnp->node_kthread_task; 1334 - if (t != NULL) 1335 - wake_up_process(t); 1336 - } 1337 - 1338 - /* 1339 - * Set the specified CPU's kthread to run RT or not, as specified by 1340 - * the to_rt argument. The CPU-hotplug locks are held, so the task 1341 - * is not going away. 1342 - */ 1343 - static void rcu_cpu_kthread_setrt(int cpu, int to_rt) 1344 - { 1345 - int policy; 1346 - struct sched_param sp; 1347 - struct task_struct *t; 1348 - 1349 - t = per_cpu(rcu_cpu_kthread_task, cpu); 1350 - if (t == NULL) 1351 - return; 1352 - if (to_rt) { 1353 - policy = SCHED_FIFO; 1354 - sp.sched_priority = RCU_KTHREAD_PRIO; 1355 - } else { 1356 - policy = SCHED_NORMAL; 1357 - sp.sched_priority = 0; 1358 - } 1359 - sched_setscheduler_nocheck(t, policy, &sp); 1360 - } 1361 - 1362 - /* 1363 - * Timer handler to initiate the waking up of per-CPU kthreads that 1364 - * have yielded the CPU due to excess numbers of RCU callbacks. 1365 - * We wake up the per-rcu_node kthread, which in turn will wake up 1366 - * the booster kthread. 1367 - */ 1368 - static void rcu_cpu_kthread_timer(unsigned long arg) 1369 - { 1370 - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); 1371 - struct rcu_node *rnp = rdp->mynode; 1372 - 1373 - atomic_or(rdp->grpmask, &rnp->wakemask); 1374 - invoke_rcu_node_kthread(rnp); 1375 - } 1376 - 1377 - /* 1378 - * Drop to non-real-time priority and yield, but only after posting a 1379 - * timer that will cause us to regain our real-time priority if we 1380 - * remain preempted. Either way, we restore our real-time priority 1381 - * before returning. 1382 - */ 1383 - static void rcu_yield(void (*f)(unsigned long), unsigned long arg) 1351 + static void rcu_cpu_kthread_setup(unsigned int cpu) 1384 1352 { 1385 1353 struct sched_param sp; 1386 - struct timer_list yield_timer; 1387 - int prio = current->rt_priority; 1388 1354 1389 - setup_timer_on_stack(&yield_timer, f, arg); 1390 - mod_timer(&yield_timer, jiffies + 2); 1391 - sp.sched_priority = 0; 1392 - sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); 1393 - set_user_nice(current, 19); 1394 - schedule(); 1395 - set_user_nice(current, 0); 1396 - sp.sched_priority = prio; 1355 + sp.sched_priority = RCU_KTHREAD_PRIO; 1397 1356 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1398 - del_timer(&yield_timer); 1399 1357 } 1400 1358 1401 - /* 1402 - * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. 1403 - * This can happen while the corresponding CPU is either coming online 1404 - * or going offline. We cannot wait until the CPU is fully online 1405 - * before starting the kthread, because the various notifier functions 1406 - * can wait for RCU grace periods. So we park rcu_cpu_kthread() until 1407 - * the corresponding CPU is online. 1408 - * 1409 - * Return 1 if the kthread needs to stop, 0 otherwise. 1410 - * 1411 - * Caller must disable bh. This function can momentarily enable it. 1412 - */ 1413 - static int rcu_cpu_kthread_should_stop(int cpu) 1359 + static void rcu_cpu_kthread_park(unsigned int cpu) 1414 1360 { 1415 - while (cpu_is_offline(cpu) || 1416 - !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) || 1417 - smp_processor_id() != cpu) { 1418 - if (kthread_should_stop()) 1419 - return 1; 1420 - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; 1421 - per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); 1422 - local_bh_enable(); 1423 - schedule_timeout_uninterruptible(1); 1424 - if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu))) 1425 - set_cpus_allowed_ptr(current, cpumask_of(cpu)); 1426 - local_bh_disable(); 1427 - } 1428 - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; 1429 - return 0; 1361 + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; 1362 + } 1363 + 1364 + static int rcu_cpu_kthread_should_run(unsigned int cpu) 1365 + { 1366 + return __get_cpu_var(rcu_cpu_has_work); 1430 1367 } 1431 1368 1432 1369 /* ··· 1344 1461 * RCU softirq used in flavors and configurations of RCU that do not 1345 1462 * support RCU priority boosting. 1346 1463 */ 1347 - static int rcu_cpu_kthread(void *arg) 1464 + static void rcu_cpu_kthread(unsigned int cpu) 1348 1465 { 1349 - int cpu = (int)(long)arg; 1350 - unsigned long flags; 1351 - int spincnt = 0; 1352 - unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); 1353 - char work; 1354 - char *workp = &per_cpu(rcu_cpu_has_work, cpu); 1466 + unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); 1467 + char work, *workp = &__get_cpu_var(rcu_cpu_has_work); 1468 + int spincnt; 1355 1469 1356 - trace_rcu_utilization("Start CPU kthread@init"); 1357 - for (;;) { 1358 - *statusp = RCU_KTHREAD_WAITING; 1359 - trace_rcu_utilization("End CPU kthread@rcu_wait"); 1360 - rcu_wait(*workp != 0 || kthread_should_stop()); 1470 + for (spincnt = 0; spincnt < 10; spincnt++) { 1361 1471 trace_rcu_utilization("Start CPU kthread@rcu_wait"); 1362 1472 local_bh_disable(); 1363 - if (rcu_cpu_kthread_should_stop(cpu)) { 1364 - local_bh_enable(); 1365 - break; 1366 - } 1367 1473 *statusp = RCU_KTHREAD_RUNNING; 1368 - per_cpu(rcu_cpu_kthread_loops, cpu)++; 1369 - local_irq_save(flags); 1474 + this_cpu_inc(rcu_cpu_kthread_loops); 1475 + local_irq_disable(); 1370 1476 work = *workp; 1371 1477 *workp = 0; 1372 - local_irq_restore(flags); 1478 + local_irq_enable(); 1373 1479 if (work) 1374 1480 rcu_kthread_do_work(); 1375 1481 local_bh_enable(); 1376 - if (*workp != 0) 1377 - spincnt++; 1378 - else 1379 - spincnt = 0; 1380 - if (spincnt > 10) { 1381 - *statusp = RCU_KTHREAD_YIELDING; 1382 - trace_rcu_utilization("End CPU kthread@rcu_yield"); 1383 - rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); 1384 - trace_rcu_utilization("Start CPU kthread@rcu_yield"); 1385 - spincnt = 0; 1482 + if (*workp == 0) { 1483 + trace_rcu_utilization("End CPU kthread@rcu_wait"); 1484 + *statusp = RCU_KTHREAD_WAITING; 1485 + return; 1386 1486 } 1387 1487 } 1388 - *statusp = RCU_KTHREAD_STOPPED; 1389 - trace_rcu_utilization("End CPU kthread@term"); 1390 - return 0; 1391 - } 1392 - 1393 - /* 1394 - * Spawn a per-CPU kthread, setting up affinity and priority. 1395 - * Because the CPU hotplug lock is held, no other CPU will be attempting 1396 - * to manipulate rcu_cpu_kthread_task. There might be another CPU 1397 - * attempting to access it during boot, but the locking in kthread_bind() 1398 - * will enforce sufficient ordering. 1399 - * 1400 - * Please note that we cannot simply refuse to wake up the per-CPU 1401 - * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, 1402 - * which can result in softlockup complaints if the task ends up being 1403 - * idle for more than a couple of minutes. 1404 - * 1405 - * However, please note also that we cannot bind the per-CPU kthread to its 1406 - * CPU until that CPU is fully online. We also cannot wait until the 1407 - * CPU is fully online before we create its per-CPU kthread, as this would 1408 - * deadlock the system when CPU notifiers tried waiting for grace 1409 - * periods. So we bind the per-CPU kthread to its CPU only if the CPU 1410 - * is online. If its CPU is not yet fully online, then the code in 1411 - * rcu_cpu_kthread() will wait until it is fully online, and then do 1412 - * the binding. 1413 - */ 1414 - static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) 1415 - { 1416 - struct sched_param sp; 1417 - struct task_struct *t; 1418 - 1419 - if (!rcu_scheduler_fully_active || 1420 - per_cpu(rcu_cpu_kthread_task, cpu) != NULL) 1421 - return 0; 1422 - t = kthread_create_on_node(rcu_cpu_kthread, 1423 - (void *)(long)cpu, 1424 - cpu_to_node(cpu), 1425 - "rcuc/%d", cpu); 1426 - if (IS_ERR(t)) 1427 - return PTR_ERR(t); 1428 - if (cpu_online(cpu)) 1429 - kthread_bind(t, cpu); 1430 - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; 1431 - WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); 1432 - sp.sched_priority = RCU_KTHREAD_PRIO; 1433 - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1434 - per_cpu(rcu_cpu_kthread_task, cpu) = t; 1435 - wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ 1436 - return 0; 1437 - } 1438 - 1439 - /* 1440 - * Per-rcu_node kthread, which is in charge of waking up the per-CPU 1441 - * kthreads when needed. We ignore requests to wake up kthreads 1442 - * for offline CPUs, which is OK because force_quiescent_state() 1443 - * takes care of this case. 1444 - */ 1445 - static int rcu_node_kthread(void *arg) 1446 - { 1447 - int cpu; 1448 - unsigned long flags; 1449 - unsigned long mask; 1450 - struct rcu_node *rnp = (struct rcu_node *)arg; 1451 - struct sched_param sp; 1452 - struct task_struct *t; 1453 - 1454 - for (;;) { 1455 - rnp->node_kthread_status = RCU_KTHREAD_WAITING; 1456 - rcu_wait(atomic_read(&rnp->wakemask) != 0); 1457 - rnp->node_kthread_status = RCU_KTHREAD_RUNNING; 1458 - raw_spin_lock_irqsave(&rnp->lock, flags); 1459 - mask = atomic_xchg(&rnp->wakemask, 0); 1460 - rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ 1461 - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { 1462 - if ((mask & 0x1) == 0) 1463 - continue; 1464 - preempt_disable(); 1465 - t = per_cpu(rcu_cpu_kthread_task, cpu); 1466 - if (!cpu_online(cpu) || t == NULL) { 1467 - preempt_enable(); 1468 - continue; 1469 - } 1470 - per_cpu(rcu_cpu_has_work, cpu) = 1; 1471 - sp.sched_priority = RCU_KTHREAD_PRIO; 1472 - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1473 - preempt_enable(); 1474 - } 1475 - } 1476 - /* NOTREACHED */ 1477 - rnp->node_kthread_status = RCU_KTHREAD_STOPPED; 1478 - return 0; 1488 + *statusp = RCU_KTHREAD_YIELDING; 1489 + trace_rcu_utilization("Start CPU kthread@rcu_yield"); 1490 + schedule_timeout_interruptible(2); 1491 + trace_rcu_utilization("End CPU kthread@rcu_yield"); 1492 + *statusp = RCU_KTHREAD_WAITING; 1479 1493 } 1480 1494 1481 1495 /* ··· 1384 1604 * no outgoing CPU. If there are no CPUs left in the affinity set, 1385 1605 * this function allows the kthread to execute on any CPU. 1386 1606 */ 1387 - static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1607 + static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1388 1608 { 1609 + struct task_struct *t = rnp->boost_kthread_task; 1610 + unsigned long mask = rnp->qsmaskinit; 1389 1611 cpumask_var_t cm; 1390 1612 int cpu; 1391 - unsigned long mask = rnp->qsmaskinit; 1392 1613 1393 - if (rnp->node_kthread_task == NULL) 1614 + if (!t) 1394 1615 return; 1395 - if (!alloc_cpumask_var(&cm, GFP_KERNEL)) 1616 + if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) 1396 1617 return; 1397 - cpumask_clear(cm); 1398 1618 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1399 1619 if ((mask & 0x1) && cpu != outgoingcpu) 1400 1620 cpumask_set_cpu(cpu, cm); ··· 1404 1624 cpumask_clear_cpu(cpu, cm); 1405 1625 WARN_ON_ONCE(cpumask_weight(cm) == 0); 1406 1626 } 1407 - set_cpus_allowed_ptr(rnp->node_kthread_task, cm); 1408 - rcu_boost_kthread_setaffinity(rnp, cm); 1627 + set_cpus_allowed_ptr(t, cm); 1409 1628 free_cpumask_var(cm); 1410 1629 } 1411 1630 1412 - /* 1413 - * Spawn a per-rcu_node kthread, setting priority and affinity. 1414 - * Called during boot before online/offline can happen, or, if 1415 - * during runtime, with the main CPU-hotplug locks held. So only 1416 - * one of these can be executing at a time. 1417 - */ 1418 - static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, 1419 - struct rcu_node *rnp) 1420 - { 1421 - unsigned long flags; 1422 - int rnp_index = rnp - &rsp->node[0]; 1423 - struct sched_param sp; 1424 - struct task_struct *t; 1425 - 1426 - if (!rcu_scheduler_fully_active || 1427 - rnp->qsmaskinit == 0) 1428 - return 0; 1429 - if (rnp->node_kthread_task == NULL) { 1430 - t = kthread_create(rcu_node_kthread, (void *)rnp, 1431 - "rcun/%d", rnp_index); 1432 - if (IS_ERR(t)) 1433 - return PTR_ERR(t); 1434 - raw_spin_lock_irqsave(&rnp->lock, flags); 1435 - rnp->node_kthread_task = t; 1436 - raw_spin_unlock_irqrestore(&rnp->lock, flags); 1437 - sp.sched_priority = 99; 1438 - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1439 - wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ 1440 - } 1441 - return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); 1442 - } 1631 + static struct smp_hotplug_thread rcu_cpu_thread_spec = { 1632 + .store = &rcu_cpu_kthread_task, 1633 + .thread_should_run = rcu_cpu_kthread_should_run, 1634 + .thread_fn = rcu_cpu_kthread, 1635 + .thread_comm = "rcuc/%u", 1636 + .setup = rcu_cpu_kthread_setup, 1637 + .park = rcu_cpu_kthread_park, 1638 + }; 1443 1639 1444 1640 /* 1445 1641 * Spawn all kthreads -- called as soon as the scheduler is running. 1446 1642 */ 1447 1643 static int __init rcu_spawn_kthreads(void) 1448 1644 { 1449 - int cpu; 1450 1645 struct rcu_node *rnp; 1646 + int cpu; 1451 1647 1452 1648 rcu_scheduler_fully_active = 1; 1453 - for_each_possible_cpu(cpu) { 1649 + for_each_possible_cpu(cpu) 1454 1650 per_cpu(rcu_cpu_has_work, cpu) = 0; 1455 - if (cpu_online(cpu)) 1456 - (void)rcu_spawn_one_cpu_kthread(cpu); 1457 - } 1651 + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1458 1652 rnp = rcu_get_root(rcu_state); 1459 - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1653 + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1460 1654 if (NUM_RCU_NODES > 1) { 1461 1655 rcu_for_each_leaf_node(rcu_state, rnp) 1462 - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1656 + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1463 1657 } 1464 1658 return 0; 1465 1659 } ··· 1445 1691 struct rcu_node *rnp = rdp->mynode; 1446 1692 1447 1693 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1448 - if (rcu_scheduler_fully_active) { 1449 - (void)rcu_spawn_one_cpu_kthread(cpu); 1450 - if (rnp->node_kthread_task == NULL) 1451 - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1452 - } 1694 + if (rcu_scheduler_fully_active) 1695 + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1453 1696 } 1454 1697 1455 1698 #else /* #ifdef CONFIG_RCU_BOOST */ ··· 1470 1719 { 1471 1720 } 1472 1721 1473 - #ifdef CONFIG_HOTPLUG_CPU 1474 - 1475 - static void rcu_stop_cpu_kthread(int cpu) 1476 - { 1477 - } 1478 - 1479 - #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1480 - 1481 - static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1482 - { 1483 - } 1484 - 1485 - static void rcu_cpu_kthread_setrt(int cpu, int to_rt) 1722 + static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1486 1723 { 1487 1724 } 1488 1725
+1 -2
kernel/rcutree_trace.c
··· 107 107 rdp->nxttail[RCU_WAIT_TAIL]], 108 108 ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); 109 109 #ifdef CONFIG_RCU_BOOST 110 - seq_printf(m, " kt=%d/%c/%d ktl=%x", 110 + seq_printf(m, " kt=%d/%c ktl=%x", 111 111 per_cpu(rcu_cpu_has_work, rdp->cpu), 112 112 convert_kthread_status(per_cpu(rcu_cpu_kthread_status, 113 113 rdp->cpu)), 114 - per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), 115 114 per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); 116 115 #endif /* #ifdef CONFIG_RCU_BOOST */ 117 116 seq_printf(m, " b=%ld", rdp->blimit);
+233
kernel/smpboot.c
··· 1 1 /* 2 2 * Common SMP CPU bringup/teardown functions 3 3 */ 4 + #include <linux/cpu.h> 4 5 #include <linux/err.h> 5 6 #include <linux/smp.h> 6 7 #include <linux/init.h> 8 + #include <linux/list.h> 9 + #include <linux/slab.h> 7 10 #include <linux/sched.h> 11 + #include <linux/export.h> 8 12 #include <linux/percpu.h> 13 + #include <linux/kthread.h> 14 + #include <linux/smpboot.h> 9 15 10 16 #include "smpboot.h" 17 + 18 + #ifdef CONFIG_SMP 11 19 12 20 #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD 13 21 /* ··· 73 65 } 74 66 } 75 67 #endif 68 + 69 + #endif /* #ifdef CONFIG_SMP */ 70 + 71 + static LIST_HEAD(hotplug_threads); 72 + static DEFINE_MUTEX(smpboot_threads_lock); 73 + 74 + struct smpboot_thread_data { 75 + unsigned int cpu; 76 + unsigned int status; 77 + struct smp_hotplug_thread *ht; 78 + }; 79 + 80 + enum { 81 + HP_THREAD_NONE = 0, 82 + HP_THREAD_ACTIVE, 83 + HP_THREAD_PARKED, 84 + }; 85 + 86 + /** 87 + * smpboot_thread_fn - percpu hotplug thread loop function 88 + * @data: thread data pointer 89 + * 90 + * Checks for thread stop and park conditions. Calls the necessary 91 + * setup, cleanup, park and unpark functions for the registered 92 + * thread. 93 + * 94 + * Returns 1 when the thread should exit, 0 otherwise. 95 + */ 96 + static int smpboot_thread_fn(void *data) 97 + { 98 + struct smpboot_thread_data *td = data; 99 + struct smp_hotplug_thread *ht = td->ht; 100 + 101 + while (1) { 102 + set_current_state(TASK_INTERRUPTIBLE); 103 + preempt_disable(); 104 + if (kthread_should_stop()) { 105 + set_current_state(TASK_RUNNING); 106 + preempt_enable(); 107 + if (ht->cleanup) 108 + ht->cleanup(td->cpu, cpu_online(td->cpu)); 109 + kfree(td); 110 + return 0; 111 + } 112 + 113 + if (kthread_should_park()) { 114 + __set_current_state(TASK_RUNNING); 115 + preempt_enable(); 116 + if (ht->park && td->status == HP_THREAD_ACTIVE) { 117 + BUG_ON(td->cpu != smp_processor_id()); 118 + ht->park(td->cpu); 119 + td->status = HP_THREAD_PARKED; 120 + } 121 + kthread_parkme(); 122 + /* We might have been woken for stop */ 123 + continue; 124 + } 125 + 126 + BUG_ON(td->cpu != smp_processor_id()); 127 + 128 + /* Check for state change setup */ 129 + switch (td->status) { 130 + case HP_THREAD_NONE: 131 + preempt_enable(); 132 + if (ht->setup) 133 + ht->setup(td->cpu); 134 + td->status = HP_THREAD_ACTIVE; 135 + preempt_disable(); 136 + break; 137 + case HP_THREAD_PARKED: 138 + preempt_enable(); 139 + if (ht->unpark) 140 + ht->unpark(td->cpu); 141 + td->status = HP_THREAD_ACTIVE; 142 + preempt_disable(); 143 + break; 144 + } 145 + 146 + if (!ht->thread_should_run(td->cpu)) { 147 + preempt_enable(); 148 + schedule(); 149 + } else { 150 + set_current_state(TASK_RUNNING); 151 + preempt_enable(); 152 + ht->thread_fn(td->cpu); 153 + } 154 + } 155 + } 156 + 157 + static int 158 + __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) 159 + { 160 + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); 161 + struct smpboot_thread_data *td; 162 + 163 + if (tsk) 164 + return 0; 165 + 166 + td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu)); 167 + if (!td) 168 + return -ENOMEM; 169 + td->cpu = cpu; 170 + td->ht = ht; 171 + 172 + tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu, 173 + ht->thread_comm); 174 + if (IS_ERR(tsk)) { 175 + kfree(td); 176 + return PTR_ERR(tsk); 177 + } 178 + 179 + get_task_struct(tsk); 180 + *per_cpu_ptr(ht->store, cpu) = tsk; 181 + return 0; 182 + } 183 + 184 + int smpboot_create_threads(unsigned int cpu) 185 + { 186 + struct smp_hotplug_thread *cur; 187 + int ret = 0; 188 + 189 + mutex_lock(&smpboot_threads_lock); 190 + list_for_each_entry(cur, &hotplug_threads, list) { 191 + ret = __smpboot_create_thread(cur, cpu); 192 + if (ret) 193 + break; 194 + } 195 + mutex_unlock(&smpboot_threads_lock); 196 + return ret; 197 + } 198 + 199 + static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cpu) 200 + { 201 + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); 202 + 203 + kthread_unpark(tsk); 204 + } 205 + 206 + void smpboot_unpark_threads(unsigned int cpu) 207 + { 208 + struct smp_hotplug_thread *cur; 209 + 210 + mutex_lock(&smpboot_threads_lock); 211 + list_for_each_entry(cur, &hotplug_threads, list) 212 + smpboot_unpark_thread(cur, cpu); 213 + mutex_unlock(&smpboot_threads_lock); 214 + } 215 + 216 + static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) 217 + { 218 + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); 219 + 220 + if (tsk) 221 + kthread_park(tsk); 222 + } 223 + 224 + void smpboot_park_threads(unsigned int cpu) 225 + { 226 + struct smp_hotplug_thread *cur; 227 + 228 + mutex_lock(&smpboot_threads_lock); 229 + list_for_each_entry_reverse(cur, &hotplug_threads, list) 230 + smpboot_park_thread(cur, cpu); 231 + mutex_unlock(&smpboot_threads_lock); 232 + } 233 + 234 + static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) 235 + { 236 + unsigned int cpu; 237 + 238 + /* We need to destroy also the parked threads of offline cpus */ 239 + for_each_possible_cpu(cpu) { 240 + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); 241 + 242 + if (tsk) { 243 + kthread_stop(tsk); 244 + put_task_struct(tsk); 245 + *per_cpu_ptr(ht->store, cpu) = NULL; 246 + } 247 + } 248 + } 249 + 250 + /** 251 + * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug 252 + * @plug_thread: Hotplug thread descriptor 253 + * 254 + * Creates and starts the threads on all online cpus. 255 + */ 256 + int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) 257 + { 258 + unsigned int cpu; 259 + int ret = 0; 260 + 261 + mutex_lock(&smpboot_threads_lock); 262 + for_each_online_cpu(cpu) { 263 + ret = __smpboot_create_thread(plug_thread, cpu); 264 + if (ret) { 265 + smpboot_destroy_threads(plug_thread); 266 + goto out; 267 + } 268 + smpboot_unpark_thread(plug_thread, cpu); 269 + } 270 + list_add(&plug_thread->list, &hotplug_threads); 271 + out: 272 + mutex_unlock(&smpboot_threads_lock); 273 + return ret; 274 + } 275 + EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread); 276 + 277 + /** 278 + * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug 279 + * @plug_thread: Hotplug thread descriptor 280 + * 281 + * Stops all threads on all possible cpus. 282 + */ 283 + void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) 284 + { 285 + get_online_cpus(); 286 + mutex_lock(&smpboot_threads_lock); 287 + list_del(&plug_thread->list); 288 + smpboot_destroy_threads(plug_thread); 289 + mutex_unlock(&smpboot_threads_lock); 290 + put_online_cpus(); 291 + } 292 + EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
+4
kernel/smpboot.h
··· 13 13 static inline void idle_threads_init(void) { } 14 14 #endif 15 15 16 + int smpboot_create_threads(unsigned int cpu); 17 + void smpboot_park_threads(unsigned int cpu); 18 + void smpboot_unpark_threads(unsigned int cpu); 19 + 16 20 #endif
+27 -84
kernel/softirq.c
··· 23 23 #include <linux/rcupdate.h> 24 24 #include <linux/ftrace.h> 25 25 #include <linux/smp.h> 26 + #include <linux/smpboot.h> 26 27 #include <linux/tick.h> 27 28 28 29 #define CREATE_TRACE_POINTS ··· 743 742 open_softirq(HI_SOFTIRQ, tasklet_hi_action); 744 743 } 745 744 746 - static int run_ksoftirqd(void * __bind_cpu) 745 + static int ksoftirqd_should_run(unsigned int cpu) 747 746 { 748 - set_current_state(TASK_INTERRUPTIBLE); 747 + return local_softirq_pending(); 748 + } 749 749 750 - while (!kthread_should_stop()) { 751 - preempt_disable(); 752 - if (!local_softirq_pending()) { 753 - schedule_preempt_disabled(); 754 - } 755 - 756 - __set_current_state(TASK_RUNNING); 757 - 758 - while (local_softirq_pending()) { 759 - /* Preempt disable stops cpu going offline. 760 - If already offline, we'll be on wrong CPU: 761 - don't process */ 762 - if (cpu_is_offline((long)__bind_cpu)) 763 - goto wait_to_die; 764 - local_irq_disable(); 765 - if (local_softirq_pending()) 766 - __do_softirq(); 767 - local_irq_enable(); 768 - sched_preempt_enable_no_resched(); 769 - cond_resched(); 770 - preempt_disable(); 771 - rcu_note_context_switch((long)__bind_cpu); 772 - } 773 - preempt_enable(); 774 - set_current_state(TASK_INTERRUPTIBLE); 750 + static void run_ksoftirqd(unsigned int cpu) 751 + { 752 + local_irq_disable(); 753 + if (local_softirq_pending()) { 754 + __do_softirq(); 755 + rcu_note_context_switch(cpu); 756 + local_irq_enable(); 757 + cond_resched(); 758 + return; 775 759 } 776 - __set_current_state(TASK_RUNNING); 777 - return 0; 778 - 779 - wait_to_die: 780 - preempt_enable(); 781 - /* Wait for kthread_stop */ 782 - set_current_state(TASK_INTERRUPTIBLE); 783 - while (!kthread_should_stop()) { 784 - schedule(); 785 - set_current_state(TASK_INTERRUPTIBLE); 786 - } 787 - __set_current_state(TASK_RUNNING); 788 - return 0; 760 + local_irq_enable(); 789 761 } 790 762 791 763 #ifdef CONFIG_HOTPLUG_CPU ··· 824 850 unsigned long action, 825 851 void *hcpu) 826 852 { 827 - int hotcpu = (unsigned long)hcpu; 828 - struct task_struct *p; 829 - 830 853 switch (action) { 831 - case CPU_UP_PREPARE: 832 - case CPU_UP_PREPARE_FROZEN: 833 - p = kthread_create_on_node(run_ksoftirqd, 834 - hcpu, 835 - cpu_to_node(hotcpu), 836 - "ksoftirqd/%d", hotcpu); 837 - if (IS_ERR(p)) { 838 - printk("ksoftirqd for %i failed\n", hotcpu); 839 - return notifier_from_errno(PTR_ERR(p)); 840 - } 841 - kthread_bind(p, hotcpu); 842 - per_cpu(ksoftirqd, hotcpu) = p; 843 - break; 844 - case CPU_ONLINE: 845 - case CPU_ONLINE_FROZEN: 846 - wake_up_process(per_cpu(ksoftirqd, hotcpu)); 847 - break; 848 854 #ifdef CONFIG_HOTPLUG_CPU 849 - case CPU_UP_CANCELED: 850 - case CPU_UP_CANCELED_FROZEN: 851 - if (!per_cpu(ksoftirqd, hotcpu)) 852 - break; 853 - /* Unbind so it can run. Fall thru. */ 854 - kthread_bind(per_cpu(ksoftirqd, hotcpu), 855 - cpumask_any(cpu_online_mask)); 856 855 case CPU_DEAD: 857 - case CPU_DEAD_FROZEN: { 858 - static const struct sched_param param = { 859 - .sched_priority = MAX_RT_PRIO-1 860 - }; 861 - 862 - p = per_cpu(ksoftirqd, hotcpu); 863 - per_cpu(ksoftirqd, hotcpu) = NULL; 864 - sched_setscheduler_nocheck(p, SCHED_FIFO, &param); 865 - kthread_stop(p); 866 - takeover_tasklets(hotcpu); 856 + case CPU_DEAD_FROZEN: 857 + takeover_tasklets((unsigned long)hcpu); 867 858 break; 868 - } 869 859 #endif /* CONFIG_HOTPLUG_CPU */ 870 - } 860 + } 871 861 return NOTIFY_OK; 872 862 } 873 863 ··· 839 901 .notifier_call = cpu_callback 840 902 }; 841 903 904 + static struct smp_hotplug_thread softirq_threads = { 905 + .store = &ksoftirqd, 906 + .thread_should_run = ksoftirqd_should_run, 907 + .thread_fn = run_ksoftirqd, 908 + .thread_comm = "ksoftirqd/%u", 909 + }; 910 + 842 911 static __init int spawn_ksoftirqd(void) 843 912 { 844 - void *cpu = (void *)(long)smp_processor_id(); 845 - int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 846 - 847 - BUG_ON(err != NOTIFY_OK); 848 - cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 849 913 register_cpu_notifier(&cpu_nfb); 914 + 915 + BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); 916 + 850 917 return 0; 851 918 } 852 919 early_initcall(spawn_ksoftirqd);
+92 -177
kernel/watchdog.c
··· 22 22 #include <linux/notifier.h> 23 23 #include <linux/module.h> 24 24 #include <linux/sysctl.h> 25 + #include <linux/smpboot.h> 25 26 26 27 #include <asm/irq_regs.h> 27 28 #include <linux/kvm_para.h> ··· 30 29 31 30 int watchdog_enabled = 1; 32 31 int __read_mostly watchdog_thresh = 10; 32 + static int __read_mostly watchdog_disabled; 33 33 34 34 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 35 35 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); 36 36 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); 37 37 static DEFINE_PER_CPU(bool, softlockup_touch_sync); 38 38 static DEFINE_PER_CPU(bool, soft_watchdog_warn); 39 + static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); 40 + static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); 39 41 #ifdef CONFIG_HARDLOCKUP_DETECTOR 40 42 static DEFINE_PER_CPU(bool, hard_watchdog_warn); 41 43 static DEFINE_PER_CPU(bool, watchdog_nmi_touch); 42 - static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); 43 44 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); 44 45 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 45 46 #endif ··· 251 248 __this_cpu_write(hard_watchdog_warn, false); 252 249 return; 253 250 } 251 + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ 252 + 254 253 static void watchdog_interrupt_count(void) 255 254 { 256 255 __this_cpu_inc(hrtimer_interrupts); 257 256 } 258 - #else 259 - static inline void watchdog_interrupt_count(void) { return; } 260 - #endif /* CONFIG_HARDLOCKUP_DETECTOR */ 257 + 258 + static int watchdog_nmi_enable(unsigned int cpu); 259 + static void watchdog_nmi_disable(unsigned int cpu); 261 260 262 261 /* watchdog kicker functions */ 263 262 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) ··· 332 327 return HRTIMER_RESTART; 333 328 } 334 329 335 - 336 - /* 337 - * The watchdog thread - touches the timestamp. 338 - */ 339 - static int watchdog(void *unused) 330 + static void watchdog_set_prio(unsigned int policy, unsigned int prio) 340 331 { 341 - struct sched_param param = { .sched_priority = 0 }; 332 + struct sched_param param = { .sched_priority = prio }; 333 + 334 + sched_setscheduler(current, policy, &param); 335 + } 336 + 337 + static void watchdog_enable(unsigned int cpu) 338 + { 342 339 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 343 340 344 - /* initialize timestamp */ 345 - __touch_watchdog(); 341 + if (!watchdog_enabled) { 342 + kthread_park(current); 343 + return; 344 + } 345 + 346 + /* Enable the perf event */ 347 + watchdog_nmi_enable(cpu); 346 348 347 349 /* kick off the timer for the hardlockup detector */ 350 + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 351 + hrtimer->function = watchdog_timer_fn; 352 + 348 353 /* done here because hrtimer_start can only pin to smp_processor_id() */ 349 354 hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), 350 355 HRTIMER_MODE_REL_PINNED); 351 356 352 - set_current_state(TASK_INTERRUPTIBLE); 353 - /* 354 - * Run briefly (kicked by the hrtimer callback function) once every 355 - * get_sample_period() seconds (4 seconds by default) to reset the 356 - * softlockup timestamp. If this gets delayed for more than 357 - * 2*watchdog_thresh seconds then the debug-printout triggers in 358 - * watchdog_timer_fn(). 359 - */ 360 - while (!kthread_should_stop()) { 361 - __touch_watchdog(); 362 - schedule(); 363 - 364 - if (kthread_should_stop()) 365 - break; 366 - 367 - set_current_state(TASK_INTERRUPTIBLE); 368 - } 369 - /* 370 - * Drop the policy/priority elevation during thread exit to avoid a 371 - * scheduling latency spike. 372 - */ 373 - __set_current_state(TASK_RUNNING); 374 - sched_setscheduler(current, SCHED_NORMAL, &param); 375 - return 0; 357 + /* initialize timestamp */ 358 + watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); 359 + __touch_watchdog(); 376 360 } 377 361 362 + static void watchdog_disable(unsigned int cpu) 363 + { 364 + struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 365 + 366 + watchdog_set_prio(SCHED_NORMAL, 0); 367 + hrtimer_cancel(hrtimer); 368 + /* disable the perf event */ 369 + watchdog_nmi_disable(cpu); 370 + } 371 + 372 + static int watchdog_should_run(unsigned int cpu) 373 + { 374 + return __this_cpu_read(hrtimer_interrupts) != 375 + __this_cpu_read(soft_lockup_hrtimer_cnt); 376 + } 377 + 378 + /* 379 + * The watchdog thread function - touches the timestamp. 380 + * 381 + * It only runs once every get_sample_period() seconds (4 seconds by 382 + * default) to reset the softlockup timestamp. If this gets delayed 383 + * for more than 2*watchdog_thresh seconds then the debug-printout 384 + * triggers in watchdog_timer_fn(). 385 + */ 386 + static void watchdog(unsigned int cpu) 387 + { 388 + __this_cpu_write(soft_lockup_hrtimer_cnt, 389 + __this_cpu_read(hrtimer_interrupts)); 390 + __touch_watchdog(); 391 + } 378 392 379 393 #ifdef CONFIG_HARDLOCKUP_DETECTOR 380 394 /* ··· 403 379 */ 404 380 static unsigned long cpu0_err; 405 381 406 - static int watchdog_nmi_enable(int cpu) 382 + static int watchdog_nmi_enable(unsigned int cpu) 407 383 { 408 384 struct perf_event_attr *wd_attr; 409 385 struct perf_event *event = per_cpu(watchdog_ev, cpu); ··· 457 433 return 0; 458 434 } 459 435 460 - static void watchdog_nmi_disable(int cpu) 436 + static void watchdog_nmi_disable(unsigned int cpu) 461 437 { 462 438 struct perf_event *event = per_cpu(watchdog_ev, cpu); 463 439 ··· 471 447 return; 472 448 } 473 449 #else 474 - static int watchdog_nmi_enable(int cpu) { return 0; } 475 - static void watchdog_nmi_disable(int cpu) { return; } 450 + static int watchdog_nmi_enable(unsigned int cpu) { return 0; } 451 + static void watchdog_nmi_disable(unsigned int cpu) { return; } 476 452 #endif /* CONFIG_HARDLOCKUP_DETECTOR */ 477 453 478 454 /* prepare/enable/disable routines */ 479 - static void watchdog_prepare_cpu(int cpu) 480 - { 481 - struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); 482 - 483 - WARN_ON(per_cpu(softlockup_watchdog, cpu)); 484 - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 485 - hrtimer->function = watchdog_timer_fn; 486 - } 487 - 488 - static int watchdog_enable(int cpu) 489 - { 490 - struct task_struct *p = per_cpu(softlockup_watchdog, cpu); 491 - int err = 0; 492 - 493 - /* enable the perf event */ 494 - err = watchdog_nmi_enable(cpu); 495 - 496 - /* Regardless of err above, fall through and start softlockup */ 497 - 498 - /* create the watchdog thread */ 499 - if (!p) { 500 - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 501 - p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); 502 - if (IS_ERR(p)) { 503 - pr_err("softlockup watchdog for %i failed\n", cpu); 504 - if (!err) { 505 - /* if hardlockup hasn't already set this */ 506 - err = PTR_ERR(p); 507 - /* and disable the perf event */ 508 - watchdog_nmi_disable(cpu); 509 - } 510 - goto out; 511 - } 512 - sched_setscheduler(p, SCHED_FIFO, &param); 513 - kthread_bind(p, cpu); 514 - per_cpu(watchdog_touch_ts, cpu) = 0; 515 - per_cpu(softlockup_watchdog, cpu) = p; 516 - wake_up_process(p); 517 - } 518 - 519 - out: 520 - return err; 521 - } 522 - 523 - static void watchdog_disable(int cpu) 524 - { 525 - struct task_struct *p = per_cpu(softlockup_watchdog, cpu); 526 - struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); 527 - 528 - /* 529 - * cancel the timer first to stop incrementing the stats 530 - * and waking up the kthread 531 - */ 532 - hrtimer_cancel(hrtimer); 533 - 534 - /* disable the perf event */ 535 - watchdog_nmi_disable(cpu); 536 - 537 - /* stop the watchdog thread */ 538 - if (p) { 539 - per_cpu(softlockup_watchdog, cpu) = NULL; 540 - kthread_stop(p); 541 - } 542 - } 543 - 544 455 /* sysctl functions */ 545 456 #ifdef CONFIG_SYSCTL 546 457 static void watchdog_enable_all_cpus(void) 547 458 { 548 - int cpu; 459 + unsigned int cpu; 549 460 550 - watchdog_enabled = 0; 551 - 552 - for_each_online_cpu(cpu) 553 - if (!watchdog_enable(cpu)) 554 - /* if any cpu succeeds, watchdog is considered 555 - enabled for the system */ 556 - watchdog_enabled = 1; 557 - 558 - if (!watchdog_enabled) 559 - pr_err("failed to be enabled on some cpus\n"); 560 - 461 + if (watchdog_disabled) { 462 + watchdog_disabled = 0; 463 + for_each_online_cpu(cpu) 464 + kthread_unpark(per_cpu(softlockup_watchdog, cpu)); 465 + } 561 466 } 562 467 563 468 static void watchdog_disable_all_cpus(void) 564 469 { 565 - int cpu; 470 + unsigned int cpu; 566 471 567 - for_each_online_cpu(cpu) 568 - watchdog_disable(cpu); 569 - 570 - /* if all watchdogs are disabled, then they are disabled for the system */ 571 - watchdog_enabled = 0; 472 + if (!watchdog_disabled) { 473 + watchdog_disabled = 1; 474 + for_each_online_cpu(cpu) 475 + kthread_park(per_cpu(softlockup_watchdog, cpu)); 476 + } 572 477 } 573 - 574 478 575 479 /* 576 480 * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh ··· 509 557 { 510 558 int ret; 511 559 560 + if (watchdog_disabled < 0) 561 + return -ENODEV; 562 + 512 563 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 513 564 if (ret || !write) 514 - goto out; 565 + return ret; 515 566 516 567 if (watchdog_enabled && watchdog_thresh) 517 568 watchdog_enable_all_cpus(); 518 569 else 519 570 watchdog_disable_all_cpus(); 520 571 521 - out: 522 572 return ret; 523 573 } 524 574 #endif /* CONFIG_SYSCTL */ 525 575 526 - 527 - /* 528 - * Create/destroy watchdog threads as CPUs come and go: 529 - */ 530 - static int __cpuinit 531 - cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 532 - { 533 - int hotcpu = (unsigned long)hcpu; 534 - 535 - switch (action) { 536 - case CPU_UP_PREPARE: 537 - case CPU_UP_PREPARE_FROZEN: 538 - watchdog_prepare_cpu(hotcpu); 539 - break; 540 - case CPU_ONLINE: 541 - case CPU_ONLINE_FROZEN: 542 - if (watchdog_enabled) 543 - watchdog_enable(hotcpu); 544 - break; 545 - #ifdef CONFIG_HOTPLUG_CPU 546 - case CPU_UP_CANCELED: 547 - case CPU_UP_CANCELED_FROZEN: 548 - watchdog_disable(hotcpu); 549 - break; 550 - case CPU_DEAD: 551 - case CPU_DEAD_FROZEN: 552 - watchdog_disable(hotcpu); 553 - break; 554 - #endif /* CONFIG_HOTPLUG_CPU */ 555 - } 556 - 557 - /* 558 - * hardlockup and softlockup are not important enough 559 - * to block cpu bring up. Just always succeed and 560 - * rely on printk output to flag problems. 561 - */ 562 - return NOTIFY_OK; 563 - } 564 - 565 - static struct notifier_block __cpuinitdata cpu_nfb = { 566 - .notifier_call = cpu_callback 576 + static struct smp_hotplug_thread watchdog_threads = { 577 + .store = &softlockup_watchdog, 578 + .thread_should_run = watchdog_should_run, 579 + .thread_fn = watchdog, 580 + .thread_comm = "watchdog/%u", 581 + .setup = watchdog_enable, 582 + .park = watchdog_disable, 583 + .unpark = watchdog_enable, 567 584 }; 568 585 569 586 void __init lockup_detector_init(void) 570 587 { 571 - void *cpu = (void *)(long)smp_processor_id(); 572 - int err; 573 - 574 - err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 575 - WARN_ON(notifier_to_errno(err)); 576 - 577 - cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 578 - register_cpu_notifier(&cpu_nfb); 579 - 580 - return; 588 + if (smpboot_register_percpu_thread(&watchdog_threads)) { 589 + pr_err("Failed to create watchdog threads, disabled\n"); 590 + watchdog_disabled = -ENODEV; 591 + } 581 592 }