Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branches 'bigrt.2012.09.23a', 'doctorture.2012.09.23a', 'fixes.2012.09.23a', 'hotplug.2012.09.23a' and 'idlechop.2012.09.23a' into HEAD

bigrt.2012.09.23a contains additional commits to reduce scheduling latency
from RCU on huge systems (many hundreds or thousands of CPUs).

doctorture.2012.09.23a contains documentation changes and rcutorture fixes.

fixes.2012.09.23a contains miscellaneous fixes.

hotplug.2012.09.23a contains CPU-hotplug-related changes.

idlechop.2012.09.23a fixes architectures for which RCU no longer considered
the idle loop to be a quiescent state due to earlier
adaptive-dynticks changes. Affected architectures are alpha,
cris, frv, h8300, m32r, m68k, mn10300, parisc, score, xtensa,
and ia64.

+316 -261
+6
Documentation/RCU/checklist.txt
··· 310 310 code under the influence of preempt_disable(), you instead 311 311 need to use synchronize_irq() or synchronize_sched(). 312 312 313 + This same limitation also applies to synchronize_rcu_bh() 314 + and synchronize_srcu(), as well as to the asynchronous and 315 + expedited forms of the three primitives, namely call_rcu(), 316 + call_rcu_bh(), call_srcu(), synchronize_rcu_expedited(), 317 + synchronize_rcu_bh_expedited(), and synchronize_srcu_expedited(). 318 + 313 319 12. Any lock acquired by an RCU callback must be acquired elsewhere 314 320 with softirq disabled, e.g., via spin_lock_irqsave(), 315 321 spin_lock_bh(), etc. Failing to disable irq on a given
+8 -8
Documentation/RCU/stallwarn.txt
··· 99 99 printed: 100 100 101 101 INFO: rcu_preempt detected stall on CPU 102 - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer=-1 102 + 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending 103 103 (t=65000 jiffies) 104 104 105 105 The "(64628 ticks this GP)" indicates that this CPU has taken more ··· 116 116 be a small positive number if in the idle loop and a very large positive 117 117 number (as shown above) otherwise. 118 118 119 - For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the 120 - CPU is not in the process of trying to force itself into dyntick-idle 121 - state, the "." indicates that the CPU has not given up forcing RCU 122 - into dyntick-idle mode (it would be "H" otherwise), and the "timer=-1" 123 - indicates that the CPU has not recented forced RCU into dyntick-idle 124 - mode (it would otherwise indicate the number of microseconds remaining 125 - in this forced state). 119 + For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is 120 + not in the process of trying to force itself into dyntick-idle state, the 121 + "." indicates that the CPU has not given up forcing RCU into dyntick-idle 122 + mode (it would be "H" otherwise), and the "timer not pending" indicates 123 + that the CPU has not recently forced RCU into dyntick-idle mode (it 124 + would otherwise indicate the number of microseconds remaining in this 125 + forced state). 126 126 127 127 128 128 Multiple Warnings From One Stall
+7 -2
Documentation/RCU/whatisRCU.txt
··· 873 873 and code segments with preemption disabled (whether 874 874 via preempt_disable(), local_irq_save(), local_bh_disable(), 875 875 or some other mechanism) as if they were explicit RCU readers? 876 - If so, you need RCU-sched. 876 + If so, RCU-sched is the only choice that will work for you. 877 877 878 878 e. Do you need RCU grace periods to complete even in the face 879 879 of softirq monopolization of one or more of the CPUs? For ··· 884 884 RCU, but inappropriate for other synchronization mechanisms? 885 885 If so, consider SLAB_DESTROY_BY_RCU. But please be careful! 886 886 887 - g. Otherwise, use RCU. 887 + g. Do you need read-side critical sections that are respected 888 + even though they are in the middle of the idle loop, during 889 + user-mode execution, or on an offlined CPU? If so, SRCU is the 890 + only choice that will work for you. 891 + 892 + h. Otherwise, use RCU. 888 893 889 894 Of course, this all assumes that you have determined that RCU is in fact 890 895 the right tool for your job.
+5 -1
arch/alpha/kernel/process.c
··· 28 28 #include <linux/tty.h> 29 29 #include <linux/console.h> 30 30 #include <linux/slab.h> 31 + #include <linux/rcupdate.h> 31 32 32 33 #include <asm/reg.h> 33 34 #include <asm/uaccess.h> ··· 55 54 /* FIXME -- EV6 and LCA45 know how to power down 56 55 the CPU. */ 57 56 57 + rcu_idle_enter(); 58 58 while (!need_resched()) 59 59 cpu_relax(); 60 - schedule(); 60 + 61 + rcu_idle_exit(); 62 + schedule_preempt_disabled(); 61 63 } 62 64 } 63 65
+1
arch/alpha/kernel/smp.c
··· 166 166 DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", 167 167 cpuid, current, current->active_mm)); 168 168 169 + preempt_disable(); 169 170 /* Do nothing. */ 170 171 cpu_idle(); 171 172 }
+3
arch/cris/kernel/process.c
··· 25 25 #include <linux/elfcore.h> 26 26 #include <linux/mqueue.h> 27 27 #include <linux/reboot.h> 28 + #include <linux/rcupdate.h> 28 29 29 30 //#define DEBUG 30 31 ··· 75 74 { 76 75 /* endless idle loop with no priority at all */ 77 76 while (1) { 77 + rcu_idle_enter(); 78 78 while (!need_resched()) { 79 79 void (*idle)(void); 80 80 /* ··· 88 86 idle = default_idle; 89 87 idle(); 90 88 } 89 + rcu_idle_exit(); 91 90 schedule_preempt_disabled(); 92 91 } 93 92 }
+3
arch/frv/kernel/process.c
··· 25 25 #include <linux/reboot.h> 26 26 #include <linux/interrupt.h> 27 27 #include <linux/pagemap.h> 28 + #include <linux/rcupdate.h> 28 29 29 30 #include <asm/asm-offsets.h> 30 31 #include <asm/uaccess.h> ··· 70 69 { 71 70 /* endless idle loop with no priority at all */ 72 71 while (1) { 72 + rcu_idle_enter(); 73 73 while (!need_resched()) { 74 74 check_pgt_cache(); 75 75 76 76 if (!frv_dma_inprogress && idle) 77 77 idle(); 78 78 } 79 + rcu_idle_exit(); 79 80 80 81 schedule_preempt_disabled(); 81 82 }
+3
arch/h8300/kernel/process.c
··· 36 36 #include <linux/reboot.h> 37 37 #include <linux/fs.h> 38 38 #include <linux/slab.h> 39 + #include <linux/rcupdate.h> 39 40 40 41 #include <asm/uaccess.h> 41 42 #include <asm/traps.h> ··· 79 78 void cpu_idle(void) 80 79 { 81 80 while (1) { 81 + rcu_idle_enter(); 82 82 while (!need_resched()) 83 83 idle(); 84 + rcu_idle_exit(); 84 85 schedule_preempt_disabled(); 85 86 } 86 87 }
+3
arch/ia64/kernel/process.c
··· 29 29 #include <linux/kdebug.h> 30 30 #include <linux/utsname.h> 31 31 #include <linux/tracehook.h> 32 + #include <linux/rcupdate.h> 32 33 33 34 #include <asm/cpu.h> 34 35 #include <asm/delay.h> ··· 280 279 281 280 /* endless idle loop with no priority at all */ 282 281 while (1) { 282 + rcu_idle_enter(); 283 283 if (can_do_pal_halt) { 284 284 current_thread_info()->status &= ~TS_POLLING; 285 285 /* ··· 311 309 normal_xtp(); 312 310 #endif 313 311 } 312 + rcu_idle_exit(); 314 313 schedule_preempt_disabled(); 315 314 check_pgt_cache(); 316 315 if (cpu_is_offline(cpu))
+3
arch/m32r/kernel/process.c
··· 26 26 #include <linux/ptrace.h> 27 27 #include <linux/unistd.h> 28 28 #include <linux/hardirq.h> 29 + #include <linux/rcupdate.h> 29 30 30 31 #include <asm/io.h> 31 32 #include <asm/uaccess.h> ··· 83 82 { 84 83 /* endless idle loop with no priority at all */ 85 84 while (1) { 85 + rcu_idle_enter(); 86 86 while (!need_resched()) { 87 87 void (*idle)(void) = pm_idle; 88 88 ··· 92 90 93 91 idle(); 94 92 } 93 + rcu_idle_exit(); 95 94 schedule_preempt_disabled(); 96 95 } 97 96 }
+3
arch/m68k/kernel/process.c
··· 25 25 #include <linux/reboot.h> 26 26 #include <linux/init_task.h> 27 27 #include <linux/mqueue.h> 28 + #include <linux/rcupdate.h> 28 29 29 30 #include <asm/uaccess.h> 30 31 #include <asm/traps.h> ··· 76 75 { 77 76 /* endless idle loop with no priority at all */ 78 77 while (1) { 78 + rcu_idle_enter(); 79 79 while (!need_resched()) 80 80 idle(); 81 + rcu_idle_exit(); 81 82 schedule_preempt_disabled(); 82 83 } 83 84 }
+3
arch/mn10300/kernel/process.c
··· 25 25 #include <linux/err.h> 26 26 #include <linux/fs.h> 27 27 #include <linux/slab.h> 28 + #include <linux/rcupdate.h> 28 29 #include <asm/uaccess.h> 29 30 #include <asm/pgtable.h> 30 31 #include <asm/io.h> ··· 108 107 { 109 108 /* endless idle loop with no priority at all */ 110 109 for (;;) { 110 + rcu_idle_enter(); 111 111 while (!need_resched()) { 112 112 void (*idle)(void); 113 113 ··· 123 121 } 124 122 idle(); 125 123 } 124 + rcu_idle_exit(); 126 125 127 126 schedule_preempt_disabled(); 128 127 }
+3
arch/parisc/kernel/process.c
··· 48 48 #include <linux/unistd.h> 49 49 #include <linux/kallsyms.h> 50 50 #include <linux/uaccess.h> 51 + #include <linux/rcupdate.h> 51 52 52 53 #include <asm/io.h> 53 54 #include <asm/asm-offsets.h> ··· 70 69 71 70 /* endless idle loop with no priority at all */ 72 71 while (1) { 72 + rcu_idle_enter(); 73 73 while (!need_resched()) 74 74 barrier(); 75 + rcu_idle_exit(); 75 76 schedule_preempt_disabled(); 76 77 check_pgt_cache(); 77 78 }
+3 -1
arch/score/kernel/process.c
··· 27 27 #include <linux/reboot.h> 28 28 #include <linux/elfcore.h> 29 29 #include <linux/pm.h> 30 + #include <linux/rcupdate.h> 30 31 31 32 void (*pm_power_off)(void); 32 33 EXPORT_SYMBOL(pm_power_off); ··· 51 50 { 52 51 /* endless idle loop with no priority at all */ 53 52 while (1) { 53 + rcu_idle_enter(); 54 54 while (!need_resched()) 55 55 barrier(); 56 - 56 + rcu_idle_exit(); 57 57 schedule_preempt_disabled(); 58 58 } 59 59 }
+5
arch/x86/kernel/cpuid.c
··· 199 199 goto out_chrdev; 200 200 } 201 201 cpuid_class->devnode = cpuid_devnode; 202 + get_online_cpus(); 202 203 for_each_online_cpu(i) { 203 204 err = cpuid_device_create(i); 204 205 if (err != 0) 205 206 goto out_class; 206 207 } 207 208 register_hotcpu_notifier(&cpuid_class_cpu_notifier); 209 + put_online_cpus(); 208 210 209 211 err = 0; 210 212 goto out; ··· 216 214 for_each_online_cpu(i) { 217 215 cpuid_device_destroy(i); 218 216 } 217 + put_online_cpus(); 219 218 class_destroy(cpuid_class); 220 219 out_chrdev: 221 220 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); ··· 228 225 { 229 226 int cpu = 0; 230 227 228 + get_online_cpus(); 231 229 for_each_online_cpu(cpu) 232 230 cpuid_device_destroy(cpu); 233 231 class_destroy(cpuid_class); 234 232 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); 235 233 unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); 234 + put_online_cpus(); 236 235 } 237 236 238 237 module_init(cpuid_init);
+5
arch/x86/kernel/msr.c
··· 257 257 goto out_chrdev; 258 258 } 259 259 msr_class->devnode = msr_devnode; 260 + get_online_cpus(); 260 261 for_each_online_cpu(i) { 261 262 err = msr_device_create(i); 262 263 if (err != 0) 263 264 goto out_class; 264 265 } 265 266 register_hotcpu_notifier(&msr_class_cpu_notifier); 267 + put_online_cpus(); 266 268 267 269 err = 0; 268 270 goto out; ··· 273 271 i = 0; 274 272 for_each_online_cpu(i) 275 273 msr_device_destroy(i); 274 + put_online_cpus(); 276 275 class_destroy(msr_class); 277 276 out_chrdev: 278 277 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); ··· 284 281 static void __exit msr_exit(void) 285 282 { 286 283 int cpu = 0; 284 + get_online_cpus(); 287 285 for_each_online_cpu(cpu) 288 286 msr_device_destroy(cpu); 289 287 class_destroy(msr_class); 290 288 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); 291 289 unregister_hotcpu_notifier(&msr_class_cpu_notifier); 290 + put_online_cpus(); 292 291 } 293 292 294 293 module_init(msr_init);
+3
arch/xtensa/kernel/process.c
··· 31 31 #include <linux/mqueue.h> 32 32 #include <linux/fs.h> 33 33 #include <linux/slab.h> 34 + #include <linux/rcupdate.h> 34 35 35 36 #include <asm/pgtable.h> 36 37 #include <asm/uaccess.h> ··· 111 110 112 111 /* endless idle loop with no priority at all */ 113 112 while (1) { 113 + rcu_idle_enter(); 114 114 while (!need_resched()) 115 115 platform_idle(); 116 + rcu_idle_exit(); 116 117 schedule_preempt_disabled(); 117 118 } 118 119 }
+2
include/linux/interrupt.h
··· 430 430 NR_SOFTIRQS 431 431 }; 432 432 433 + #define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) 434 + 433 435 /* map softirq index to softirq name. update 'softirq_to_name' in 434 436 * kernel/softirq.c when adding a new softirq. 435 437 */
+2 -4
include/linux/rcupdate.h
··· 210 210 * to nest RCU_NONIDLE() wrappers, but the nesting level is currently 211 211 * quite limited. If deeper nesting is required, it will be necessary 212 212 * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. 213 - * 214 - * This macro may be used from process-level code only. 215 213 */ 216 214 #define RCU_NONIDLE(a) \ 217 215 do { \ 218 - rcu_idle_exit(); \ 216 + rcu_irq_enter(); \ 219 217 do { a; } while (0); \ 220 - rcu_idle_enter(); \ 218 + rcu_irq_exit(); \ 221 219 } while (0) 222 220 223 221 /*
+4
kernel/rcupdate.c
··· 45 45 #include <linux/mutex.h> 46 46 #include <linux/export.h> 47 47 #include <linux/hardirq.h> 48 + #include <linux/delay.h> 48 49 49 50 #define CREATE_TRACE_POINTS 50 51 #include <trace/events/rcu.h> ··· 82 81 } else { 83 82 barrier(); /* critical section before exit code. */ 84 83 t->rcu_read_lock_nesting = INT_MIN; 84 + #ifdef CONFIG_PROVE_RCU_DELAY 85 + udelay(10); /* Make preemption more probable. */ 86 + #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ 85 87 barrier(); /* assign before ->rcu_read_unlock_special load */ 86 88 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 87 89 rcu_read_unlock_special(t);
+18 -15
kernel/rcutiny.c
··· 56 56 static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 57 57 58 58 /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ 59 - static void rcu_idle_enter_common(long long oldval) 59 + static void rcu_idle_enter_common(long long newval) 60 60 { 61 - if (rcu_dynticks_nesting) { 61 + if (newval) { 62 62 RCU_TRACE(trace_rcu_dyntick("--=", 63 - oldval, rcu_dynticks_nesting)); 63 + rcu_dynticks_nesting, newval)); 64 + rcu_dynticks_nesting = newval; 64 65 return; 65 66 } 66 - RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); 67 + RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval)); 67 68 if (!is_idle_task(current)) { 68 69 struct task_struct *idle = idle_task(smp_processor_id()); 69 70 70 71 RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", 71 - oldval, rcu_dynticks_nesting)); 72 + rcu_dynticks_nesting, newval)); 72 73 ftrace_dump(DUMP_ALL); 73 74 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", 74 75 current->pid, current->comm, 75 76 idle->pid, idle->comm); /* must be idle task! 
*/ 76 77 } 77 78 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ 79 + barrier(); 80 + rcu_dynticks_nesting = newval; 78 81 } 79 82 80 83 /* ··· 87 84 void rcu_idle_enter(void) 88 85 { 89 86 unsigned long flags; 90 - long long oldval; 87 + long long newval; 91 88 92 89 local_irq_save(flags); 93 - oldval = rcu_dynticks_nesting; 94 90 WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); 95 91 if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 96 92 DYNTICK_TASK_NEST_VALUE) 97 - rcu_dynticks_nesting = 0; 93 + newval = 0; 98 94 else 99 - rcu_dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; 100 - rcu_idle_enter_common(oldval); 95 + newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE; 96 + rcu_idle_enter_common(newval); 101 97 local_irq_restore(flags); 102 98 } 103 99 EXPORT_SYMBOL_GPL(rcu_idle_enter); ··· 107 105 void rcu_irq_exit(void) 108 106 { 109 107 unsigned long flags; 110 - long long oldval; 108 + long long newval; 111 109 112 110 local_irq_save(flags); 113 - oldval = rcu_dynticks_nesting; 114 - rcu_dynticks_nesting--; 115 - WARN_ON_ONCE(rcu_dynticks_nesting < 0); 116 - rcu_idle_enter_common(oldval); 111 + newval = rcu_dynticks_nesting - 1; 112 + WARN_ON_ONCE(newval < 0); 113 + rcu_idle_enter_common(newval); 117 114 local_irq_restore(flags); 118 115 } 116 + EXPORT_SYMBOL_GPL(rcu_irq_exit); 119 117 120 118 /* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ 121 119 static void rcu_idle_exit_common(long long oldval) ··· 173 171 rcu_idle_exit_common(oldval); 174 172 local_irq_restore(flags); 175 173 } 174 + EXPORT_SYMBOL_GPL(rcu_irq_enter); 176 175 177 176 #ifdef CONFIG_DEBUG_LOCK_ALLOC 178 177
+5 -5
kernel/rcutiny_plugin.h
··· 278 278 rcu_preempt_ctrlblk.exp_tasks == NULL) 279 279 return 0; /* Nothing to boost. */ 280 280 281 - raw_local_irq_save(flags); 281 + local_irq_save(flags); 282 282 283 283 /* 284 284 * Recheck with irqs disabled: all tasks in need of boosting ··· 287 287 */ 288 288 if (rcu_preempt_ctrlblk.boost_tasks == NULL && 289 289 rcu_preempt_ctrlblk.exp_tasks == NULL) { 290 - raw_local_irq_restore(flags); 290 + local_irq_restore(flags); 291 291 return 0; 292 292 } 293 293 ··· 317 317 t = container_of(tb, struct task_struct, rcu_node_entry); 318 318 rt_mutex_init_proxy_locked(&mtx, t); 319 319 t->rcu_boost_mutex = &mtx; 320 - raw_local_irq_restore(flags); 320 + local_irq_restore(flags); 321 321 rt_mutex_lock(&mtx); 322 322 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 323 323 ··· 991 991 { 992 992 unsigned long flags; 993 993 994 - raw_local_irq_save(flags); 994 + local_irq_save(flags); 995 995 rcp->qlen -= n; 996 - raw_local_irq_restore(flags); 996 + local_irq_restore(flags); 997 997 } 998 998 999 999 /*
+97 -62
kernel/rcutorture.c
··· 53 53 54 54 static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ 55 55 static int nfakewriters = 4; /* # fake writer threads */ 56 - static int stat_interval; /* Interval between stats, in seconds. */ 57 - /* Defaults to "only at end of test". */ 56 + static int stat_interval = 60; /* Interval between stats, in seconds. */ 57 + /* Zero means "only at end of test". */ 58 58 static bool verbose; /* Print more debug info. */ 59 - static bool test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ 59 + static bool test_no_idle_hz = true; 60 + /* Test RCU support for tickless idle CPUs. */ 60 61 static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ 61 62 static int stutter = 5; /* Start/stop testing interval (in sec) */ 62 63 static int irqreader = 1; /* RCU readers from irq (timers). */ ··· 120 119 121 120 #define TORTURE_FLAG "-torture:" 122 121 #define PRINTK_STRING(s) \ 123 - do { printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0) 122 + do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) 124 123 #define VERBOSE_PRINTK_STRING(s) \ 125 - do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0) 124 + do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) 126 125 #define VERBOSE_PRINTK_ERRSTRING(s) \ 127 - do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) 126 + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! 
" s "\n", torture_type); } while (0) 128 127 129 128 static char printk_buf[4096]; 130 129 ··· 177 176 static long n_rcu_torture_timers; 178 177 static long n_offline_attempts; 179 178 static long n_offline_successes; 179 + static unsigned long sum_offline; 180 + static int min_offline = -1; 181 + static int max_offline; 180 182 static long n_online_attempts; 181 183 static long n_online_successes; 184 + static unsigned long sum_online; 185 + static int min_online = -1; 186 + static int max_online; 182 187 static long n_barrier_attempts; 183 188 static long n_barrier_successes; 184 189 static struct list_head rcu_torture_removed; ··· 242 235 if (fullstop == FULLSTOP_DONTSTOP) 243 236 fullstop = FULLSTOP_SHUTDOWN; 244 237 else 245 - printk(KERN_WARNING /* but going down anyway, so... */ 238 + pr_warn(/* but going down anyway, so... */ 246 239 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); 247 240 mutex_unlock(&fullstop_mutex); 248 241 return NOTIFY_DONE; ··· 255 248 static void rcutorture_shutdown_absorb(char *title) 256 249 { 257 250 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { 258 - printk(KERN_NOTICE 251 + pr_notice( 259 252 "rcutorture thread %s parking due to system shutdown\n", 260 253 title); 261 254 schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); ··· 1221 1214 n_rcu_torture_boost_failure, 1222 1215 n_rcu_torture_boosts, 1223 1216 n_rcu_torture_timers); 1224 - cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ", 1225 - n_online_successes, 1226 - n_online_attempts, 1227 - n_offline_successes, 1228 - n_offline_attempts); 1217 + cnt += sprintf(&page[cnt], 1218 + "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", 1219 + n_online_successes, n_online_attempts, 1220 + n_offline_successes, n_offline_attempts, 1221 + min_online, max_online, 1222 + min_offline, max_offline, 1223 + sum_online, sum_offline, HZ); 1229 1224 cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld", 1230 1225 n_barrier_successes, 1231 1226 n_barrier_attempts, ··· 1276 
1267 int cnt; 1277 1268 1278 1269 cnt = rcu_torture_printk(printk_buf); 1279 - printk(KERN_ALERT "%s", printk_buf); 1270 + pr_alert("%s", printk_buf); 1280 1271 } 1281 1272 1282 1273 /* ··· 1389 1380 static inline void 1390 1381 rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) 1391 1382 { 1392 - printk(KERN_ALERT "%s" TORTURE_FLAG 1393 - "--- %s: nreaders=%d nfakewriters=%d " 1394 - "stat_interval=%d verbose=%d test_no_idle_hz=%d " 1395 - "shuffle_interval=%d stutter=%d irqreader=%d " 1396 - "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " 1397 - "test_boost=%d/%d test_boost_interval=%d " 1398 - "test_boost_duration=%d shutdown_secs=%d " 1399 - "onoff_interval=%d onoff_holdoff=%d\n", 1400 - torture_type, tag, nrealreaders, nfakewriters, 1401 - stat_interval, verbose, test_no_idle_hz, shuffle_interval, 1402 - stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, 1403 - test_boost, cur_ops->can_boost, 1404 - test_boost_interval, test_boost_duration, shutdown_secs, 1405 - onoff_interval, onoff_holdoff); 1383 + pr_alert("%s" TORTURE_FLAG 1384 + "--- %s: nreaders=%d nfakewriters=%d " 1385 + "stat_interval=%d verbose=%d test_no_idle_hz=%d " 1386 + "shuffle_interval=%d stutter=%d irqreader=%d " 1387 + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " 1388 + "test_boost=%d/%d test_boost_interval=%d " 1389 + "test_boost_duration=%d shutdown_secs=%d " 1390 + "onoff_interval=%d onoff_holdoff=%d\n", 1391 + torture_type, tag, nrealreaders, nfakewriters, 1392 + stat_interval, verbose, test_no_idle_hz, shuffle_interval, 1393 + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, 1394 + test_boost, cur_ops->can_boost, 1395 + test_boost_interval, test_boost_duration, shutdown_secs, 1396 + onoff_interval, onoff_holdoff); 1406 1397 } 1407 1398 1408 1399 static struct notifier_block rcutorture_shutdown_nb = { ··· 1469 1460 !kthread_should_stop()) { 1470 1461 delta = shutdown_time - jiffies_snap; 1471 1462 if (verbose) 1472 - printk(KERN_ALERT "%s" 
TORTURE_FLAG 1473 - "rcu_torture_shutdown task: %lu jiffies remaining\n", 1474 - torture_type, delta); 1463 + pr_alert("%s" TORTURE_FLAG 1464 + "rcu_torture_shutdown task: %lu jiffies remaining\n", 1465 + torture_type, delta); 1475 1466 schedule_timeout_interruptible(delta); 1476 1467 jiffies_snap = ACCESS_ONCE(jiffies); 1477 1468 } ··· 1499 1490 rcu_torture_onoff(void *arg) 1500 1491 { 1501 1492 int cpu; 1493 + unsigned long delta; 1502 1494 int maxcpu = -1; 1503 1495 DEFINE_RCU_RANDOM(rand); 1496 + unsigned long starttime; 1504 1497 1505 1498 VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); 1506 1499 for_each_online_cpu(cpu) ··· 1517 1506 cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); 1518 1507 if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { 1519 1508 if (verbose) 1520 - printk(KERN_ALERT "%s" TORTURE_FLAG 1521 - "rcu_torture_onoff task: offlining %d\n", 1522 - torture_type, cpu); 1509 + pr_alert("%s" TORTURE_FLAG 1510 + "rcu_torture_onoff task: offlining %d\n", 1511 + torture_type, cpu); 1512 + starttime = jiffies; 1523 1513 n_offline_attempts++; 1524 1514 if (cpu_down(cpu) == 0) { 1525 1515 if (verbose) 1526 - printk(KERN_ALERT "%s" TORTURE_FLAG 1527 - "rcu_torture_onoff task: offlined %d\n", 1528 - torture_type, cpu); 1516 + pr_alert("%s" TORTURE_FLAG 1517 + "rcu_torture_onoff task: offlined %d\n", 1518 + torture_type, cpu); 1529 1519 n_offline_successes++; 1520 + delta = jiffies - starttime; 1521 + sum_offline += delta; 1522 + if (min_offline < 0) { 1523 + min_offline = delta; 1524 + max_offline = delta; 1525 + } 1526 + if (min_offline > delta) 1527 + min_offline = delta; 1528 + if (max_offline < delta) 1529 + max_offline = delta; 1530 1530 } 1531 1531 } else if (cpu_is_hotpluggable(cpu)) { 1532 1532 if (verbose) 1533 - printk(KERN_ALERT "%s" TORTURE_FLAG 1534 - "rcu_torture_onoff task: onlining %d\n", 1535 - torture_type, cpu); 1533 + pr_alert("%s" TORTURE_FLAG 1534 + "rcu_torture_onoff task: onlining %d\n", 1535 + torture_type, cpu); 1536 + 
starttime = jiffies; 1536 1537 n_online_attempts++; 1537 1538 if (cpu_up(cpu) == 0) { 1538 1539 if (verbose) 1539 - printk(KERN_ALERT "%s" TORTURE_FLAG 1540 - "rcu_torture_onoff task: onlined %d\n", 1541 - torture_type, cpu); 1540 + pr_alert("%s" TORTURE_FLAG 1541 + "rcu_torture_onoff task: onlined %d\n", 1542 + torture_type, cpu); 1542 1543 n_online_successes++; 1544 + delta = jiffies - starttime; 1545 + sum_online += delta; 1546 + if (min_online < 0) { 1547 + min_online = delta; 1548 + max_online = delta; 1549 + } 1550 + if (min_online > delta) 1551 + min_online = delta; 1552 + if (max_online < delta) 1553 + max_online = delta; 1543 1554 } 1544 1555 } 1545 1556 schedule_timeout_interruptible(onoff_interval * HZ); ··· 1626 1593 if (!kthread_should_stop()) { 1627 1594 stop_at = get_seconds() + stall_cpu; 1628 1595 /* RCU CPU stall is expected behavior in following code. */ 1629 - printk(KERN_ALERT "rcu_torture_stall start.\n"); 1596 + pr_alert("rcu_torture_stall start.\n"); 1630 1597 rcu_read_lock(); 1631 1598 preempt_disable(); 1632 1599 while (ULONG_CMP_LT(get_seconds(), stop_at)) 1633 1600 continue; /* Induce RCU CPU stall warning. 
*/ 1634 1601 preempt_enable(); 1635 1602 rcu_read_unlock(); 1636 - printk(KERN_ALERT "rcu_torture_stall end.\n"); 1603 + pr_alert("rcu_torture_stall end.\n"); 1637 1604 } 1638 1605 rcutorture_shutdown_absorb("rcu_torture_stall"); 1639 1606 while (!kthread_should_stop()) ··· 1749 1716 if (n_barrier_cbs == 0) 1750 1717 return 0; 1751 1718 if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { 1752 - printk(KERN_ALERT "%s" TORTURE_FLAG 1753 - " Call or barrier ops missing for %s,\n", 1754 - torture_type, cur_ops->name); 1755 - printk(KERN_ALERT "%s" TORTURE_FLAG 1756 - " RCU barrier testing omitted from run.\n", 1757 - torture_type); 1719 + pr_alert("%s" TORTURE_FLAG 1720 + " Call or barrier ops missing for %s,\n", 1721 + torture_type, cur_ops->name); 1722 + pr_alert("%s" TORTURE_FLAG 1723 + " RCU barrier testing omitted from run.\n", 1724 + torture_type); 1758 1725 return 0; 1759 1726 } 1760 1727 atomic_set(&barrier_cbs_count, 0); ··· 1847 1814 mutex_lock(&fullstop_mutex); 1848 1815 rcutorture_record_test_transition(); 1849 1816 if (fullstop == FULLSTOP_SHUTDOWN) { 1850 - printk(KERN_WARNING /* but going down anyway, so... */ 1817 + pr_warn(/* but going down anyway, so... 
*/ 1851 1818 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); 1852 1819 mutex_unlock(&fullstop_mutex); 1853 1820 schedule_timeout_uninterruptible(10); ··· 1971 1938 break; 1972 1939 } 1973 1940 if (i == ARRAY_SIZE(torture_ops)) { 1974 - printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n", 1975 - torture_type); 1976 - printk(KERN_ALERT "rcu-torture types:"); 1941 + pr_alert("rcu-torture: invalid torture type: \"%s\"\n", 1942 + torture_type); 1943 + pr_alert("rcu-torture types:"); 1977 1944 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) 1978 - printk(KERN_ALERT " %s", torture_ops[i]->name); 1979 - printk(KERN_ALERT "\n"); 1945 + pr_alert(" %s", torture_ops[i]->name); 1946 + pr_alert("\n"); 1980 1947 mutex_unlock(&fullstop_mutex); 1981 1948 return -EINVAL; 1982 1949 } 1983 1950 if (cur_ops->fqs == NULL && fqs_duration != 0) { 1984 - printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); 1951 + pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); 1985 1952 fqs_duration = 0; 1986 1953 } 1987 1954 if (cur_ops->init) ··· 2029 1996 /* Start up the kthreads. 
*/ 2030 1997 2031 1998 VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); 2032 - writer_task = kthread_run(rcu_torture_writer, NULL, 2033 - "rcu_torture_writer"); 1999 + writer_task = kthread_create(rcu_torture_writer, NULL, 2000 + "rcu_torture_writer"); 2034 2001 if (IS_ERR(writer_task)) { 2035 2002 firsterr = PTR_ERR(writer_task); 2036 2003 VERBOSE_PRINTK_ERRSTRING("Failed to create writer"); 2037 2004 writer_task = NULL; 2038 2005 goto unwind; 2039 2006 } 2007 + wake_up_process(writer_task); 2040 2008 fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), 2041 2009 GFP_KERNEL); 2042 2010 if (fakewriter_tasks == NULL) { ··· 2152 2118 } 2153 2119 if (shutdown_secs > 0) { 2154 2120 shutdown_time = jiffies + shutdown_secs * HZ; 2155 - shutdown_task = kthread_run(rcu_torture_shutdown, NULL, 2156 - "rcu_torture_shutdown"); 2121 + shutdown_task = kthread_create(rcu_torture_shutdown, NULL, 2122 + "rcu_torture_shutdown"); 2157 2123 if (IS_ERR(shutdown_task)) { 2158 2124 firsterr = PTR_ERR(shutdown_task); 2159 2125 VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); 2160 2126 shutdown_task = NULL; 2161 2127 goto unwind; 2162 2128 } 2129 + wake_up_process(shutdown_task); 2163 2130 } 2164 2131 i = rcu_torture_onoff_init(); 2165 2132 if (i != 0) {
+52 -114
kernel/rcutree.c
··· 323 323 } 324 324 325 325 /* 326 - * If the specified CPU is offline, tell the caller that it is in 327 - * a quiescent state. Otherwise, whack it with a reschedule IPI. 328 - * Grace periods can end up waiting on an offline CPU when that 329 - * CPU is in the process of coming online -- it will be added to the 330 - * rcu_node bitmasks before it actually makes it online. The same thing 331 - * can happen while a CPU is in the process of coming online. Because this 332 - * race is quite rare, we check for it after detecting that the grace 333 - * period has been delayed rather than checking each and every CPU 334 - * each and every time we start a new grace period. 335 - */ 336 - static int rcu_implicit_offline_qs(struct rcu_data *rdp) 337 - { 338 - /* 339 - * If the CPU is offline for more than a jiffy, it is in a quiescent 340 - * state. We can trust its state not to change because interrupts 341 - * are disabled. The reason for the jiffy's worth of slack is to 342 - * handle CPUs initializing on the way up and finding their way 343 - * to the idle loop on the way down. 344 - */ 345 - if (cpu_is_offline(rdp->cpu) && 346 - ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) { 347 - trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); 348 - rdp->offline_fqs++; 349 - return 1; 350 - } 351 - return 0; 352 - } 353 - 354 - /* 355 326 * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle 356 327 * 357 328 * If the new value of the ->dynticks_nesting counter now is zero, ··· 650 679 * Return true if the specified CPU has passed through a quiescent 651 680 * state by virtue of being in or having passed through an dynticks 652 681 * idle state since the last call to dyntick_save_progress_counter() 653 - * for this same CPU. 682 + * for this same CPU, or by virtue of having been offline. 
654 683 */ 655 684 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 656 685 { ··· 674 703 return 1; 675 704 } 676 705 677 - /* Go check for the CPU being offline. */ 678 - return rcu_implicit_offline_qs(rdp); 706 + /* 707 + * Check for the CPU being offline, but only if the grace period 708 + * is old enough. We don't need to worry about the CPU changing 709 + * state: If we see it offline even once, it has been through a 710 + * quiescent state. 711 + * 712 + * The reason for insisting that the grace period be at least 713 + * one jiffy old is that CPUs that are not quite online and that 714 + * have just gone offline can still execute RCU read-side critical 715 + * sections. 716 + */ 717 + if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies)) 718 + return 0; /* Grace period is not old enough. */ 719 + barrier(); 720 + if (cpu_is_offline(rdp->cpu)) { 721 + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); 722 + rdp->offline_fqs++; 723 + return 1; 724 + } 725 + return 0; 679 726 } 680 727 681 728 static int jiffies_till_stall_check(void) ··· 750 761 rcu_for_each_leaf_node(rsp, rnp) { 751 762 raw_spin_lock_irqsave(&rnp->lock, flags); 752 763 ndetected += rcu_print_task_stall(rnp); 764 + if (rnp->qsmask != 0) { 765 + for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 766 + if (rnp->qsmask & (1UL << cpu)) { 767 + print_cpu_stall_info(rsp, 768 + rnp->grplo + cpu); 769 + ndetected++; 770 + } 771 + } 753 772 raw_spin_unlock_irqrestore(&rnp->lock, flags); 754 - if (rnp->qsmask == 0) 755 - continue; 756 - for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 757 - if (rnp->qsmask & (1UL << cpu)) { 758 - print_cpu_stall_info(rsp, rnp->grplo + cpu); 759 - ndetected++; 760 - } 761 773 } 762 774 763 775 /* ··· 823 833 j = ACCESS_ONCE(jiffies); 824 834 js = ACCESS_ONCE(rsp->jiffies_stall); 825 835 rnp = rdp->mynode; 826 - if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { 836 + if (rcu_gp_in_progress(rsp) && 837 + (ACCESS_ONCE(rnp->qsmask) 
& rdp->grpmask) && ULONG_CMP_GE(j, js)) { 827 838 828 839 /* We haven't checked in, so go dump stack. */ 829 840 print_cpu_stall(rsp); ··· 1477 1486 int i; 1478 1487 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 1479 1488 1480 - /* 1481 - * If there is an rcu_barrier() operation in progress, then 1482 - * only the task doing that operation is permitted to adopt 1483 - * callbacks. To do otherwise breaks rcu_barrier() and friends 1484 - * by causing them to fail to wait for the callbacks in the 1485 - * orphanage. 1486 - */ 1487 - if (rsp->rcu_barrier_in_progress && 1488 - rsp->rcu_barrier_in_progress != current) 1489 - return; 1490 - 1491 1489 /* Do the accounting first. */ 1492 1490 rdp->qlen_lazy += rsp->qlen_lazy; 1493 1491 rdp->qlen += rsp->qlen; ··· 1531 1551 * The CPU has been completely removed, and some other CPU is reporting 1532 1552 * this fact from process context. Do the remainder of the cleanup, 1533 1553 * including orphaning the outgoing CPU's RCU callbacks, and also 1534 - * adopting them, if there is no _rcu_barrier() instance running. 1535 - * There can only be one CPU hotplug operation at a time, so no other 1536 - * CPU can be attempting to update rcu_cpu_kthread_task. 1554 + * adopting them. There can only be one CPU hotplug operation at a time, 1555 + * so no other CPU can be attempting to update rcu_cpu_kthread_task. 1537 1556 */ 1538 1557 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 1539 1558 { ··· 1590 1611 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, 1591 1612 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", 1592 1613 cpu, rdp->qlen, rdp->nxtlist); 1614 + init_callback_list(rdp); 1615 + /* Disallow further callbacks on this CPU. 
*/ 1616 + rdp->nxttail[RCU_NEXT_TAIL] = NULL; 1593 1617 } 1594 1618 1595 1619 #else /* #ifdef CONFIG_HOTPLUG_CPU */ 1596 - 1597 - static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) 1598 - { 1599 - } 1600 1620 1601 1621 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) 1602 1622 { ··· 1965 1987 head->func = func; 1966 1988 head->next = NULL; 1967 1989 1968 - smp_mb(); /* Ensure RCU update seen before callback registry. */ 1969 - 1970 1990 /* 1971 1991 * Opportunistically note grace-period endings and beginnings. 1972 1992 * Note that we might see a beginning right after we see an ··· 1975 1999 rdp = this_cpu_ptr(rsp->rda); 1976 2000 1977 2001 /* Add the callback to our list. */ 2002 + if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) { 2003 + /* _call_rcu() is illegal on offline CPU; leak the callback. */ 2004 + WARN_ON_ONCE(1); 2005 + local_irq_restore(flags); 2006 + return; 2007 + } 1978 2008 ACCESS_ONCE(rdp->qlen)++; 1979 2009 if (lazy) 1980 2010 rdp->qlen_lazy++; ··· 2349 2367 static void _rcu_barrier(struct rcu_state *rsp) 2350 2368 { 2351 2369 int cpu; 2352 - unsigned long flags; 2353 2370 struct rcu_data *rdp; 2354 - struct rcu_data rd; 2355 2371 unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); 2356 2372 unsigned long snap_done; 2357 2373 2358 - init_rcu_head_on_stack(&rd.barrier_head); 2359 2374 _rcu_barrier_trace(rsp, "Begin", -1, snap); 2360 2375 2361 2376 /* Take mutex to serialize concurrent rcu_barrier() requests. */ ··· 2392 2413 /* 2393 2414 * Initialize the count to one rather than to zero in order to 2394 2415 * avoid a too-soon return to zero in case of a short grace period 2395 - * (or preemption of this task). Also flag this task as doing 2396 - * an rcu_barrier(). This will prevent anyone else from adopting 2397 - * orphaned callbacks, which could cause otherwise failure if a 2398 - * CPU went offline and quickly came back online. To see this, 2399 - * consider the following sequence of events: 2400 - * 2401 - * 1. 
We cause CPU 0 to post an rcu_barrier_callback() callback. 2402 - * 2. CPU 1 goes offline, orphaning its callbacks. 2403 - * 3. CPU 0 adopts CPU 1's orphaned callbacks. 2404 - * 4. CPU 1 comes back online. 2405 - * 5. We cause CPU 1 to post an rcu_barrier_callback() callback. 2406 - * 6. Both rcu_barrier_callback() callbacks are invoked, awakening 2407 - * us -- but before CPU 1's orphaned callbacks are invoked!!! 2416 + * (or preemption of this task). Exclude CPU-hotplug operations 2417 + * to ensure that no offline CPU has callbacks queued. 2408 2418 */ 2409 2419 init_completion(&rsp->barrier_completion); 2410 2420 atomic_set(&rsp->barrier_cpu_count, 1); 2411 - raw_spin_lock_irqsave(&rsp->onofflock, flags); 2412 - rsp->rcu_barrier_in_progress = current; 2413 - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 2421 + get_online_cpus(); 2414 2422 2415 2423 /* 2416 - * Force every CPU with callbacks to register a new callback 2417 - * that will tell us when all the preceding callbacks have 2418 - * been invoked. If an offline CPU has callbacks, wait for 2419 - * it to either come back online or to finish orphaning those 2420 - * callbacks. 2424 + * Force each CPU with callbacks to register a new callback. 2425 + * When that callback is invoked, we will know that all of the 2426 + * corresponding CPU's preceding callbacks have been invoked. 
2421 2427 */ 2422 - for_each_possible_cpu(cpu) { 2423 - preempt_disable(); 2428 + for_each_online_cpu(cpu) { 2424 2429 rdp = per_cpu_ptr(rsp->rda, cpu); 2425 - if (cpu_is_offline(cpu)) { 2426 - _rcu_barrier_trace(rsp, "Offline", cpu, 2427 - rsp->n_barrier_done); 2428 - preempt_enable(); 2429 - while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) 2430 - schedule_timeout_interruptible(1); 2431 - } else if (ACCESS_ONCE(rdp->qlen)) { 2430 + if (ACCESS_ONCE(rdp->qlen)) { 2432 2431 _rcu_barrier_trace(rsp, "OnlineQ", cpu, 2433 2432 rsp->n_barrier_done); 2434 2433 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); 2435 - preempt_enable(); 2436 2434 } else { 2437 2435 _rcu_barrier_trace(rsp, "OnlineNQ", cpu, 2438 2436 rsp->n_barrier_done); 2439 - preempt_enable(); 2440 2437 } 2441 2438 } 2442 - 2443 - /* 2444 - * Now that all online CPUs have rcu_barrier_callback() callbacks 2445 - * posted, we can adopt all of the orphaned callbacks and place 2446 - * an rcu_barrier_callback() callback after them. When that is done, 2447 - * we are guaranteed to have an rcu_barrier_callback() callback 2448 - * following every callback that could possibly have been 2449 - * registered before _rcu_barrier() was called. 2450 - */ 2451 - raw_spin_lock_irqsave(&rsp->onofflock, flags); 2452 - rcu_adopt_orphan_cbs(rsp); 2453 - rsp->rcu_barrier_in_progress = NULL; 2454 - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 2455 - atomic_inc(&rsp->barrier_cpu_count); 2456 - smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ 2457 - rd.rsp = rsp; 2458 - rsp->call(&rd.barrier_head, rcu_barrier_callback); 2439 + put_online_cpus(); 2459 2440 2460 2441 /* 2461 2442 * Now that we have an rcu_barrier_callback() callback on each ··· 2436 2497 2437 2498 /* Other rcu_barrier() invocations can now safely proceed. 
*/ 2438 2499 mutex_unlock(&rsp->barrier_mutex); 2439 - 2440 - destroy_rcu_head_on_stack(&rd.barrier_head); 2441 2500 } 2442 2501 2443 2502 /** ··· 2501 2564 rdp->qlen_last_fqs_check = 0; 2502 2565 rdp->n_force_qs_snap = rsp->n_force_qs; 2503 2566 rdp->blimit = blimit; 2567 + init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ 2504 2568 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 2505 2569 atomic_set(&rdp->dynticks->dynticks, 2506 2570 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-3
kernel/rcutree.h
··· 396 396 struct rcu_head **orphan_donetail; /* Tail of above. */ 397 397 long qlen_lazy; /* Number of lazy callbacks. */ 398 398 long qlen; /* Total number of callbacks. */ 399 - struct task_struct *rcu_barrier_in_progress; 400 - /* Task doing rcu_barrier(), */ 401 - /* or NULL if no barrier. */ 402 399 struct mutex barrier_mutex; /* Guards barrier fields. */ 403 400 atomic_t barrier_cpu_count; /* # CPUs waiting on. */ 404 401 struct completion barrier_completion; /* Wake at barrier end. */
+29 -18
kernel/rcutree_plugin.h
··· 421 421 unsigned long flags; 422 422 struct task_struct *t; 423 423 424 - if (!rcu_preempt_blocked_readers_cgp(rnp)) 425 - return; 426 424 raw_spin_lock_irqsave(&rnp->lock, flags); 425 + if (!rcu_preempt_blocked_readers_cgp(rnp)) { 426 + raw_spin_unlock_irqrestore(&rnp->lock, flags); 427 + return; 428 + } 427 429 t = list_entry(rnp->gp_tasks, 428 430 struct task_struct, rcu_node_entry); 429 431 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) ··· 585 583 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 586 584 } 587 585 586 + rnp->gp_tasks = NULL; 587 + rnp->exp_tasks = NULL; 588 588 #ifdef CONFIG_RCU_BOOST 589 - /* In case root is being boosted and leaf is not. */ 589 + rnp->boost_tasks = NULL; 590 + /* 591 + * In case root is being boosted and leaf was not. Make sure 592 + * that we boost the tasks blocking the current grace period 593 + * in this case. 594 + */ 590 595 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 591 596 if (rnp_root->boost_tasks != NULL && 592 - rnp_root->boost_tasks != rnp_root->gp_tasks) 597 + rnp_root->boost_tasks != rnp_root->gp_tasks && 598 + rnp_root->boost_tasks != rnp_root->exp_tasks) 593 599 rnp_root->boost_tasks = rnp_root->gp_tasks; 594 600 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 595 601 #endif /* #ifdef CONFIG_RCU_BOOST */ 596 602 597 - rnp->gp_tasks = NULL; 598 - rnp->exp_tasks = NULL; 599 603 return retval; 600 604 } 601 605 ··· 1212 1204 * kthread to start boosting them. If there is an expedited grace 1213 1205 * period in progress, it is always time to boost. 1214 1206 * 1215 - * The caller must hold rnp->lock, which this function releases, 1216 - * but irqs remain disabled. The ->boost_kthread_task is immortal, 1217 - * so we don't need to worry about it going away. 1207 + * The caller must hold rnp->lock, which this function releases. 1208 + * The ->boost_kthread_task is immortal, so we don't need to worry 1209 + * about it going away. 
1218 1210 */ 1219 1211 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1220 1212 { ··· 2225 2217 { 2226 2218 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 2227 2219 struct timer_list *tltp = &rdtp->idle_gp_timer; 2220 + char c; 2228 2221 2229 - sprintf(cp, "drain=%d %c timer=%lu", 2230 - rdtp->dyntick_drain, 2231 - rdtp->dyntick_holdoff == jiffies ? 'H' : '.', 2232 - timer_pending(tltp) ? tltp->expires - jiffies : -1); 2222 + c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; 2223 + if (timer_pending(tltp)) 2224 + sprintf(cp, "drain=%d %c timer=%lu", 2225 + rdtp->dyntick_drain, c, tltp->expires - jiffies); 2226 + else 2227 + sprintf(cp, "drain=%d %c timer not pending", 2228 + rdtp->dyntick_drain, c); 2233 2229 } 2234 2230 2235 2231 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ ··· 2301 2289 /* Increment ->ticks_this_gp for all flavors of RCU. */ 2302 2290 static void increment_cpu_stall_ticks(void) 2303 2291 { 2304 - __get_cpu_var(rcu_sched_data).ticks_this_gp++; 2305 - __get_cpu_var(rcu_bh_data).ticks_this_gp++; 2306 - #ifdef CONFIG_TREE_PREEMPT_RCU 2307 - __get_cpu_var(rcu_preempt_data).ticks_this_gp++; 2308 - #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 2292 + struct rcu_state *rsp; 2293 + 2294 + for_each_rcu_flavor(rsp) 2295 + __this_cpu_ptr(rsp->rda)->ticks_this_gp++; 2309 2296 } 2310 2297 2311 2298 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
+2 -2
kernel/rcutree_trace.c
··· 51 51 struct rcu_state *rsp; 52 52 53 53 for_each_rcu_flavor(rsp) 54 - seq_printf(m, "%s: %c bcc: %d nbd: %lu\n", 55 - rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.', 54 + seq_printf(m, "%s: bcc: %d nbd: %lu\n", 55 + rsp->name, 56 56 atomic_read(&rsp->barrier_cpu_count), 57 57 rsp->n_barrier_done); 58 58 return 0;
+20 -21
kernel/sched/core.c
··· 5304 5304 } 5305 5305 5306 5306 /* 5307 - * While a dead CPU has no uninterruptible tasks queued at this point, 5308 - * it might still have a nonzero ->nr_uninterruptible counter, because 5309 - * for performance reasons the counter is not stricly tracking tasks to 5310 - * their home CPUs. So we just add the counter to another CPU's counter, 5311 - * to keep the global sum constant after CPU-down: 5307 + * Since this CPU is going 'away' for a while, fold any nr_active delta 5308 + * we might have. Assumes we're called after migrate_tasks() so that the 5309 + * nr_active count is stable. 5310 + * 5311 + * Also see the comment "Global load-average calculations". 5312 5312 */ 5313 - static void migrate_nr_uninterruptible(struct rq *rq_src) 5313 + static void calc_load_migrate(struct rq *rq) 5314 5314 { 5315 - struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); 5316 - 5317 - rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; 5318 - rq_src->nr_uninterruptible = 0; 5319 - } 5320 - 5321 - /* 5322 - * remove the tasks which were accounted by rq from calc_load_tasks. 5323 - */ 5324 - static void calc_global_load_remove(struct rq *rq) 5325 - { 5326 - atomic_long_sub(rq->calc_load_active, &calc_load_tasks); 5327 - rq->calc_load_active = 0; 5315 + long delta = calc_load_fold_active(rq); 5316 + if (delta) 5317 + atomic_long_add(delta, &calc_load_tasks); 5328 5318 } 5329 5319 5330 5320 /* ··· 5607 5617 migrate_tasks(cpu); 5608 5618 BUG_ON(rq->nr_running != 1); /* the migration thread */ 5609 5619 raw_spin_unlock_irqrestore(&rq->lock, flags); 5620 + break; 5610 5621 5611 - migrate_nr_uninterruptible(rq); 5612 - calc_global_load_remove(rq); 5622 + case CPU_DEAD: 5623 + { 5624 + struct rq *dest_rq; 5625 + 5626 + local_irq_save(flags); 5627 + dest_rq = cpu_rq(smp_processor_id()); 5628 + raw_spin_lock(&dest_rq->lock); 5629 + calc_load_migrate(rq); 5630 + raw_spin_unlock_irqrestore(&dest_rq->lock, flags); 5631 + } 5613 5632 break; 5614 5633 #endif 5615 5634 }
+2 -1
kernel/time/tick-sched.c
··· 436 436 if (unlikely(local_softirq_pending() && cpu_online(cpu))) { 437 437 static int ratelimit; 438 438 439 - if (ratelimit < 10) { 439 + if (ratelimit < 10 && 440 + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { 440 441 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", 441 442 (unsigned int) local_softirq_pending()); 442 443 ratelimit++;
+14
lib/Kconfig.debug
··· 629 629 630 630 Say N if you are unsure. 631 631 632 + config PROVE_RCU_DELAY 633 + bool "RCU debugging: preemptible RCU race provocation" 634 + depends on DEBUG_KERNEL && PREEMPT_RCU 635 + default n 636 + help 637 + There is a class of races that involve an unlikely preemption 638 + of __rcu_read_unlock() just after ->rcu_read_lock_nesting has 639 + been set to INT_MIN. This feature inserts a delay at that 640 + point to increase the probability of these races. 641 + 642 + Say Y to increase probability of preemption of __rcu_read_unlock(). 643 + 644 + Say N if you are unsure. 645 + 632 646 config SPARSE_RCU_POINTER 633 647 bool "RCU debugging: sparse-based checks for pointer usage" 634 648 default n
+2 -4
mm/kmemleak.c
··· 1483 1483 { 1484 1484 struct kmemleak_object *prev_obj = v; 1485 1485 struct kmemleak_object *next_obj = NULL; 1486 - struct list_head *n = &prev_obj->object_list; 1486 + struct kmemleak_object *obj = prev_obj; 1487 1487 1488 1488 ++(*pos); 1489 1489 1490 - list_for_each_continue_rcu(n, &object_list) { 1491 - struct kmemleak_object *obj = 1492 - list_entry(n, struct kmemleak_object, object_list); 1490 + list_for_each_entry_continue_rcu(obj, &object_list, object_list) { 1493 1491 if (get_object(obj)) { 1494 1492 next_obj = obj; 1495 1493 break;