Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

tjh.dev / kernel

fork atom

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork atom

Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  rcu: Fix whitespace inconsistencies
  rcu: Fix thinko, actually initialize full tree
  rcu: Apply results of code inspection of kernel/rcutree_plugin.h
  rcu: Add WARN_ON_ONCE() consistency checks covering state transitions
  rcu: Fix synchronize_rcu() for TREE_PREEMPT_RCU
  rcu: Simplify rcu_read_unlock_special() quiescent-state accounting
  rcu: Add debug checks to TREE_PREEMPT_RCU for premature grace periods
  rcu: Kconfig help needs to say that TREE_PREEMPT_RCU scales down
  rcutorture: Occasionally delay readers enough to make RCU force_quiescent_state
  rcu: Initialize multi-level RCU grace periods holding locks
  rcu: Need to update rnp->gpnum if preemptable RCU is to be reliable

Linus Torvalds 16 years ago b8c7f1dc f4eccb6d

+195 -156

11 changed files

expand all collapse all

include

linux

rculist_nulls.h

rcupdate.h

rcutree.h

sched.h

init

Kconfig

kernel

rcupdate.c

rcutorture.c

rcutree.c

rcutree.h

rcutree_plugin.h

rcutree_trace.c

+1 -1

include/linux/rculist_nulls.h

reviewed

··· 102 102 */ 103 103 #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ 104 104 for (pos = rcu_dereference((head)->first); \ 105 105 - (!is_a_nulls(pos)) && \ 105 105 + (!is_a_nulls(pos)) && \ 106 106 ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ 107 107 pos = rcu_dereference(pos->next)) 108 108

+8 -21

include/linux/rcupdate.h

reviewed

··· 1 1 /* 2 2 - * Read-Copy Update mechanism for mutual exclusion 2 2 + * Read-Copy Update mechanism for mutual exclusion 3 3 * 4 4 * This program is free software; you can redistribute it and/or modify 5 5 * it under the terms of the GNU General Public License as published by ··· 18 18 * Copyright IBM Corporation, 2001 19 19 * 20 20 * Author: Dipankar Sarma <dipankar@in.ibm.com> 21 21 - * 21 21 + * 22 22 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 23 23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 24 24 * Papers: ··· 26 26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) 27 27 * 28 28 * For detailed explanation of Read-Copy Update mechanism see - 29 29 - * http://lse.sourceforge.net/locking/rcupdate.html 29 29 + * http://lse.sourceforge.net/locking/rcupdate.html 30 30 * 31 31 */ 32 32 ··· 52 52 }; 53 53 54 54 /* Exported common interfaces */ 55 55 + #ifdef CONFIG_TREE_PREEMPT_RCU 55 56 extern void synchronize_rcu(void); 57 57 + #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 58 58 + #define synchronize_rcu synchronize_sched 59 59 + #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 56 60 extern void synchronize_rcu_bh(void); 61 61 + extern void synchronize_sched(void); 57 62 extern void rcu_barrier(void); 58 63 extern void rcu_barrier_bh(void); 59 64 extern void rcu_barrier_sched(void); ··· 265 260 }; 266 261 267 262 extern void wakeme_after_rcu(struct rcu_head *head); 268 268 - 269 269 - /** 270 270 - * synchronize_sched - block until all CPUs have exited any non-preemptive 271 271 - * kernel code sequences. 272 272 - * 273 273 - * This means that all preempt_disable code sequences, including NMI and 274 274 - * hardware-interrupt handlers, in progress on entry will have completed 275 275 - * before this primitive returns. 
However, this does not guarantee that 276 276 - * softirq handlers will have completed, since in some kernels, these 277 277 - * handlers can run in process context, and can block. 278 278 - * 279 279 - * This primitive provides the guarantees made by the (now removed) 280 280 - * synchronize_kernel() API. In contrast, synchronize_rcu() only 281 281 - * guarantees that rcu_read_lock() sections will have completed. 282 282 - * In "classic RCU", these two guarantees happen to be one and 283 283 - * the same, but can differ in realtime RCU implementations. 284 284 - */ 285 285 - #define synchronize_sched() __synchronize_sched() 286 263 287 264 /** 288 265 * call_rcu - Queue an RCU callback for invocation after a grace period.

+3 -3

include/linux/rcutree.h

reviewed

··· 24 24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 25 25 * 26 26 * For detailed explanation of Read-Copy Update mechanism see - 27 27 - * Documentation/RCU 27 27 + * Documentation/RCU 28 28 */ 29 29 30 30 #ifndef __LINUX_RCUTREE_H ··· 53 53 preempt_enable(); 54 54 } 55 55 56 56 + #define __synchronize_sched() synchronize_rcu() 57 57 + 56 58 static inline void exit_rcu(void) 57 59 { 58 60 } ··· 69 67 { 70 68 local_bh_enable(); 71 69 } 72 72 - 73 73 - #define __synchronize_sched() synchronize_rcu() 74 70 75 71 extern void call_rcu_sched(struct rcu_head *head, 76 72 void (*func)(struct rcu_head *rcu));

-1

include/linux/sched.h

reviewed

··· 1755 1755 1756 1756 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ 1757 1757 #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ 1758 1758 - #define RCU_READ_UNLOCK_GOT_QS (1 << 2) /* CPU has responded to RCU core. */ 1759 1758 1760 1759 static inline void rcu_copy_process(struct task_struct *p) 1761 1760 {

+2 -1

init/Kconfig

reviewed

··· 331 331 This option selects the RCU implementation that is 332 332 designed for very large SMP systems with hundreds or 333 333 thousands of CPUs, but for which real-time response 334 334 - is also required. 334 334 + is also required. It also scales down nicely to 335 335 + smaller systems. 335 336 336 337 endchoice 337 338

+45 -3

kernel/rcupdate.c

reviewed

··· 19 19 * 20 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 21 21 * Manfred Spraul <manfred@colorfullife.com> 22 22 - * 22 22 + * 23 23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 24 24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 25 25 * Papers: ··· 27 27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) 28 28 * 29 29 * For detailed explanation of Read-Copy Update mechanism see - 30 30 - * http://lse.sourceforge.net/locking/rcupdate.html 30 30 + * http://lse.sourceforge.net/locking/rcupdate.html 31 31 * 32 32 */ 33 33 #include <linux/types.h> ··· 74 74 complete(&rcu->completion); 75 75 } 76 76 77 77 + #ifdef CONFIG_TREE_PREEMPT_RCU 78 78 + 77 79 /** 78 80 * synchronize_rcu - wait until a grace period has elapsed. 79 81 * ··· 89 87 { 90 88 struct rcu_synchronize rcu; 91 89 92 92 - if (rcu_blocking_is_gp()) 90 90 + if (!rcu_scheduler_active) 93 91 return; 94 92 95 93 init_completion(&rcu.completion); ··· 99 97 wait_for_completion(&rcu.completion); 100 98 } 101 99 EXPORT_SYMBOL_GPL(synchronize_rcu); 100 100 + 101 101 + #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 102 102 + 103 103 + /** 104 104 + * synchronize_sched - wait until an rcu-sched grace period has elapsed. 105 105 + * 106 106 + * Control will return to the caller some time after a full rcu-sched 107 107 + * grace period has elapsed, in other words after all currently executing 108 108 + * rcu-sched read-side critical sections have completed. These read-side 109 109 + * critical sections are delimited by rcu_read_lock_sched() and 110 110 + * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), 111 111 + * local_irq_disable(), and so on may be used in place of 112 112 + * rcu_read_lock_sched(). 113 113 + * 114 114 + * This means that all preempt_disable code sequences, including NMI and 115 115 + * hardware-interrupt handlers, in progress on entry will have completed 116 116 + * before this primitive returns. 
However, this does not guarantee that 117 117 + * softirq handlers will have completed, since in some kernels, these 118 118 + * handlers can run in process context, and can block. 119 119 + * 120 120 + * This primitive provides the guarantees made by the (now removed) 121 121 + * synchronize_kernel() API. In contrast, synchronize_rcu() only 122 122 + * guarantees that rcu_read_lock() sections will have completed. 123 123 + * In "classic RCU", these two guarantees happen to be one and 124 124 + * the same, but can differ in realtime RCU implementations. 125 125 + */ 126 126 + void synchronize_sched(void) 127 127 + { 128 128 + struct rcu_synchronize rcu; 129 129 + 130 130 + if (rcu_blocking_is_gp()) 131 131 + return; 132 132 + 133 133 + init_completion(&rcu.completion); 134 134 + /* Will wake me after RCU finished. */ 135 135 + call_rcu_sched(&rcu.head, wakeme_after_rcu); 136 136 + /* Wait for it. */ 137 137 + wait_for_completion(&rcu.completion); 138 138 + } 139 139 + EXPORT_SYMBOL_GPL(synchronize_sched); 102 140 103 141 /** 104 142 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.

+24 -19

kernel/rcutorture.c

reviewed

··· 18 18 * Copyright (C) IBM Corporation, 2005, 2006 19 19 * 20 20 * Authors: Paul E. McKenney <paulmck@us.ibm.com> 21 21 - * Josh Triplett <josh@freedesktop.org> 21 21 + * Josh Triplett <josh@freedesktop.org> 22 22 * 23 23 * See also: Documentation/RCU/torture.txt 24 24 */ ··· 50 50 51 51 MODULE_LICENSE("GPL"); 52 52 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " 53 53 - "Josh Triplett <josh@freedesktop.org>"); 53 53 + "Josh Triplett <josh@freedesktop.org>"); 54 54 55 55 static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ 56 56 static int nfakewriters = 4; /* # fake writer threads */ ··· 110 110 }; 111 111 112 112 static LIST_HEAD(rcu_torture_freelist); 113 113 - static struct rcu_torture *rcu_torture_current = NULL; 114 114 - static long rcu_torture_current_version = 0; 113 113 + static struct rcu_torture *rcu_torture_current; 114 114 + static long rcu_torture_current_version; 115 115 static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; 116 116 static DEFINE_SPINLOCK(rcu_torture_lock); 117 117 static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = ··· 124 124 static atomic_t n_rcu_torture_free; 125 125 static atomic_t n_rcu_torture_mberror; 126 126 static atomic_t n_rcu_torture_error; 127 127 - static long n_rcu_torture_timers = 0; 127 127 + static long n_rcu_torture_timers; 128 128 static struct list_head rcu_torture_removed; 129 129 static cpumask_var_t shuffle_tmp_mask; 130 130 131 131 - static int stutter_pause_test = 0; 131 131 + static int stutter_pause_test; 132 132 133 133 #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) 134 134 #define RCUTORTURE_RUNNABLE_INIT 1 ··· 267 267 int irq_capable; 268 268 char *name; 269 269 }; 270 270 - static struct rcu_torture_ops *cur_ops = NULL; 270 270 + 271 271 + static struct rcu_torture_ops *cur_ops; 271 272 272 273 /* 273 274 * Definitions for rcu torture testing. 
··· 282 281 283 282 static void rcu_read_delay(struct rcu_random_state *rrsp) 284 283 { 285 285 - long delay; 286 286 - const long longdelay = 200; 284 284 + const unsigned long shortdelay_us = 200; 285 285 + const unsigned long longdelay_ms = 50; 287 286 288 288 - /* We want there to be long-running readers, but not all the time. */ 287 287 + /* We want a short delay sometimes to make a reader delay the grace 288 288 + * period, and we want a long delay occasionally to trigger 289 289 + * force_quiescent_state. */ 289 290 290 290 - delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay); 291 291 - if (!delay) 292 292 - udelay(longdelay); 291 291 + if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) 292 292 + mdelay(longdelay_ms); 293 293 + if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) 294 294 + udelay(shortdelay_us); 293 295 } 294 296 295 297 static void rcu_torture_read_unlock(int idx) __releases(RCU) ··· 343 339 .sync = synchronize_rcu, 344 340 .cb_barrier = rcu_barrier, 345 341 .stats = NULL, 346 346 - .irq_capable = 1, 347 347 - .name = "rcu" 342 342 + .irq_capable = 1, 343 343 + .name = "rcu" 348 344 }; 349 345 350 346 static void rcu_sync_torture_deferred_free(struct rcu_torture *p) ··· 642 638 643 639 do { 644 640 schedule_timeout_uninterruptible(1); 645 645 - if ((rp = rcu_torture_alloc()) == NULL) 641 641 + rp = rcu_torture_alloc(); 642 642 + if (rp == NULL) 646 643 continue; 647 644 rp->rtort_pipe_count = 0; 648 645 udelay(rcu_random(&rand) & 0x3ff); ··· 1115 1110 printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n", 1116 1111 torture_type); 1117 1112 mutex_unlock(&fullstop_mutex); 1118 1118 - return (-EINVAL); 1113 1113 + return -EINVAL; 1119 1114 } 1120 1115 if (cur_ops->init) 1121 1116 cur_ops->init(); /* no "goto unwind" prior to this point!!! 
*/ ··· 1166 1161 goto unwind; 1167 1162 } 1168 1163 fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), 1169 1169 - GFP_KERNEL); 1164 1164 + GFP_KERNEL); 1170 1165 if (fakewriter_tasks == NULL) { 1171 1166 VERBOSE_PRINTK_ERRSTRING("out of memory"); 1172 1167 firsterr = -ENOMEM; ··· 1175 1170 for (i = 0; i < nfakewriters; i++) { 1176 1171 VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); 1177 1172 fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, 1178 1178 - "rcu_torture_fakewriter"); 1173 1173 + "rcu_torture_fakewriter"); 1179 1174 if (IS_ERR(fakewriter_tasks[i])) { 1180 1175 firsterr = PTR_ERR(fakewriter_tasks[i]); 1181 1176 VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter");

+38 -67

kernel/rcutree.c

reviewed

··· 25 25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 26 26 * 27 27 * For detailed explanation of Read-Copy Update mechanism see - 28 28 - * Documentation/RCU 28 28 + * Documentation/RCU 29 29 */ 30 30 #include <linux/types.h> 31 31 #include <linux/kernel.h> ··· 107 107 */ 108 108 void rcu_sched_qs(int cpu) 109 109 { 110 110 - unsigned long flags; 111 110 struct rcu_data *rdp; 112 111 113 113 - local_irq_save(flags); 114 112 rdp = &per_cpu(rcu_sched_data, cpu); 115 115 - rdp->passed_quiesc = 1; 116 113 rdp->passed_quiesc_completed = rdp->completed; 117 117 - rcu_preempt_qs(cpu); 118 118 - local_irq_restore(flags); 114 114 + barrier(); 115 115 + rdp->passed_quiesc = 1; 116 116 + rcu_preempt_note_context_switch(cpu); 119 117 } 120 118 121 119 void rcu_bh_qs(int cpu) 122 120 { 123 123 - unsigned long flags; 124 121 struct rcu_data *rdp; 125 122 126 126 - local_irq_save(flags); 127 123 rdp = &per_cpu(rcu_bh_data, cpu); 128 128 - rdp->passed_quiesc = 1; 129 124 rdp->passed_quiesc_completed = rdp->completed; 130 130 - local_irq_restore(flags); 125 125 + barrier(); 126 126 + rdp->passed_quiesc = 1; 131 127 } 132 128 133 129 #ifdef CONFIG_NO_HZ ··· 601 605 { 602 606 struct rcu_data *rdp = rsp->rda[smp_processor_id()]; 603 607 struct rcu_node *rnp = rcu_get_root(rsp); 604 604 - struct rcu_node *rnp_cur; 605 605 - struct rcu_node *rnp_end; 606 608 607 609 if (!cpu_needs_another_gp(rsp, rdp)) { 608 610 spin_unlock_irqrestore(&rnp->lock, flags); ··· 609 615 610 616 /* Advance to a new grace period and initialize state. */ 611 617 rsp->gpnum++; 618 618 + WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); 612 619 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 613 620 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 614 621 record_gp_stall_check_time(rsp); ··· 626 631 627 632 /* Special-case the common single-level case. 
*/ 628 633 if (NUM_RCU_NODES == 1) { 634 634 + rcu_preempt_check_blocked_tasks(rnp); 629 635 rnp->qsmask = rnp->qsmaskinit; 636 636 + rnp->gpnum = rsp->gpnum; 630 637 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 631 638 spin_unlock_irqrestore(&rnp->lock, flags); 632 639 return; ··· 641 644 spin_lock(&rsp->onofflock); /* irqs already disabled. */ 642 645 643 646 /* 644 644 - * Set the quiescent-state-needed bits in all the non-leaf RCU 645 645 - * nodes for all currently online CPUs. This operation relies 646 646 - * on the layout of the hierarchy within the rsp->node[] array. 647 647 - * Note that other CPUs will access only the leaves of the 648 648 - * hierarchy, which still indicate that no grace period is in 649 649 - * progress. In addition, we have excluded CPU-hotplug operations. 650 650 - * 651 651 - * We therefore do not need to hold any locks. Any required 652 652 - * memory barriers will be supplied by the locks guarding the 653 653 - * leaf rcu_nodes in the hierarchy. 654 654 - */ 655 655 - 656 656 - rnp_end = rsp->level[NUM_RCU_LVLS - 1]; 657 657 - for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++) 658 658 - rnp_cur->qsmask = rnp_cur->qsmaskinit; 659 659 - 660 660 - /* 661 661 - * Now set up the leaf nodes. Here we must be careful. First, 662 662 - * we need to hold the lock in order to exclude other CPUs, which 663 663 - * might be contending for the leaf nodes' locks. Second, as 664 664 - * soon as we initialize a given leaf node, its CPUs might run 665 665 - * up the rest of the hierarchy. We must therefore acquire locks 666 666 - * for each node that we touch during this stage. (But we still 667 667 - * are excluding CPU-hotplug operations.) 647 647 + * Set the quiescent-state-needed bits in all the rcu_node 648 648 + * structures for all currently online CPUs in breadth-first 649 649 + * order, starting from the root rcu_node structure. 
This 650 650 + * operation relies on the layout of the hierarchy within the 651 651 + * rsp->node[] array. Note that other CPUs will access only 652 652 + * the leaves of the hierarchy, which still indicate that no 653 653 + * grace period is in progress, at least until the corresponding 654 654 + * leaf node has been initialized. In addition, we have excluded 655 655 + * CPU-hotplug operations. 668 656 * 669 657 * Note that the grace period cannot complete until we finish 670 658 * the initialization process, as there will be at least one 671 659 * qsmask bit set in the root node until that time, namely the 672 672 - * one corresponding to this CPU. 660 660 + * one corresponding to this CPU, due to the fact that we have 661 661 + * irqs disabled. 673 662 */ 674 674 - rnp_end = &rsp->node[NUM_RCU_NODES]; 675 675 - rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; 676 676 - for (; rnp_cur < rnp_end; rnp_cur++) { 677 677 - spin_lock(&rnp_cur->lock); /* irqs already disabled. */ 678 678 - rnp_cur->qsmask = rnp_cur->qsmaskinit; 679 679 - spin_unlock(&rnp_cur->lock); /* irqs already disabled. */ 663 663 + for (rnp = &rsp->node[0]; rnp < &rsp->node[NUM_RCU_NODES]; rnp++) { 664 664 + spin_lock(&rnp->lock); /* irqs already disabled. */ 665 665 + rcu_preempt_check_blocked_tasks(rnp); 666 666 + rnp->qsmask = rnp->qsmaskinit; 667 667 + rnp->gpnum = rsp->gpnum; 668 668 + spin_unlock(&rnp->lock); /* irqs already disabled. */ 680 669 } 681 670 682 671 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ ··· 705 722 static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) 706 723 __releases(rnp->lock) 707 724 { 725 725 + WARN_ON_ONCE(rsp->completed == rsp->gpnum); 708 726 rsp->completed = rsp->gpnum; 709 727 rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); 710 728 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. 
*/ ··· 723 739 unsigned long flags) 724 740 __releases(rnp->lock) 725 741 { 742 742 + struct rcu_node *rnp_c; 743 743 + 726 744 /* Walk up the rcu_node hierarchy. */ 727 745 for (;;) { 728 746 if (!(rnp->qsmask & mask)) { ··· 748 762 break; 749 763 } 750 764 spin_unlock_irqrestore(&rnp->lock, flags); 765 765 + rnp_c = rnp; 751 766 rnp = rnp->parent; 752 767 spin_lock_irqsave(&rnp->lock, flags); 768 768 + WARN_ON_ONCE(rnp_c->qsmask); 753 769 } 754 770 755 771 /* ··· 764 776 765 777 /* 766 778 * Record a quiescent state for the specified CPU, which must either be 767 767 - * the current CPU or an offline CPU. The lastcomp argument is used to 768 768 - * make sure we are still in the grace period of interest. We don't want 769 769 - * to end the current grace period based on quiescent states detected in 770 770 - * an earlier grace period! 779 779 + * the current CPU. The lastcomp argument is used to make sure we are 780 780 + * still in the grace period of interest. We don't want to end the current 781 781 + * grace period based on quiescent states detected in an earlier grace 782 782 + * period! 771 783 */ 772 784 static void 773 785 cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) ··· 802 814 * This GP can't end until cpu checks in, so all of our 803 815 * callbacks can be processed during the next GP. 804 816 */ 805 805 - rdp = rsp->rda[smp_processor_id()]; 806 817 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 807 818 808 819 cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ ··· 859 872 spin_lock_irqsave(&rsp->onofflock, flags); 860 873 861 874 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 862 862 - rnp = rdp->mynode; 875 875 + rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ 863 876 mask = rdp->grpmask; /* rnp->grplo is constant. */ 864 877 do { 865 878 spin_lock(&rnp->lock); /* irqs already disabled. */ ··· 868 881 spin_unlock(&rnp->lock); /* irqs remain disabled. 
*/ 869 882 break; 870 883 } 871 871 - rcu_preempt_offline_tasks(rsp, rnp); 884 884 + rcu_preempt_offline_tasks(rsp, rnp, rdp); 872 885 mask = rnp->grpmask; 873 886 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 874 887 rnp = rnp->parent; ··· 876 889 lastcomp = rsp->completed; 877 890 878 891 spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 879 879 - 880 880 - /* Being offline is a quiescent state, so go record it. */ 881 881 - cpu_quiet(cpu, rsp, rdp, lastcomp); 882 892 883 893 /* 884 894 * Move callbacks from the outgoing CPU to the running CPU. ··· 1441 1457 rnp = rnp->parent; 1442 1458 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 1443 1459 1444 1444 - spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 1445 1445 - 1446 1446 - /* 1447 1447 - * A new grace period might start here. If so, we will be part of 1448 1448 - * it, and its gpnum will be greater than ours, so we will 1449 1449 - * participate. It is also possible for the gpnum to have been 1450 1450 - * incremented before this function was called, and the bitmasks 1451 1451 - * to not be filled out until now, in which case we will also 1452 1452 - * participate due to our gpnum being behind. 1453 1453 - */ 1454 1454 - 1455 1455 - /* Since it is coming online, the CPU is in a quiescent state. */ 1456 1456 - cpu_quiet(cpu, rsp, rdp, lastcomp); 1457 1457 - local_irq_restore(flags); 1460 1460 + spin_unlock_irqrestore(&rsp->onofflock, flags); 1458 1461 } 1459 1462 1460 1463 static void __cpuinit rcu_online_cpu(int cpu)

+1 -1

kernel/rcutree.h

reviewed

··· 142 142 */ 143 143 struct rcu_head *nxtlist; 144 144 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 145 145 - long qlen; /* # of queued callbacks */ 145 145 + long qlen; /* # of queued callbacks */ 146 146 long blimit; /* Upper limit on a processed batch */ 147 147 148 148 #ifdef CONFIG_NO_HZ

+72 -38

kernel/rcutree_plugin.h

reviewed

··· 64 64 * not in a quiescent state. There might be any number of tasks blocked 65 65 * while in an RCU read-side critical section. 66 66 */ 67 67 - static void rcu_preempt_qs_record(int cpu) 67 67 + static void rcu_preempt_qs(int cpu) 68 68 { 69 69 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 70 70 - rdp->passed_quiesc = 1; 71 70 rdp->passed_quiesc_completed = rdp->completed; 71 71 + barrier(); 72 72 + rdp->passed_quiesc = 1; 72 73 } 73 74 74 75 /* 75 75 - * We have entered the scheduler or are between softirqs in ksoftirqd. 76 76 - * If we are in an RCU read-side critical section, we need to reflect 77 77 - * that in the state of the rcu_node structure corresponding to this CPU. 78 78 - * Caller must disable hardirqs. 76 76 + * We have entered the scheduler, and the current task might soon be 77 77 + * context-switched away from. If this task is in an RCU read-side 78 78 + * critical section, we will no longer be able to rely on the CPU to 79 79 + * record that fact, so we enqueue the task on the appropriate entry 80 80 + * of the blocked_tasks[] array. The task will dequeue itself when 81 81 + * it exits the outermost enclosing RCU read-side critical section. 82 82 + * Therefore, the current grace period cannot be permitted to complete 83 83 + * until the blocked_tasks[] entry indexed by the low-order bit of 84 84 + * rnp->gpnum empties. 85 85 + * 86 86 + * Caller must disable preemption. 79 87 */ 80 80 - static void rcu_preempt_qs(int cpu) 88 88 + static void rcu_preempt_note_context_switch(int cpu) 81 89 { 82 90 struct task_struct *t = current; 91 91 + unsigned long flags; 83 92 int phase; 84 93 struct rcu_data *rdp; 85 94 struct rcu_node *rnp; ··· 99 90 /* Possibly blocking in an RCU read-side critical section. 
*/ 100 91 rdp = rcu_preempt_state.rda[cpu]; 101 92 rnp = rdp->mynode; 102 102 - spin_lock(&rnp->lock); 93 93 + spin_lock_irqsave(&rnp->lock, flags); 103 94 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 104 95 t->rcu_blocked_node = rnp; 105 96 ··· 112 103 * state for the current grace period), then as long 113 104 * as that task remains queued, the current grace period 114 105 * cannot end. 106 106 + * 107 107 + * But first, note that the current CPU must still be 108 108 + * on line! 115 109 */ 116 116 - phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1); 110 110 + WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 111 111 + WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 112 112 + phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; 117 113 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); 118 118 - smp_mb(); /* Ensure later ctxt swtch seen after above. */ 119 119 - spin_unlock(&rnp->lock); 114 114 + spin_unlock_irqrestore(&rnp->lock, flags); 120 115 } 121 116 122 117 /* ··· 132 119 * grace period, then the fact that the task has been enqueued 133 120 * means that we continue to block the current grace period. 134 121 */ 135 135 - rcu_preempt_qs_record(cpu); 136 136 - t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS | 137 137 - RCU_READ_UNLOCK_GOT_QS); 122 122 + rcu_preempt_qs(cpu); 123 123 + local_irq_save(flags); 124 124 + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 125 125 + local_irq_restore(flags); 138 126 } 139 127 140 128 /* ··· 171 157 special = t->rcu_read_unlock_special; 172 158 if (special & RCU_READ_UNLOCK_NEED_QS) { 173 159 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 174 174 - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS; 160 160 + rcu_preempt_qs(smp_processor_id()); 175 161 } 176 162 177 163 /* Hardware IRQ handlers cannot block. 
*/ ··· 191 177 */ 192 178 for (;;) { 193 179 rnp = t->rcu_blocked_node; 194 194 - spin_lock(&rnp->lock); 180 180 + spin_lock(&rnp->lock); /* irqs already disabled. */ 195 181 if (rnp == t->rcu_blocked_node) 196 182 break; 197 197 - spin_unlock(&rnp->lock); 183 183 + spin_unlock(&rnp->lock); /* irqs remain disabled. */ 198 184 } 199 185 empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); 200 186 list_del_init(&t->rcu_node_entry); ··· 208 194 */ 209 195 if (!empty && rnp->qsmask == 0 && 210 196 list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) { 211 211 - t->rcu_read_unlock_special &= 212 212 - ~(RCU_READ_UNLOCK_NEED_QS | 213 213 - RCU_READ_UNLOCK_GOT_QS); 197 197 + struct rcu_node *rnp_p; 198 198 + 214 199 if (rnp->parent == NULL) { 215 200 /* Only one rcu_node in the tree. */ 216 201 cpu_quiet_msk_finish(&rcu_preempt_state, flags); ··· 218 205 /* Report up the rest of the hierarchy. */ 219 206 mask = rnp->grpmask; 220 207 spin_unlock_irqrestore(&rnp->lock, flags); 221 221 - rnp = rnp->parent; 222 222 - spin_lock_irqsave(&rnp->lock, flags); 223 223 - cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags); 208 208 + rnp_p = rnp->parent; 209 209 + spin_lock_irqsave(&rnp_p->lock, flags); 210 210 + WARN_ON_ONCE(rnp->qsmask); 211 211 + cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags); 224 212 return; 225 213 } 226 214 spin_unlock(&rnp->lock); ··· 273 259 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 274 260 275 261 /* 262 262 + * Check that the list of blocked tasks for the newly completed grace 263 263 + * period is in fact empty. It is a serious bug to complete a grace 264 264 + * period that still has RCU readers blocked! This function must be 265 265 + * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 266 266 + * must be held by the caller. 
267 267 + */ 268 268 + static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 269 269 + { 270 270 + WARN_ON_ONCE(!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])); 271 271 + WARN_ON_ONCE(rnp->qsmask); 272 272 + } 273 273 + 274 274 + /* 276 275 * Check for preempted RCU readers for the specified rcu_node structure. 277 276 * If the caller needs a reliable answer, it must hold the rcu_node's 278 277 * >lock. ··· 307 280 * The caller must hold rnp->lock with irqs disabled. 308 281 */ 309 282 static void rcu_preempt_offline_tasks(struct rcu_state *rsp, 310 310 - struct rcu_node *rnp) 283 283 + struct rcu_node *rnp, 284 284 + struct rcu_data *rdp) 311 285 { 312 286 int i; 313 287 struct list_head *lp; ··· 320 292 WARN_ONCE(1, "Last CPU thought to be offlined?"); 321 293 return; /* Shouldn't happen: at least one CPU online. */ 322 294 } 295 295 + WARN_ON_ONCE(rnp != rdp->mynode && 296 296 + (!list_empty(&rnp->blocked_tasks[0]) || 297 297 + !list_empty(&rnp->blocked_tasks[1]))); 323 298 324 299 /* 325 300 * Move tasks up to root rcu_node. 
Rely on the fact that the ··· 366 335 struct task_struct *t = current; 367 336 368 337 if (t->rcu_read_lock_nesting == 0) { 369 369 - t->rcu_read_unlock_special &= 370 370 - ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS); 371 371 - rcu_preempt_qs_record(cpu); 338 338 + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 339 339 + rcu_preempt_qs(cpu); 372 340 return; 373 341 } 374 374 - if (per_cpu(rcu_preempt_data, cpu).qs_pending) { 375 375 - if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) { 376 376 - rcu_preempt_qs_record(cpu); 377 377 - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS; 378 378 - } else if (!(t->rcu_read_unlock_special & 379 379 - RCU_READ_UNLOCK_NEED_QS)) { 380 380 - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 381 381 - } 382 382 - } 342 342 + if (per_cpu(rcu_preempt_data, cpu).qs_pending) 343 343 + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 383 344 } 384 345 385 346 /* ··· 457 434 * Because preemptable RCU does not exist, we never have to check for 458 435 * CPUs being in quiescent states. 459 436 */ 460 460 - static void rcu_preempt_qs(int cpu) 437 437 + static void rcu_preempt_note_context_switch(int cpu) 461 438 { 462 439 } 463 440 ··· 472 449 } 473 450 474 451 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 452 452 + 453 453 + /* 454 454 + * Because there is no preemptable RCU, there can be no readers blocked, 455 455 + * so there is no need to check for blocked tasks. So check only for 456 456 + * bogus qsmask values. 457 457 + */ 458 458 + static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 459 459 + { 460 460 + WARN_ON_ONCE(rnp->qsmask); 461 461 + } 475 462 476 463 /* 477 464 * Because preemptable RCU does not exist, there are never any preempted ··· 499 466 * tasks that were blocked within RCU read-side critical sections. 
500 467 */ 501 468 static void rcu_preempt_offline_tasks(struct rcu_state *rsp, 502 502 - struct rcu_node *rnp) 469 469 + struct rcu_node *rnp, 470 470 + struct rcu_data *rdp) 503 471 { 504 472 } 505 473

+1 -1

kernel/rcutree_trace.c

reviewed

··· 20 20 * Papers: http://www.rdrop.com/users/paulmck/RCU 21 21 * 22 22 * For detailed explanation of Read-Copy Update mechanism see - 23 23 - * Documentation/RCU 23 23 + * Documentation/RCU 24 24 * 25 25 */ 26 26 #include <linux/types.h>