Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
rcu: Fix whitespace inconsistencies
rcu: Fix thinko, actually initialize full tree
rcu: Apply results of code inspection of kernel/rcutree_plugin.h
rcu: Add WARN_ON_ONCE() consistency checks covering state transitions
rcu: Fix synchronize_rcu() for TREE_PREEMPT_RCU
rcu: Simplify rcu_read_unlock_special() quiescent-state accounting
rcu: Add debug checks to TREE_PREEMPT_RCU for premature grace periods
rcu: Kconfig help needs to say that TREE_PREEMPT_RCU scales down
rcutorture: Occasionally delay readers enough to make RCU force_quiescent_state
rcu: Initialize multi-level RCU grace periods holding locks
rcu: Need to update rnp->gpnum if preemptable RCU is to be reliable

+195 -156
+1 -1
include/linux/rculist_nulls.h
··· 102 102 */ 103 103 #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ 104 104 for (pos = rcu_dereference((head)->first); \ 105 - (!is_a_nulls(pos)) && \ 105 + (!is_a_nulls(pos)) && \ 106 106 ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ 107 107 pos = rcu_dereference(pos->next)) 108 108
+8 -21
include/linux/rcupdate.h
··· 1 1 /* 2 - * Read-Copy Update mechanism for mutual exclusion 2 + * Read-Copy Update mechanism for mutual exclusion 3 3 * 4 4 * This program is free software; you can redistribute it and/or modify 5 5 * it under the terms of the GNU General Public License as published by ··· 18 18 * Copyright IBM Corporation, 2001 19 19 * 20 20 * Author: Dipankar Sarma <dipankar@in.ibm.com> 21 - * 21 + * 22 22 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 23 23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 24 24 * Papers: ··· 26 26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) 27 27 * 28 28 * For detailed explanation of Read-Copy Update mechanism see - 29 - * http://lse.sourceforge.net/locking/rcupdate.html 29 + * http://lse.sourceforge.net/locking/rcupdate.html 30 30 * 31 31 */ 32 32 ··· 52 52 }; 53 53 54 54 /* Exported common interfaces */ 55 + #ifdef CONFIG_TREE_PREEMPT_RCU 55 56 extern void synchronize_rcu(void); 57 + #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 58 + #define synchronize_rcu synchronize_sched 59 + #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 56 60 extern void synchronize_rcu_bh(void); 61 + extern void synchronize_sched(void); 57 62 extern void rcu_barrier(void); 58 63 extern void rcu_barrier_bh(void); 59 64 extern void rcu_barrier_sched(void); ··· 265 260 }; 266 261 267 262 extern void wakeme_after_rcu(struct rcu_head *head); 268 - 269 - /** 270 - * synchronize_sched - block until all CPUs have exited any non-preemptive 271 - * kernel code sequences. 272 - * 273 - * This means that all preempt_disable code sequences, including NMI and 274 - * hardware-interrupt handlers, in progress on entry will have completed 275 - * before this primitive returns. However, this does not guarantee that 276 - * softirq handlers will have completed, since in some kernels, these 277 - * handlers can run in process context, and can block. 278 - * 279 - * This primitive provides the guarantees made by the (now removed) 280 - * synchronize_kernel() API. In contrast, synchronize_rcu() only 281 - * guarantees that rcu_read_lock() sections will have completed. 282 - * In "classic RCU", these two guarantees happen to be one and 283 - * the same, but can differ in realtime RCU implementations. 284 - */ 285 - #define synchronize_sched() __synchronize_sched() 286 263 287 264 /** 288 265 * call_rcu - Queue an RCU callback for invocation after a grace period.
+3 -3
include/linux/rcutree.h
··· 24 24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 25 25 * 26 26 * For detailed explanation of Read-Copy Update mechanism see - 27 - * Documentation/RCU 27 + * Documentation/RCU 28 28 */ 29 29 30 30 #ifndef __LINUX_RCUTREE_H ··· 53 53 preempt_enable(); 54 54 } 55 55 56 + #define __synchronize_sched() synchronize_rcu() 57 + 56 58 static inline void exit_rcu(void) 57 59 { 58 60 } ··· 69 67 { 70 68 local_bh_enable(); 71 69 } 72 - 73 - #define __synchronize_sched() synchronize_rcu() 74 70 75 71 extern void call_rcu_sched(struct rcu_head *head, 76 72 void (*func)(struct rcu_head *rcu));
-1
include/linux/sched.h
··· 1755 1755 1756 1756 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ 1757 1757 #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ 1758 - #define RCU_READ_UNLOCK_GOT_QS (1 << 2) /* CPU has responded to RCU core. */ 1759 1758 1760 1759 static inline void rcu_copy_process(struct task_struct *p) 1761 1760 {
+2 -1
init/Kconfig
··· 331 331 This option selects the RCU implementation that is 332 332 designed for very large SMP systems with hundreds or 333 333 thousands of CPUs, but for which real-time response 334 - is also required. 334 + is also required. It also scales down nicely to 335 + smaller systems. 335 336 336 337 endchoice 337 338
+45 -3
kernel/rcupdate.c
··· 19 19 * 20 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 21 21 * Manfred Spraul <manfred@colorfullife.com> 22 - * 22 + * 23 23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 24 24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 25 25 * Papers: ··· 27 27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) 28 28 * 29 29 * For detailed explanation of Read-Copy Update mechanism see - 30 - * http://lse.sourceforge.net/locking/rcupdate.html 30 + * http://lse.sourceforge.net/locking/rcupdate.html 31 31 * 32 32 */ 33 33 #include <linux/types.h> ··· 74 74 complete(&rcu->completion); 75 75 } 76 76 77 + #ifdef CONFIG_TREE_PREEMPT_RCU 78 + 77 79 /** 78 80 * synchronize_rcu - wait until a grace period has elapsed. 79 81 * ··· 89 87 { 90 88 struct rcu_synchronize rcu; 91 89 92 - if (rcu_blocking_is_gp()) 90 + if (!rcu_scheduler_active) 93 91 return; 94 92 95 93 init_completion(&rcu.completion); ··· 99 97 wait_for_completion(&rcu.completion); 100 98 } 101 99 EXPORT_SYMBOL_GPL(synchronize_rcu); 100 + 101 + #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 102 + 103 + /** 104 + * synchronize_sched - wait until an rcu-sched grace period has elapsed. 105 + * 106 + * Control will return to the caller some time after a full rcu-sched 107 + * grace period has elapsed, in other words after all currently executing 108 + * rcu-sched read-side critical sections have completed. These read-side 109 + * critical sections are delimited by rcu_read_lock_sched() and 110 + * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), 111 + * local_irq_disable(), and so on may be used in place of 112 + * rcu_read_lock_sched(). 113 + * 114 + * This means that all preempt_disable code sequences, including NMI and 115 + * hardware-interrupt handlers, in progress on entry will have completed 116 + * before this primitive returns. However, this does not guarantee that 117 + * softirq handlers will have completed, since in some kernels, these 118 + * handlers can run in process context, and can block. 119 + * 120 + * This primitive provides the guarantees made by the (now removed) 121 + * synchronize_kernel() API. In contrast, synchronize_rcu() only 122 + * guarantees that rcu_read_lock() sections will have completed. 123 + * In "classic RCU", these two guarantees happen to be one and 124 + * the same, but can differ in realtime RCU implementations. 125 + */ 126 + void synchronize_sched(void) 127 + { 128 + struct rcu_synchronize rcu; 129 + 130 + if (rcu_blocking_is_gp()) 131 + return; 132 + 133 + init_completion(&rcu.completion); 134 + /* Will wake me after RCU finished. */ 135 + call_rcu_sched(&rcu.head, wakeme_after_rcu); 136 + /* Wait for it. */ 137 + wait_for_completion(&rcu.completion); 138 + } 139 + EXPORT_SYMBOL_GPL(synchronize_sched); 102 140 103 141 /** 104 142 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
+24 -19
kernel/rcutorture.c
··· 18 18 * Copyright (C) IBM Corporation, 2005, 2006 19 19 * 20 20 * Authors: Paul E. McKenney <paulmck@us.ibm.com> 21 - * Josh Triplett <josh@freedesktop.org> 21 + * Josh Triplett <josh@freedesktop.org> 22 22 * 23 23 * See also: Documentation/RCU/torture.txt 24 24 */ ··· 50 50 51 51 MODULE_LICENSE("GPL"); 52 52 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " 53 - "Josh Triplett <josh@freedesktop.org>"); 53 + "Josh Triplett <josh@freedesktop.org>"); 54 54 55 55 static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ 56 56 static int nfakewriters = 4; /* # fake writer threads */ ··· 110 110 }; 111 111 112 112 static LIST_HEAD(rcu_torture_freelist); 113 - static struct rcu_torture *rcu_torture_current = NULL; 114 - static long rcu_torture_current_version = 0; 113 + static struct rcu_torture *rcu_torture_current; 114 + static long rcu_torture_current_version; 115 115 static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; 116 116 static DEFINE_SPINLOCK(rcu_torture_lock); 117 117 static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = ··· 124 124 static atomic_t n_rcu_torture_free; 125 125 static atomic_t n_rcu_torture_mberror; 126 126 static atomic_t n_rcu_torture_error; 127 - static long n_rcu_torture_timers = 0; 127 + static long n_rcu_torture_timers; 128 128 static struct list_head rcu_torture_removed; 129 129 static cpumask_var_t shuffle_tmp_mask; 130 130 131 - static int stutter_pause_test = 0; 131 + static int stutter_pause_test; 132 132 133 133 #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) 134 134 #define RCUTORTURE_RUNNABLE_INIT 1 ··· 267 267 int irq_capable; 268 268 char *name; 269 269 }; 270 - static struct rcu_torture_ops *cur_ops = NULL; 270 + 271 + static struct rcu_torture_ops *cur_ops; 271 272 272 273 /* 273 274 * Definitions for rcu torture testing. ··· 282 281 283 282 static void rcu_read_delay(struct rcu_random_state *rrsp) 284 283 { 285 - long delay; 286 - const long longdelay = 200; 284 + const unsigned long shortdelay_us = 200; 285 + const unsigned long longdelay_ms = 50; 287 286 288 - /* We want there to be long-running readers, but not all the time. */ 287 + /* We want a short delay sometimes to make a reader delay the grace 288 + * period, and we want a long delay occasionally to trigger 289 + * force_quiescent_state. */ 289 290 290 - delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay); 291 - if (!delay) 292 - udelay(longdelay); 291 + if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) 292 + mdelay(longdelay_ms); 293 + if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) 294 + udelay(shortdelay_us); 293 295 } 294 296 295 297 static void rcu_torture_read_unlock(int idx) __releases(RCU) ··· 343 339 .sync = synchronize_rcu, 344 340 .cb_barrier = rcu_barrier, 345 341 .stats = NULL, 346 - .irq_capable = 1, 347 - .name = "rcu" 342 + .irq_capable = 1, 343 + .name = "rcu" 348 344 }; 349 345 350 346 static void rcu_sync_torture_deferred_free(struct rcu_torture *p) ··· 642 638 643 639 do { 644 640 schedule_timeout_uninterruptible(1); 645 - if ((rp = rcu_torture_alloc()) == NULL) 641 + rp = rcu_torture_alloc(); 642 + if (rp == NULL) 646 643 continue; 647 644 rp->rtort_pipe_count = 0; 648 645 udelay(rcu_random(&rand) & 0x3ff); ··· 1115 1110 printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n", 1116 1111 torture_type); 1117 1112 mutex_unlock(&fullstop_mutex); 1118 - return (-EINVAL); 1113 + return -EINVAL; 1119 1114 } 1120 1115 if (cur_ops->init) 1121 1116 cur_ops->init(); /* no "goto unwind" prior to this point!!! */ ··· 1166 1161 goto unwind; 1167 1162 } 1168 1163 fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), 1169 - GFP_KERNEL); 1164 + GFP_KERNEL); 1170 1165 if (fakewriter_tasks == NULL) { 1171 1166 VERBOSE_PRINTK_ERRSTRING("out of memory"); 1172 1167 firsterr = -ENOMEM; ··· 1175 1170 for (i = 0; i < nfakewriters; i++) { 1176 1171 VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); 1177 1172 fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, 1178 - "rcu_torture_fakewriter"); 1173 + "rcu_torture_fakewriter"); 1179 1174 if (IS_ERR(fakewriter_tasks[i])) { 1180 1175 firsterr = PTR_ERR(fakewriter_tasks[i]); 1181 1176 VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter");
+38 -67
kernel/rcutree.c
··· 25 25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 26 26 * 27 27 * For detailed explanation of Read-Copy Update mechanism see - 28 - * Documentation/RCU 28 + * Documentation/RCU 29 29 */ 30 30 #include <linux/types.h> 31 31 #include <linux/kernel.h> ··· 107 107 */ 108 108 void rcu_sched_qs(int cpu) 109 109 { 110 - unsigned long flags; 111 110 struct rcu_data *rdp; 112 111 113 - local_irq_save(flags); 114 112 rdp = &per_cpu(rcu_sched_data, cpu); 115 - rdp->passed_quiesc = 1; 116 113 rdp->passed_quiesc_completed = rdp->completed; 117 - rcu_preempt_qs(cpu); 118 - local_irq_restore(flags); 114 + barrier(); 115 + rdp->passed_quiesc = 1; 116 + rcu_preempt_note_context_switch(cpu); 119 117 } 120 118 121 119 void rcu_bh_qs(int cpu) 122 120 { 123 - unsigned long flags; 124 121 struct rcu_data *rdp; 125 122 126 - local_irq_save(flags); 127 123 rdp = &per_cpu(rcu_bh_data, cpu); 128 - rdp->passed_quiesc = 1; 129 124 rdp->passed_quiesc_completed = rdp->completed; 130 - local_irq_restore(flags); 125 + barrier(); 126 + rdp->passed_quiesc = 1; 131 127 } 132 128 133 129 #ifdef CONFIG_NO_HZ ··· 601 605 { 602 606 struct rcu_data *rdp = rsp->rda[smp_processor_id()]; 603 607 struct rcu_node *rnp = rcu_get_root(rsp); 604 - struct rcu_node *rnp_cur; 605 - struct rcu_node *rnp_end; 606 608 607 609 if (!cpu_needs_another_gp(rsp, rdp)) { 608 610 spin_unlock_irqrestore(&rnp->lock, flags); ··· 609 615 610 616 /* Advance to a new grace period and initialize state. */ 611 617 rsp->gpnum++; 618 + WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); 612 619 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 613 620 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 614 621 record_gp_stall_check_time(rsp); ··· 626 631 627 632 /* Special-case the common single-level case. */ 628 633 if (NUM_RCU_NODES == 1) { 634 + rcu_preempt_check_blocked_tasks(rnp); 629 635 rnp->qsmask = rnp->qsmaskinit; 636 + rnp->gpnum = rsp->gpnum; 630 637 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 631 638 spin_unlock_irqrestore(&rnp->lock, flags); 632 639 return; ··· 641 644 spin_lock(&rsp->onofflock); /* irqs already disabled. */ 642 645 643 646 /* 644 - * Set the quiescent-state-needed bits in all the non-leaf RCU 645 - * nodes for all currently online CPUs. This operation relies 646 - * on the layout of the hierarchy within the rsp->node[] array. 647 - * Note that other CPUs will access only the leaves of the 648 - * hierarchy, which still indicate that no grace period is in 649 - * progress. In addition, we have excluded CPU-hotplug operations. 650 - * 651 - * We therefore do not need to hold any locks. Any required 652 - * memory barriers will be supplied by the locks guarding the 653 - * leaf rcu_nodes in the hierarchy. 654 - */ 655 - 656 - rnp_end = rsp->level[NUM_RCU_LVLS - 1]; 657 - for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++) 658 - rnp_cur->qsmask = rnp_cur->qsmaskinit; 659 - 660 - /* 661 - * Now set up the leaf nodes. Here we must be careful. First, 662 - * we need to hold the lock in order to exclude other CPUs, which 663 - * might be contending for the leaf nodes' locks. Second, as 664 - * soon as we initialize a given leaf node, its CPUs might run 665 - * up the rest of the hierarchy. We must therefore acquire locks 666 - * for each node that we touch during this stage. (But we still 667 - * are excluding CPU-hotplug operations.) 647 + * Set the quiescent-state-needed bits in all the rcu_node 648 + * structures for all currently online CPUs in breadth-first 649 + * order, starting from the root rcu_node structure. This 650 + * operation relies on the layout of the hierarchy within the 651 + * rsp->node[] array. Note that other CPUs will access only 652 + * the leaves of the hierarchy, which still indicate that no 653 + * grace period is in progress, at least until the corresponding 654 + * leaf node has been initialized. In addition, we have excluded 655 + * CPU-hotplug operations. 668 656 * 669 657 * Note that the grace period cannot complete until we finish 670 658 * the initialization process, as there will be at least one 671 659 * qsmask bit set in the root node until that time, namely the 672 - * one corresponding to this CPU. 660 + * one corresponding to this CPU, due to the fact that we have 661 + * irqs disabled. 673 662 */ 674 - rnp_end = &rsp->node[NUM_RCU_NODES]; 675 - rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; 676 - for (; rnp_cur < rnp_end; rnp_cur++) { 677 - spin_lock(&rnp_cur->lock); /* irqs already disabled. */ 678 - rnp_cur->qsmask = rnp_cur->qsmaskinit; 679 - spin_unlock(&rnp_cur->lock); /* irqs already disabled. */ 663 + for (rnp = &rsp->node[0]; rnp < &rsp->node[NUM_RCU_NODES]; rnp++) { 664 + spin_lock(&rnp->lock); /* irqs already disabled. */ 665 + rcu_preempt_check_blocked_tasks(rnp); 666 + rnp->qsmask = rnp->qsmaskinit; 667 + rnp->gpnum = rsp->gpnum; 668 + spin_unlock(&rnp->lock); /* irqs already disabled. */ 680 669 } 681 670 682 671 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ ··· 705 722 static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) 706 723 __releases(rnp->lock) 707 724 { 725 + WARN_ON_ONCE(rsp->completed == rsp->gpnum); 708 726 rsp->completed = rsp->gpnum; 709 727 rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); 710 728 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ ··· 723 739 unsigned long flags) 724 740 __releases(rnp->lock) 725 741 { 742 + struct rcu_node *rnp_c; 743 + 726 744 /* Walk up the rcu_node hierarchy. */ 727 745 for (;;) { 728 746 if (!(rnp->qsmask & mask)) { ··· 748 762 break; 749 763 } 750 764 spin_unlock_irqrestore(&rnp->lock, flags); 765 + rnp_c = rnp; 751 766 rnp = rnp->parent; 752 767 spin_lock_irqsave(&rnp->lock, flags); 768 + WARN_ON_ONCE(rnp_c->qsmask); 753 769 } 754 770 755 771 /* ··· 764 776 765 777 /* 766 778 * Record a quiescent state for the specified CPU, which must either be 767 - * the current CPU or an offline CPU. The lastcomp argument is used to 768 - * make sure we are still in the grace period of interest. We don't want 769 - * to end the current grace period based on quiescent states detected in 770 - * an earlier grace period! 779 + * the current CPU. The lastcomp argument is used to make sure we are 780 + * still in the grace period of interest. We don't want to end the current 781 + * grace period based on quiescent states detected in an earlier grace 782 + * period! 771 783 */ 772 784 static void 773 785 cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) ··· 802 814 * This GP can't end until cpu checks in, so all of our 803 815 * callbacks can be processed during the next GP. 804 816 */ 805 - rdp = rsp->rda[smp_processor_id()]; 806 817 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 807 818 808 819 cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ ··· 859 872 spin_lock_irqsave(&rsp->onofflock, flags); 860 873 861 874 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 862 - rnp = rdp->mynode; 875 + rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ 863 876 mask = rdp->grpmask; /* rnp->grplo is constant. */ 864 877 do { 865 878 spin_lock(&rnp->lock); /* irqs already disabled. */ ··· 868 881 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 869 882 break; 870 883 } 871 - rcu_preempt_offline_tasks(rsp, rnp); 884 + rcu_preempt_offline_tasks(rsp, rnp, rdp); 872 885 mask = rnp->grpmask; 873 886 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 874 887 rnp = rnp->parent; ··· 876 889 lastcomp = rsp->completed; 877 890 878 891 spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 879 - 880 - /* Being offline is a quiescent state, so go record it. */ 881 - cpu_quiet(cpu, rsp, rdp, lastcomp); 882 892 883 893 /* 884 894 * Move callbacks from the outgoing CPU to the running CPU. ··· 1441 1457 rnp = rnp->parent; 1442 1458 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 1443 1459 1444 - spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 1445 - 1446 - /* 1447 - * A new grace period might start here. If so, we will be part of 1448 - * it, and its gpnum will be greater than ours, so we will 1449 - * participate. It is also possible for the gpnum to have been 1450 - * incremented before this function was called, and the bitmasks 1451 - * to not be filled out until now, in which case we will also 1452 - * participate due to our gpnum being behind. 1453 - */ 1454 - 1455 - /* Since it is coming online, the CPU is in a quiescent state. */ 1456 - cpu_quiet(cpu, rsp, rdp, lastcomp); 1457 - local_irq_restore(flags); 1460 + spin_unlock_irqrestore(&rsp->onofflock, flags); 1458 1461 } 1459 1462 1460 1463 static void __cpuinit rcu_online_cpu(int cpu)
+1 -1
kernel/rcutree.h
··· 142 142 */ 143 143 struct rcu_head *nxtlist; 144 144 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 145 - long qlen; /* # of queued callbacks */ 145 + long qlen; /* # of queued callbacks */ 146 146 long blimit; /* Upper limit on a processed batch */ 147 147 148 148 #ifdef CONFIG_NO_HZ
+72 -38
kernel/rcutree_plugin.h
··· 64 64 * not in a quiescent state. There might be any number of tasks blocked 65 65 * while in an RCU read-side critical section. 66 66 */ 67 - static void rcu_preempt_qs_record(int cpu) 67 + static void rcu_preempt_qs(int cpu) 68 68 { 69 69 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 70 - rdp->passed_quiesc = 1; 71 70 rdp->passed_quiesc_completed = rdp->completed; 71 + barrier(); 72 + rdp->passed_quiesc = 1; 72 73 } 73 74 74 75 /* 75 - * We have entered the scheduler or are between softirqs in ksoftirqd. 76 - * If we are in an RCU read-side critical section, we need to reflect 77 - * that in the state of the rcu_node structure corresponding to this CPU. 78 - * Caller must disable hardirqs. 76 + * We have entered the scheduler, and the current task might soon be 77 + * context-switched away from. If this task is in an RCU read-side 78 + * critical section, we will no longer be able to rely on the CPU to 79 + * record that fact, so we enqueue the task on the appropriate entry 80 + * of the blocked_tasks[] array. The task will dequeue itself when 81 + * it exits the outermost enclosing RCU read-side critical section. 82 + * Therefore, the current grace period cannot be permitted to complete 83 + * until the blocked_tasks[] entry indexed by the low-order bit of 84 + * rnp->gpnum empties. 85 + * 86 + * Caller must disable preemption. 79 87 */ 80 - static void rcu_preempt_qs(int cpu) 88 + static void rcu_preempt_note_context_switch(int cpu) 81 89 { 82 90 struct task_struct *t = current; 91 + unsigned long flags; 83 92 int phase; 84 93 struct rcu_data *rdp; 85 94 struct rcu_node *rnp; ··· 99 90 /* Possibly blocking in an RCU read-side critical section. */ 100 91 rdp = rcu_preempt_state.rda[cpu]; 101 92 rnp = rdp->mynode; 102 - spin_lock(&rnp->lock); 93 + spin_lock_irqsave(&rnp->lock, flags); 103 94 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 104 95 t->rcu_blocked_node = rnp; 105 96 ··· 112 103 * state for the current grace period), then as long 113 104 * as that task remains queued, the current grace period 114 105 * cannot end. 106 + * 107 + * But first, note that the current CPU must still be 108 + * on line! 115 109 */ 116 - phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1); 110 + WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 111 + WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 112 + phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; 117 113 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); 118 - smp_mb(); /* Ensure later ctxt swtch seen after above. */ 119 - spin_unlock(&rnp->lock); 114 + spin_unlock_irqrestore(&rnp->lock, flags); 120 115 } 121 116 122 117 /* ··· 132 119 * grace period, then the fact that the task has been enqueued 133 120 * means that we continue to block the current grace period. 134 121 */ 135 - rcu_preempt_qs_record(cpu); 136 - t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS | 137 - RCU_READ_UNLOCK_GOT_QS); 122 + rcu_preempt_qs(cpu); 123 + local_irq_save(flags); 124 + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 125 + local_irq_restore(flags); 138 126 } 139 127 140 128 /* ··· 171 157 special = t->rcu_read_unlock_special; 172 158 if (special & RCU_READ_UNLOCK_NEED_QS) { 173 159 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 174 - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS; 160 + rcu_preempt_qs(smp_processor_id()); 175 161 } 176 162 177 163 /* Hardware IRQ handlers cannot block. */ ··· 191 177 */ 192 178 for (;;) { 193 179 rnp = t->rcu_blocked_node; 194 - spin_lock(&rnp->lock); 180 + spin_lock(&rnp->lock); /* irqs already disabled. */ 195 181 if (rnp == t->rcu_blocked_node) 196 182 break; 197 - spin_unlock(&rnp->lock); 183 + spin_unlock(&rnp->lock); /* irqs remain disabled. */ 198 184 } 199 185 empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); 200 186 list_del_init(&t->rcu_node_entry); ··· 208 194 */ 209 195 if (!empty && rnp->qsmask == 0 && 210 196 list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) { 211 - t->rcu_read_unlock_special &= 212 - ~(RCU_READ_UNLOCK_NEED_QS | 213 - RCU_READ_UNLOCK_GOT_QS); 197 + struct rcu_node *rnp_p; 198 + 214 199 if (rnp->parent == NULL) { 215 200 /* Only one rcu_node in the tree. */ 216 201 cpu_quiet_msk_finish(&rcu_preempt_state, flags); ··· 218 205 /* Report up the rest of the hierarchy. */ 219 206 mask = rnp->grpmask; 220 207 spin_unlock_irqrestore(&rnp->lock, flags); 221 - rnp = rnp->parent; 222 - spin_lock_irqsave(&rnp->lock, flags); 223 - cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags); 208 + rnp_p = rnp->parent; 209 + spin_lock_irqsave(&rnp_p->lock, flags); 210 + WARN_ON_ONCE(rnp->qsmask); 211 + cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags); 224 212 return; 225 213 } 226 214 spin_unlock(&rnp->lock); ··· 273 259 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 274 260 275 261 /* 262 + * Check that the list of blocked tasks for the newly completed grace 263 + * period is in fact empty. It is a serious bug to complete a grace 264 + * period that still has RCU readers blocked! This function must be 265 + * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 266 + * must be held by the caller. 267 + */ 268 + static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 269 + { 270 + WARN_ON_ONCE(!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])); 271 + WARN_ON_ONCE(rnp->qsmask); 272 + } 273 + 274 + /* 276 275 * Check for preempted RCU readers for the specified rcu_node structure. 277 276 * If the caller needs a reliable answer, it must hold the rcu_node's 278 277 * >lock. ··· 307 280 * The caller must hold rnp->lock with irqs disabled. 308 281 */ 309 282 static void rcu_preempt_offline_tasks(struct rcu_state *rsp, 310 - struct rcu_node *rnp) 283 + struct rcu_node *rnp, 284 + struct rcu_data *rdp) 311 285 { 312 286 int i; 313 287 struct list_head *lp; ··· 320 292 WARN_ONCE(1, "Last CPU thought to be offlined?"); 321 293 return; /* Shouldn't happen: at least one CPU online. */ 322 294 } 295 + WARN_ON_ONCE(rnp != rdp->mynode && 296 + (!list_empty(&rnp->blocked_tasks[0]) || 297 + !list_empty(&rnp->blocked_tasks[1]))); 323 298 324 299 /* 325 300 * Move tasks up to root rcu_node. Rely on the fact that the ··· 366 335 struct task_struct *t = current; 367 336 368 337 if (t->rcu_read_lock_nesting == 0) { 369 - t->rcu_read_unlock_special &= 370 - ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS); 371 - rcu_preempt_qs_record(cpu); 338 + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 339 + rcu_preempt_qs(cpu); 372 340 return; 373 341 } 374 - if (per_cpu(rcu_preempt_data, cpu).qs_pending) { 375 - if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) { 376 - rcu_preempt_qs_record(cpu); 377 - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS; 378 - } else if (!(t->rcu_read_unlock_special & 379 - RCU_READ_UNLOCK_NEED_QS)) { 380 - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 381 - } 382 - } 342 + if (per_cpu(rcu_preempt_data, cpu).qs_pending) 343 + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 383 344 } 384 345 385 346 /* ··· 457 434 * Because preemptable RCU does not exist, we never have to check for 458 435 * CPUs being in quiescent states. 459 436 */ 460 - static void rcu_preempt_qs(int cpu) 437 + static void rcu_preempt_note_context_switch(int cpu) 461 438 { 462 439 } 463 440 ··· 472 449 } 473 450 474 451 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 452 + 453 + /* 454 + * Because there is no preemptable RCU, there can be no readers blocked, 455 + * so there is no need to check for blocked tasks. So check only for 456 + * bogus qsmask values. 457 + */ 458 + static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 459 + { 460 + WARN_ON_ONCE(rnp->qsmask); 461 + } 475 462 476 463 /* 477 464 * Because preemptable RCU does not exist, there are never any preempted ··· 499 466 * tasks that were blocked within RCU read-side critical sections. 500 467 */ 501 468 static void rcu_preempt_offline_tasks(struct rcu_state *rsp, 502 - struct rcu_node *rnp) 469 + struct rcu_node *rnp, 470 + struct rcu_data *rdp) 503 471 { 504 472 } 505 473
+1 -1
kernel/rcutree_trace.c
··· 20 20 * Papers: http://www.rdrop.com/users/paulmck/RCU 21 21 * 22 22 * For detailed explanation of Read-Copy Update mechanism see - 23 - * Documentation/RCU 23 + * Documentation/RCU 24 24 * 25 25 */ 26 26 #include <linux/types.h>