Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

+2 -2

include/linux/futex.h

···
  33   33   #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
  34   34   #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
  35   35   #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
  36      - #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG)
  37      - #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
       36 + #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG)
       37 + #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG)
  38   38   #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \
  39   39   FUTEX_PRIVATE_FLAG)
  40   40   #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \

+11 -7

include/linux/rcupdate.h

···
  77   77   #error "Unknown RCU implementation specified to kernel configuration"
  78   78   #endif
  79   79
  80      - #define RCU_HEAD_INIT { .next = NULL, .func = NULL }
       80 + #define RCU_HEAD_INIT { .next = NULL, .func = NULL }
  81   81   #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
  82   82   #define INIT_RCU_HEAD(ptr) do { \
  83   83   (ptr)->next = NULL; (ptr)->func = NULL; \
···
 129  129   rcu_read_acquire();
 130  130   }
 131  131
 132      - /**
 133      - * rcu_read_unlock - marks the end of an RCU read-side critical section.
 134      - *
 135      - * See rcu_read_lock() for more information.
 136      - */
 137      -
 138  132   /*
 139  133   * So where is rcu_write_lock()? It does not exist, as there is no
 140  134   * way for writers to lock out RCU readers. This is a feature, not
···
 137  143   * spinlock primitives work well for this, but any other technique may be
 138  144   * used as well. RCU does not care how the writers keep out of each
 139  145   * others' way, as long as they do so.
      146 + */
      147 +
      148 + /**
      149 + * rcu_read_unlock - marks the end of an RCU read-side critical section.
      150 + *
      151 + * See rcu_read_lock() for more information.
 140  152   */
 141  153   static inline void rcu_read_unlock(void)
 142  154   {
···
 196  196   __acquire(RCU_SCHED);
 197  197   rcu_read_acquire();
 198  198   }
      199 +
      200 + /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
 199  201   static inline notrace void rcu_read_lock_sched_notrace(void)
 200  202   {
 201  203   preempt_disable_notrace();
···
 215  213   __release(RCU_SCHED);
 216  214   preempt_enable();
 217  215   }
      216 +
      217 + /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
 218  218   static inline notrace void rcu_read_unlock_sched_notrace(void)
 219  219   {
 220  220   __release(RCU_SCHED);

+6 -7

include/linux/rcutree.h

···
  30   30   #ifndef __LINUX_RCUTREE_H
  31   31   #define __LINUX_RCUTREE_H
  32   32
       33 + struct notifier_block;
       34 +
  33   35   extern void rcu_sched_qs(int cpu);
  34   36   extern void rcu_bh_qs(int cpu);
  35      -
       37 + extern int rcu_cpu_notify(struct notifier_block *self,
       38 + unsigned long action, void *hcpu);
  36   39   extern int rcu_needs_cpu(int cpu);
       40 + extern int rcu_expedited_torture_stats(char *page);
  37   41
  38   42   #ifdef CONFIG_TREE_PREEMPT_RCU
  39   43
···
  89   85
  90   86   extern void __rcu_init(void);
  91   87   extern void rcu_check_callbacks(int cpu, int user);
  92      - extern void rcu_restart_cpu(int cpu);
  93   88
  94   89   extern long rcu_batches_completed(void);
  95   90   extern long rcu_batches_completed_bh(void);
  96   91   extern long rcu_batches_completed_sched(void);
  97      -
  98      - static inline void rcu_init_sched(void)
  99      - {
 100      - }
 101   92
 102   93   #ifdef CONFIG_NO_HZ
 103   94   void rcu_enter_nohz(void);
···
 106  107   }
 107  108   #endif /* CONFIG_NO_HZ */
 108  109
 109      - /* A context switch is a grace period for rcutree. */
      110 + /* A context switch is a grace period for RCU-sched and RCU-bh. */
 110  111   static inline int rcu_blocking_is_gp(void)
 111  112   {
 112  113   return num_online_cpus() == 1;

-1

init/main.c

···
 778  778   */
 779  779   static void __init do_basic_setup(void)
 780  780   {
 781      - rcu_init_sched(); /* needed by module_init stage. */
 782  781   init_workqueues();
 783  782   cpuset_init_smp();
 784  783   usermodehelper_init();

-2

kernel/exit.c

···
 991  991   tsk->mempolicy = NULL;
 992  992   #endif
 993  993   #ifdef CONFIG_FUTEX
 994      - if (unlikely(!list_empty(&tsk->pi_state_list)))
 995      - exit_pi_state_list(tsk);
 996  994   if (unlikely(current->pi_state_cache))
 997  995   kfree(current->pi_state_cache);
 998  996   #endif

+8 -2

kernel/fork.c

···
 570  570
 571  571   /* Get rid of any futexes when releasing the mm */
 572  572   #ifdef CONFIG_FUTEX
 573      - if (unlikely(tsk->robust_list))
      573 + if (unlikely(tsk->robust_list)) {
 574  574   exit_robust_list(tsk);
      575 + tsk->robust_list = NULL;
      576 + }
 575  577   #ifdef CONFIG_COMPAT
 576      - if (unlikely(tsk->compat_robust_list))
      578 + if (unlikely(tsk->compat_robust_list)) {
 577  579   compat_exit_robust_list(tsk);
      580 + tsk->compat_robust_list = NULL;
      581 + }
 578  582   #endif
      583 + if (unlikely(!list_empty(&tsk->pi_state_list)))
      584 + exit_pi_state_list(tsk);
 579  585   #endif
 580  586
 581  587   /* Get rid of any cached register state */

+1 -2

kernel/futex.c

···
 916  916   hb1 = hash_futex(&key1);
 917  917   hb2 = hash_futex(&key2);
 918  918
 919      - double_lock_hb(hb1, hb2);
 920  919   retry_private:
      920 + double_lock_hb(hb1, hb2);
 921  921   op_ret = futex_atomic_op_inuser(op, uaddr2);
 922  922   if (unlikely(op_ret < 0)) {
 923  923
···
2117 2117   * Unqueue the futex_q and determine which it was.
2118 2118   */
2119 2119   plist_del(&q->list, &q->list.plist);
2120      - drop_futex_key_refs(&q->key);
2121 2120
2122 2121   if (timeout && !timeout->task)
2123 2122   ret = -ETIMEDOUT;

+2 -1

kernel/panic.c

···
  90   90
  91   91   atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
  92   92
       93 + bust_spinlocks(0);
       94 +
  93   95   if (!panic_blink)
  94   96   panic_blink = no_blink;
  95   97
···
 138  136   mdelay(1);
 139  137   i++;
 140  138   }
 141      - bust_spinlocks(0);
 142  139   }
 143  140
 144  141   EXPORT_SYMBOL(panic);

+7 -133

kernel/rcupdate.c

··· 46 46 #include <linux/module.h> 47 47 #include <linux/kernel_stat.h> 48 48 49 - enum rcu_barrier { 50 - RCU_BARRIER_STD, 51 - RCU_BARRIER_BH, 52 - RCU_BARRIER_SCHED, 53 - }; 49 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 50 + static struct lock_class_key rcu_lock_key; 51 + struct lockdep_map rcu_lock_map = 52 + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); 53 + EXPORT_SYMBOL_GPL(rcu_lock_map); 54 + #endif 54 55 55 - static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 56 - static atomic_t rcu_barrier_cpu_count; 57 - static DEFINE_MUTEX(rcu_barrier_mutex); 58 - static struct completion rcu_barrier_completion; 59 56 int rcu_scheduler_active __read_mostly; 60 - 61 - static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0); 62 - static struct rcu_head rcu_migrate_head[3]; 63 - static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq); 64 57 65 58 /* 66 59 * Awaken the corresponding synchronize_rcu() instance now that a ··· 157 164 } 158 165 EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 159 166 160 - static void rcu_barrier_callback(struct rcu_head *notused) 161 - { 162 - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 163 - complete(&rcu_barrier_completion); 164 - } 165 - 166 - /* 167 - * Called with preemption disabled, and from cross-cpu IRQ context. 168 - */ 169 - static void rcu_barrier_func(void *type) 170 - { 171 - int cpu = smp_processor_id(); 172 - struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 173 - 174 - atomic_inc(&rcu_barrier_cpu_count); 175 - switch ((enum rcu_barrier)type) { 176 - case RCU_BARRIER_STD: 177 - call_rcu(head, rcu_barrier_callback); 178 - break; 179 - case RCU_BARRIER_BH: 180 - call_rcu_bh(head, rcu_barrier_callback); 181 - break; 182 - case RCU_BARRIER_SCHED: 183 - call_rcu_sched(head, rcu_barrier_callback); 184 - break; 185 - } 186 - } 187 - 188 - static inline void wait_migrated_callbacks(void) 189 - { 190 - wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); 191 - smp_mb(); /* In case we didn't sleep. 
*/ 192 - } 193 - 194 - /* 195 - * Orchestrate the specified type of RCU barrier, waiting for all 196 - * RCU callbacks of the specified type to complete. 197 - */ 198 - static void _rcu_barrier(enum rcu_barrier type) 199 - { 200 - BUG_ON(in_interrupt()); 201 - /* Take cpucontrol mutex to protect against CPU hotplug */ 202 - mutex_lock(&rcu_barrier_mutex); 203 - init_completion(&rcu_barrier_completion); 204 - /* 205 - * Initialize rcu_barrier_cpu_count to 1, then invoke 206 - * rcu_barrier_func() on each CPU, so that each CPU also has 207 - * incremented rcu_barrier_cpu_count. Only then is it safe to 208 - * decrement rcu_barrier_cpu_count -- otherwise the first CPU 209 - * might complete its grace period before all of the other CPUs 210 - * did their increment, causing this function to return too 211 - * early. 212 - */ 213 - atomic_set(&rcu_barrier_cpu_count, 1); 214 - on_each_cpu(rcu_barrier_func, (void *)type, 1); 215 - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 216 - complete(&rcu_barrier_completion); 217 - wait_for_completion(&rcu_barrier_completion); 218 - mutex_unlock(&rcu_barrier_mutex); 219 - wait_migrated_callbacks(); 220 - } 221 - 222 - /** 223 - * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 224 - */ 225 - void rcu_barrier(void) 226 - { 227 - _rcu_barrier(RCU_BARRIER_STD); 228 - } 229 - EXPORT_SYMBOL_GPL(rcu_barrier); 230 - 231 - /** 232 - * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. 233 - */ 234 - void rcu_barrier_bh(void) 235 - { 236 - _rcu_barrier(RCU_BARRIER_BH); 237 - } 238 - EXPORT_SYMBOL_GPL(rcu_barrier_bh); 239 - 240 - /** 241 - * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 
242 - */ 243 - void rcu_barrier_sched(void) 244 - { 245 - _rcu_barrier(RCU_BARRIER_SCHED); 246 - } 247 - EXPORT_SYMBOL_GPL(rcu_barrier_sched); 248 - 249 - static void rcu_migrate_callback(struct rcu_head *notused) 250 - { 251 - if (atomic_dec_and_test(&rcu_migrate_type_count)) 252 - wake_up(&rcu_migrate_wq); 253 - } 254 - 255 - extern int rcu_cpu_notify(struct notifier_block *self, 256 - unsigned long action, void *hcpu); 257 - 258 167 static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, 259 168 unsigned long action, void *hcpu) 260 169 { 261 - rcu_cpu_notify(self, action, hcpu); 262 - if (action == CPU_DYING) { 263 - /* 264 - * preempt_disable() in on_each_cpu() prevents stop_machine(), 265 - * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" 266 - * returns, all online cpus have queued rcu_barrier_func(), 267 - * and the dead cpu(if it exist) queues rcu_migrate_callback()s. 268 - * 269 - * These callbacks ensure _rcu_barrier() waits for all 270 - * RCU callbacks of the specified type to complete. 271 - */ 272 - atomic_set(&rcu_migrate_type_count, 3); 273 - call_rcu_bh(rcu_migrate_head, rcu_migrate_callback); 274 - call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback); 275 - call_rcu(rcu_migrate_head + 2, rcu_migrate_callback); 276 - } else if (action == CPU_DOWN_PREPARE) { 277 - /* Don't need to wait until next removal operation. */ 278 - /* rcu_migrate_head is protected by cpu_add_remove_lock */ 279 - wait_migrated_callbacks(); 280 - } 281 - 282 - return NOTIFY_OK; 170 + return rcu_cpu_notify(self, action, hcpu); 283 171 } 284 172 285 173 void __init rcu_init(void)

+1 -3

kernel/rcutorture.c

···
 606  606   .name = "sched_sync"
 607  607   };
 608  608
 609      - extern int rcu_expedited_torture_stats(char *page);
 610      -
 611  609   static struct rcu_torture_ops sched_expedited_ops = {
 612  610   .init = rcu_sync_torture_init,
 613  611   .cleanup = NULL,
···
 648  650   old_rp = rcu_torture_current;
 649  651   rp->rtort_mbtest = 1;
 650  652   rcu_assign_pointer(rcu_torture_current, rp);
 651      - smp_wmb();
      653 + smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */
 652  654   if (old_rp) {
 653  655   i = old_rp->rtort_pipe_count;
 654  656   if (i > RCU_TORTURE_PIPE_LEN)

+216 -114

kernel/rcutree.c

··· 49 49 50 50 #include "rcutree.h" 51 51 52 - #ifdef CONFIG_DEBUG_LOCK_ALLOC 53 - static struct lock_class_key rcu_lock_key; 54 - struct lockdep_map rcu_lock_map = 55 - STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); 56 - EXPORT_SYMBOL_GPL(rcu_lock_map); 57 - #endif 58 - 59 52 /* Data structures. */ 60 53 61 54 #define RCU_STATE_INITIALIZER(name) { \ ··· 63 70 .gpnum = -300, \ 64 71 .completed = -300, \ 65 72 .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ 73 + .orphan_cbs_list = NULL, \ 74 + .orphan_cbs_tail = &name.orphan_cbs_list, \ 75 + .orphan_qlen = 0, \ 66 76 .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \ 67 77 .n_force_qs = 0, \ 68 78 .n_force_qs_ngp = 0, \ ··· 77 81 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 78 82 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 79 83 80 - extern long rcu_batches_completed_sched(void); 81 - static struct rcu_node *rcu_get_root(struct rcu_state *rsp); 82 - static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, 83 - struct rcu_node *rnp, unsigned long flags); 84 - static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags); 85 - #ifdef CONFIG_HOTPLUG_CPU 86 - static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp); 87 - #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 88 - static void __rcu_process_callbacks(struct rcu_state *rsp, 89 - struct rcu_data *rdp); 90 - static void __call_rcu(struct rcu_head *head, 91 - void (*func)(struct rcu_head *rcu), 92 - struct rcu_state *rsp); 93 - static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp); 94 - static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp, 95 - int preemptable); 96 84 97 - #include "rcutree_plugin.h" 85 + /* 86 + * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 87 + * permit this function to be invoked without holding the root rcu_node 88 + * structure's ->lock, but of course results can be subject to change. 
89 + */ 90 + static int rcu_gp_in_progress(struct rcu_state *rsp) 91 + { 92 + return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum); 93 + } 98 94 99 95 /* 100 96 * Note a quiescent state. Because we do not need to know ··· 125 137 static int qhimark = 10000; /* If this many pending, ignore blimit. */ 126 138 static int qlowmark = 100; /* Once only this many pending, use blimit. */ 127 139 140 + module_param(blimit, int, 0); 141 + module_param(qhimark, int, 0); 142 + module_param(qlowmark, int, 0); 143 + 128 144 static void force_quiescent_state(struct rcu_state *rsp, int relaxed); 129 145 static int rcu_pending(int cpu); 130 146 ··· 165 173 static int 166 174 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) 167 175 { 168 - /* ACCESS_ONCE() because we are accessing outside of lock. */ 169 - return *rdp->nxttail[RCU_DONE_TAIL] && 170 - ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum); 176 + return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); 171 177 } 172 178 173 179 /* ··· 359 369 /* 360 370 * Snapshot the specified CPU's dynticks counter so that we can later 361 371 * credit them with an implicit quiescent state. Return 1 if this CPU 362 - * is already in a quiescent state courtesy of dynticks idle mode. 372 + * is in dynticks idle mode, which is an extended quiescent state. 363 373 */ 364 374 static int dyntick_save_progress_counter(struct rcu_data *rdp) 365 375 { ··· 465 475 long delta; 466 476 unsigned long flags; 467 477 struct rcu_node *rnp = rcu_get_root(rsp); 468 - struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; 469 - struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES]; 470 478 471 479 /* Only let one CPU complain about others per time interval. 
*/ 472 480 473 481 spin_lock_irqsave(&rnp->lock, flags); 474 482 delta = jiffies - rsp->jiffies_stall; 475 - if (delta < RCU_STALL_RAT_DELAY || rsp->gpnum == rsp->completed) { 483 + if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 476 484 spin_unlock_irqrestore(&rnp->lock, flags); 477 485 return; 478 486 } 479 487 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 488 + 489 + /* 490 + * Now rat on any tasks that got kicked up to the root rcu_node 491 + * due to CPU offlining. 492 + */ 493 + rcu_print_task_stall(rnp); 480 494 spin_unlock_irqrestore(&rnp->lock, flags); 481 495 482 496 /* OK, time to rat on our buddy... */ 483 497 484 498 printk(KERN_ERR "INFO: RCU detected CPU stalls:"); 485 - for (; rnp_cur < rnp_end; rnp_cur++) { 499 + rcu_for_each_leaf_node(rsp, rnp) { 486 500 rcu_print_task_stall(rnp); 487 - if (rnp_cur->qsmask == 0) 501 + if (rnp->qsmask == 0) 488 502 continue; 489 - for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++) 490 - if (rnp_cur->qsmask & (1UL << cpu)) 491 - printk(" %d", rnp_cur->grplo + cpu); 503 + for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 504 + if (rnp->qsmask & (1UL << cpu)) 505 + printk(" %d", rnp->grplo + cpu); 492 506 } 493 507 printk(" (detected by %d, t=%ld jiffies)\n", 494 508 smp_processor_id(), (long)(jiffies - rsp->gp_start)); ··· 531 537 /* We haven't checked in, so go dump stack. */ 532 538 print_cpu_stall(rsp); 533 539 534 - } else if (rsp->gpnum != rsp->completed && 535 - delta >= RCU_STALL_RAT_DELAY) { 540 + } else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) { 536 541 537 542 /* They had two time units to dump stack, so complain. */ 538 543 print_other_cpu_stall(rsp); ··· 610 617 note_new_gpnum(rsp, rdp); 611 618 612 619 /* 613 - * Because we are first, we know that all our callbacks will 614 - * be covered by this upcoming grace period, even the ones 615 - * that were registered arbitrarily recently. 
620 + * Because this CPU just now started the new grace period, we know 621 + * that all of its callbacks will be covered by this upcoming grace 622 + * period, even the ones that were registered arbitrarily recently. 623 + * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. 624 + * 625 + * Other CPUs cannot be sure exactly when the grace period started. 626 + * Therefore, their recently registered callbacks must pass through 627 + * an additional RCU_NEXT_READY stage, so that they will be handled 628 + * by the next RCU grace period. 616 629 */ 617 630 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 618 631 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; ··· 656 657 * one corresponding to this CPU, due to the fact that we have 657 658 * irqs disabled. 658 659 */ 659 - for (rnp = &rsp->node[0]; rnp < &rsp->node[NUM_RCU_NODES]; rnp++) { 660 + rcu_for_each_node_breadth_first(rsp, rnp) { 660 661 spin_lock(&rnp->lock); /* irqs already disabled. */ 661 662 rcu_preempt_check_blocked_tasks(rnp); 662 663 rnp->qsmask = rnp->qsmaskinit; ··· 702 703 * hold rnp->lock, as required by rcu_start_gp(), which will release it. 703 704 */ 704 705 static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) 705 - __releases(rnp->lock) 706 + __releases(rcu_get_root(rsp)->lock) 706 707 { 707 - WARN_ON_ONCE(rsp->completed == rsp->gpnum); 708 + WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 708 709 rsp->completed = rsp->gpnum; 709 710 rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); 710 711 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ ··· 841 842 #ifdef CONFIG_HOTPLUG_CPU 842 843 843 844 /* 845 + * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the 846 + * specified flavor of RCU. The callbacks will be adopted by the next 847 + * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever 848 + * comes first. Because this is invoked from the CPU_DYING notifier, 849 + * irqs are already disabled. 
850 + */ 851 + static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) 852 + { 853 + int i; 854 + struct rcu_data *rdp = rsp->rda[smp_processor_id()]; 855 + 856 + if (rdp->nxtlist == NULL) 857 + return; /* irqs disabled, so comparison is stable. */ 858 + spin_lock(&rsp->onofflock); /* irqs already disabled. */ 859 + *rsp->orphan_cbs_tail = rdp->nxtlist; 860 + rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; 861 + rdp->nxtlist = NULL; 862 + for (i = 0; i < RCU_NEXT_SIZE; i++) 863 + rdp->nxttail[i] = &rdp->nxtlist; 864 + rsp->orphan_qlen += rdp->qlen; 865 + rdp->qlen = 0; 866 + spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 867 + } 868 + 869 + /* 870 + * Adopt previously orphaned RCU callbacks. 871 + */ 872 + static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) 873 + { 874 + unsigned long flags; 875 + struct rcu_data *rdp; 876 + 877 + spin_lock_irqsave(&rsp->onofflock, flags); 878 + rdp = rsp->rda[smp_processor_id()]; 879 + if (rsp->orphan_cbs_list == NULL) { 880 + spin_unlock_irqrestore(&rsp->onofflock, flags); 881 + return; 882 + } 883 + *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; 884 + rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; 885 + rdp->qlen += rsp->orphan_qlen; 886 + rsp->orphan_cbs_list = NULL; 887 + rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; 888 + rsp->orphan_qlen = 0; 889 + spin_unlock_irqrestore(&rsp->onofflock, flags); 890 + } 891 + 892 + /* 844 893 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy 845 894 * and move all callbacks from the outgoing CPU to the current one. 846 895 */ 847 896 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) 848 897 { 849 - int i; 850 898 unsigned long flags; 851 899 long lastcomp; 852 900 unsigned long mask; 853 901 struct rcu_data *rdp = rsp->rda[cpu]; 854 - struct rcu_data *rdp_me; 855 902 struct rcu_node *rnp; 856 903 857 904 /* Exclude any attempts to start a new grace period. 
*/ ··· 920 875 } while (rnp != NULL); 921 876 lastcomp = rsp->completed; 922 877 923 - spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 878 + spin_unlock_irqrestore(&rsp->onofflock, flags); 924 879 925 - /* 926 - * Move callbacks from the outgoing CPU to the running CPU. 927 - * Note that the outgoing CPU is now quiscent, so it is now 928 - * (uncharacteristically) safe to access its rcu_data structure. 929 - * Note also that we must carefully retain the order of the 930 - * outgoing CPU's callbacks in order for rcu_barrier() to work 931 - * correctly. Finally, note that we start all the callbacks 932 - * afresh, even those that have passed through a grace period 933 - * and are therefore ready to invoke. The theory is that hotplug 934 - * events are rare, and that if they are frequent enough to 935 - * indefinitely delay callbacks, you have far worse things to 936 - * be worrying about. 937 - */ 938 - rdp_me = rsp->rda[smp_processor_id()]; 939 - if (rdp->nxtlist != NULL) { 940 - *rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; 941 - rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 942 - rdp->nxtlist = NULL; 943 - for (i = 0; i < RCU_NEXT_SIZE; i++) 944 - rdp->nxttail[i] = &rdp->nxtlist; 945 - rdp_me->qlen += rdp->qlen; 946 - rdp->qlen = 0; 947 - } 948 - local_irq_restore(flags); 880 + rcu_adopt_orphan_cbs(rsp); 949 881 } 950 882 951 883 /* ··· 939 917 } 940 918 941 919 #else /* #ifdef CONFIG_HOTPLUG_CPU */ 920 + 921 + static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) 922 + { 923 + } 924 + 925 + static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) 926 + { 927 + } 942 928 943 929 static void rcu_offline_cpu(int cpu) 944 930 { ··· 1080 1050 int cpu; 1081 1051 unsigned long flags; 1082 1052 unsigned long mask; 1083 - struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; 1084 - struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES]; 1053 + struct rcu_node *rnp; 1085 1054 1086 - for (; rnp_cur < rnp_end; rnp_cur++) { 1055 + 
rcu_for_each_leaf_node(rsp, rnp) { 1087 1056 mask = 0; 1088 - spin_lock_irqsave(&rnp_cur->lock, flags); 1057 + spin_lock_irqsave(&rnp->lock, flags); 1089 1058 if (rsp->completed != lastcomp) { 1090 - spin_unlock_irqrestore(&rnp_cur->lock, flags); 1059 + spin_unlock_irqrestore(&rnp->lock, flags); 1091 1060 return 1; 1092 1061 } 1093 - if (rnp_cur->qsmask == 0) { 1094 - spin_unlock_irqrestore(&rnp_cur->lock, flags); 1062 + if (rnp->qsmask == 0) { 1063 + spin_unlock_irqrestore(&rnp->lock, flags); 1095 1064 continue; 1096 1065 } 1097 - cpu = rnp_cur->grplo; 1066 + cpu = rnp->grplo; 1098 1067 bit = 1; 1099 - for (; cpu <= rnp_cur->grphi; cpu++, bit <<= 1) { 1100 - if ((rnp_cur->qsmask & bit) != 0 && f(rsp->rda[cpu])) 1068 + for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 1069 + if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) 1101 1070 mask |= bit; 1102 1071 } 1103 1072 if (mask != 0 && rsp->completed == lastcomp) { 1104 1073 1105 - /* cpu_quiet_msk() releases rnp_cur->lock. */ 1106 - cpu_quiet_msk(mask, rsp, rnp_cur, flags); 1074 + /* cpu_quiet_msk() releases rnp->lock. */ 1075 + cpu_quiet_msk(mask, rsp, rnp, flags); 1107 1076 continue; 1108 1077 } 1109 - spin_unlock_irqrestore(&rnp_cur->lock, flags); 1078 + spin_unlock_irqrestore(&rnp->lock, flags); 1110 1079 } 1111 1080 return 0; 1112 1081 } ··· 1121 1092 struct rcu_node *rnp = rcu_get_root(rsp); 1122 1093 u8 signaled; 1123 1094 1124 - if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) 1095 + if (!rcu_gp_in_progress(rsp)) 1125 1096 return; /* No grace period in progress, nothing to force. */ 1126 1097 if (!spin_trylock_irqsave(&rsp->fqslock, flags)) { 1127 1098 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ ··· 1280 1251 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1281 1252 1282 1253 /* Start a new grace period if one not already started. 
*/ 1283 - if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) { 1254 + if (!rcu_gp_in_progress(rsp)) { 1284 1255 unsigned long nestflag; 1285 1256 struct rcu_node *rnp_root = rcu_get_root(rsp); 1286 1257 ··· 1360 1331 } 1361 1332 1362 1333 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 1363 - if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && 1334 + if (rcu_gp_in_progress(rsp) && 1364 1335 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) { 1365 1336 rdp->n_rp_need_fqs++; 1366 1337 return 1; ··· 1396 1367 per_cpu(rcu_bh_data, cpu).nxtlist || 1397 1368 rcu_preempt_needs_cpu(cpu); 1398 1369 } 1370 + 1371 + static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 1372 + static atomic_t rcu_barrier_cpu_count; 1373 + static DEFINE_MUTEX(rcu_barrier_mutex); 1374 + static struct completion rcu_barrier_completion; 1375 + 1376 + static void rcu_barrier_callback(struct rcu_head *notused) 1377 + { 1378 + if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 1379 + complete(&rcu_barrier_completion); 1380 + } 1381 + 1382 + /* 1383 + * Called with preemption disabled, and from cross-cpu IRQ context. 1384 + */ 1385 + static void rcu_barrier_func(void *type) 1386 + { 1387 + int cpu = smp_processor_id(); 1388 + struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 1389 + void (*call_rcu_func)(struct rcu_head *head, 1390 + void (*func)(struct rcu_head *head)); 1391 + 1392 + atomic_inc(&rcu_barrier_cpu_count); 1393 + call_rcu_func = type; 1394 + call_rcu_func(head, rcu_barrier_callback); 1395 + } 1396 + 1397 + /* 1398 + * Orchestrate the specified type of RCU barrier, waiting for all 1399 + * RCU callbacks of the specified type to complete. 1400 + */ 1401 + static void _rcu_barrier(struct rcu_state *rsp, 1402 + void (*call_rcu_func)(struct rcu_head *head, 1403 + void (*func)(struct rcu_head *head))) 1404 + { 1405 + BUG_ON(in_interrupt()); 1406 + /* Take mutex to serialize concurrent rcu_barrier() requests. 
*/ 1407 + mutex_lock(&rcu_barrier_mutex); 1408 + init_completion(&rcu_barrier_completion); 1409 + /* 1410 + * Initialize rcu_barrier_cpu_count to 1, then invoke 1411 + * rcu_barrier_func() on each CPU, so that each CPU also has 1412 + * incremented rcu_barrier_cpu_count. Only then is it safe to 1413 + * decrement rcu_barrier_cpu_count -- otherwise the first CPU 1414 + * might complete its grace period before all of the other CPUs 1415 + * did their increment, causing this function to return too 1416 + * early. 1417 + */ 1418 + atomic_set(&rcu_barrier_cpu_count, 1); 1419 + preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */ 1420 + rcu_adopt_orphan_cbs(rsp); 1421 + on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); 1422 + preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */ 1423 + if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 1424 + complete(&rcu_barrier_completion); 1425 + wait_for_completion(&rcu_barrier_completion); 1426 + mutex_unlock(&rcu_barrier_mutex); 1427 + } 1428 + 1429 + /** 1430 + * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. 1431 + */ 1432 + void rcu_barrier_bh(void) 1433 + { 1434 + _rcu_barrier(&rcu_bh_state, call_rcu_bh); 1435 + } 1436 + EXPORT_SYMBOL_GPL(rcu_barrier_bh); 1437 + 1438 + /** 1439 + * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 1440 + */ 1441 + void rcu_barrier_sched(void) 1442 + { 1443 + _rcu_barrier(&rcu_sched_state, call_rcu_sched); 1444 + } 1445 + EXPORT_SYMBOL_GPL(rcu_barrier_sched); 1399 1446 1400 1447 /* 1401 1448 * Do boot-time initialization of a CPU's per-CPU RCU data. ··· 1569 1464 case CPU_UP_PREPARE_FROZEN: 1570 1465 rcu_online_cpu(cpu); 1571 1466 break; 1467 + case CPU_DYING: 1468 + case CPU_DYING_FROZEN: 1469 + /* 1470 + * preempt_disable() in _rcu_barrier() prevents stop_machine(), 1471 + * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" 1472 + * returns, all online cpus have queued rcu_barrier_func(). 
1473 + * The dying CPU clears its cpu_online_mask bit and 1474 + * moves all of its RCU callbacks to ->orphan_cbs_list 1475 + * in the context of stop_machine(), so subsequent calls 1476 + * to _rcu_barrier() will adopt these callbacks and only 1477 + * then queue rcu_barrier_func() on all remaining CPUs. 1478 + */ 1479 + rcu_send_cbs_to_orphanage(&rcu_bh_state); 1480 + rcu_send_cbs_to_orphanage(&rcu_sched_state); 1481 + rcu_preempt_send_cbs_to_orphanage(); 1482 + break; 1572 1483 case CPU_DEAD: 1573 1484 case CPU_DEAD_FROZEN: 1574 1485 case CPU_UP_CANCELED: ··· 1647 1526 cpustride *= rsp->levelspread[i]; 1648 1527 rnp = rsp->level[i]; 1649 1528 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { 1650 - spin_lock_init(&rnp->lock); 1529 + if (rnp != rcu_get_root(rsp)) 1530 + spin_lock_init(&rnp->lock); 1651 1531 rnp->gpnum = 0; 1652 1532 rnp->qsmask = 0; 1653 1533 rnp->qsmaskinit = 0; ··· 1671 1549 INIT_LIST_HEAD(&rnp->blocked_tasks[1]); 1672 1550 } 1673 1551 } 1552 + spin_lock_init(&rcu_get_root(rsp)->lock); 1674 1553 } 1675 1554 1676 1555 /* ··· 1681 1558 */ 1682 1559 #define RCU_INIT_FLAVOR(rsp, rcu_data) \ 1683 1560 do { \ 1561 + int i; \ 1562 + int j; \ 1563 + struct rcu_node *rnp; \ 1564 + \ 1684 1565 rcu_init_one(rsp); \ 1685 1566 rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \ 1686 1567 j = 0; \ ··· 1697 1570 } \ 1698 1571 } while (0) 1699 1572 1700 - #ifdef CONFIG_TREE_PREEMPT_RCU 1701 - 1702 - void __init __rcu_init_preempt(void) 1703 - { 1704 - int i; /* All used by RCU_INIT_FLAVOR(). */ 1705 - int j; 1706 - struct rcu_node *rnp; 1707 - 1708 - RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); 1709 - } 1710 - 1711 - #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1712 - 1713 - void __init __rcu_init_preempt(void) 1714 - { 1715 - } 1716 - 1717 - #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1718 - 1719 1573 void __init __rcu_init(void) 1720 1574 { 1721 - int i; /* All used by RCU_INIT_FLAVOR(). 
*/ 1722 - int j; 1723 - struct rcu_node *rnp; 1724 - 1725 1575 rcu_bootup_announce(); 1726 1576 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 1727 1577 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); ··· 1709 1605 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 1710 1606 } 1711 1607 1712 - module_param(blimit, int, 0); 1713 - module_param(qhimark, int, 0); 1714 - module_param(qlowmark, int, 0); 1608 + #include "rcutree_plugin.h"

+73 -13

kernel/rcutree.h

··· 48 48 #elif NR_CPUS <= RCU_FANOUT_SQ 49 49 # define NUM_RCU_LVLS 2 50 50 # define NUM_RCU_LVL_0 1 51 - # define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) 51 + # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) 52 52 # define NUM_RCU_LVL_2 (NR_CPUS) 53 53 # define NUM_RCU_LVL_3 0 54 54 #elif NR_CPUS <= RCU_FANOUT_CUBE 55 55 # define NUM_RCU_LVLS 3 56 56 # define NUM_RCU_LVL_0 1 57 - # define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) 58 - # define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) 57 + # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) 58 + # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) 59 59 # define NUM_RCU_LVL_3 NR_CPUS 60 60 #else 61 61 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" ··· 79 79 * Definition for node within the RCU grace-period-detection hierarchy. 80 80 */ 81 81 struct rcu_node { 82 - spinlock_t lock; 82 + spinlock_t lock; /* Root rcu_node's lock protects some */ 83 + /* rcu_state fields as well as following. */ 83 84 long gpnum; /* Current grace period for this node. */ 84 85 /* This will either be equal to or one */ 85 86 /* behind the root rcu_node's gpnum. */ 86 87 unsigned long qsmask; /* CPUs or groups that need to switch in */ 87 88 /* order for current grace period to proceed.*/ 89 + /* In leaf rcu_node, each bit corresponds to */ 90 + /* an rcu_data structure, otherwise, each */ 91 + /* bit corresponds to a child rcu_node */ 92 + /* structure. */ 88 93 unsigned long qsmaskinit; 89 94 /* Per-GP initialization for qsmask. */ 90 95 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 96 + /* Only one bit will be set in this mask. */ 91 97 int grplo; /* lowest-numbered CPU or group here. */ 92 98 int grphi; /* highest-numbered CPU or group here. */ 93 99 u8 grpnum; /* CPU/group number for next level up. */ ··· 101 95 struct rcu_node *parent; 102 96 struct list_head blocked_tasks[2]; 103 97 /* Tasks blocked in RCU read-side critsect. 
*/ 98 + /* Grace period number (->gpnum) x blocked */ 99 + /* by tasks on the (x & 0x1) element of the */ 100 + /* blocked_tasks[] array. */ 104 101 } ____cacheline_internodealigned_in_smp; 102 + 103 + /* 104 + * Do a full breadth-first scan of the rcu_node structures for the 105 + * specified rcu_state structure. 106 + */ 107 + #define rcu_for_each_node_breadth_first(rsp, rnp) \ 108 + for ((rnp) = &(rsp)->node[0]; \ 109 + (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 110 + 111 + #define rcu_for_each_leaf_node(rsp, rnp) \ 112 + for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ 113 + (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 105 114 106 115 /* Index values for nxttail array in struct rcu_data. */ 107 116 #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ ··· 147 126 * Any of the partitions might be empty, in which case the 148 127 * pointer to that partition will be equal to the pointer for 149 128 * the following partition. When the list is empty, all of 150 - * the nxttail elements point to nxtlist, which is NULL. 129 + * the nxttail elements point to the ->nxtlist pointer itself, 130 + * which in that case is NULL. 151 131 * 152 - * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): 153 - * Entries that might have arrived after current GP ended 154 - * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): 155 - * Entries known to have arrived before current GP ended 156 - * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): 157 - * Entries that batch # <= ->completed - 1: waiting for current GP 158 132 * [nxtlist, *nxttail[RCU_DONE_TAIL]): 159 133 * Entries that batch # <= ->completed 160 134 * The grace period for these entries has completed, and 161 135 * the other grace-period-completed entries may be moved 162 136 * here temporarily in rcu_process_callbacks(). 
137 + * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): 138 + * Entries that batch # <= ->completed - 1: waiting for current GP 139 + * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): 140 + * Entries known to have arrived before current GP ended 141 + * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]): 142 + * Entries that might have arrived after current GP ended 143 + * Note that the value of *nxttail[RCU_NEXT_TAIL] will 144 + * always be NULL, as this is the end of the list. 163 145 */ 164 146 struct rcu_head *nxtlist; 165 147 struct rcu_head **nxttail[RCU_NEXT_SIZE]; ··· 240 216 /* Force QS state. */ 241 217 long gpnum; /* Current gp number. */ 242 218 long completed; /* # of last completed gp. */ 219 + 220 + /* End of fields guarded by root rcu_node's lock. */ 221 + 243 222 spinlock_t onofflock; /* exclude on/offline and */ 244 - /* starting new GP. */ 223 + /* starting new GP. Also */ 224 + /* protects the following */ 225 + /* orphan_cbs fields. */ 226 + struct rcu_head *orphan_cbs_list; /* list of rcu_head structs */ 227 + /* orphaned by all CPUs in */ 228 + /* a given leaf rcu_node */ 229 + /* going offline. */ 230 + struct rcu_head **orphan_cbs_tail; /* And tail pointer. */ 231 + long orphan_qlen; /* Number of orphaned cbs. */ 245 232 spinlock_t fqslock; /* Only one task forcing */ 246 233 /* quiescent states. 
*/ 247 234 unsigned long jiffies_force_qs; /* Time at which to invoke */ ··· 290 255 DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); 291 256 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 292 257 293 - #endif /* #ifdef RCU_TREE_NONCORE */ 258 + #else /* #ifdef RCU_TREE_NONCORE */ 294 259 260 + /* Forward declarations for rcutree_plugin.h */ 261 + static inline void rcu_bootup_announce(void); 262 + long rcu_batches_completed(void); 263 + static void rcu_preempt_note_context_switch(int cpu); 264 + static int rcu_preempted_readers(struct rcu_node *rnp); 265 + #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 266 + static void rcu_print_task_stall(struct rcu_node *rnp); 267 + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 268 + static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 269 + #ifdef CONFIG_HOTPLUG_CPU 270 + static void rcu_preempt_offline_tasks(struct rcu_state *rsp, 271 + struct rcu_node *rnp, 272 + struct rcu_data *rdp); 273 + static void rcu_preempt_offline_cpu(int cpu); 274 + #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 275 + static void rcu_preempt_check_callbacks(int cpu); 276 + static void rcu_preempt_process_callbacks(void); 277 + void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 278 + static int rcu_preempt_pending(int cpu); 279 + static int rcu_preempt_needs_cpu(int cpu); 280 + static void __cpuinit rcu_preempt_init_percpu_data(int cpu); 281 + static void rcu_preempt_send_cbs_to_orphanage(void); 282 + static void __init __rcu_init_preempt(void); 283 + 284 + #endif /* #else #ifdef RCU_TREE_NONCORE */

+76 -27

kernel/rcutree_plugin.h

··· 150 150 } 151 151 EXPORT_SYMBOL_GPL(__rcu_read_lock); 152 152 153 + /* 154 + * Check for preempted RCU readers blocking the current grace period 155 + * for the specified rcu_node structure. If the caller needs a reliable 156 + * answer, it must hold the rcu_node's ->lock. 157 + */ 158 + static int rcu_preempted_readers(struct rcu_node *rnp) 159 + { 160 + return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); 161 + } 162 + 153 163 static void rcu_read_unlock_special(struct task_struct *t) 154 164 { 155 165 int empty; ··· 206 196 break; 207 197 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 208 198 } 209 - empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); 199 + empty = !rcu_preempted_readers(rnp); 210 200 list_del_init(&t->rcu_node_entry); 211 201 t->rcu_blocked_node = NULL; 212 202 ··· 217 207 * drop rnp->lock and restore irq. 218 208 */ 219 209 if (!empty && rnp->qsmask == 0 && 220 - list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) { 210 + !rcu_preempted_readers(rnp)) { 221 211 struct rcu_node *rnp_p; 222 212 223 213 if (rnp->parent == NULL) { ··· 267 257 { 268 258 unsigned long flags; 269 259 struct list_head *lp; 270 - int phase = rnp->gpnum & 0x1; 260 + int phase; 271 261 struct task_struct *t; 272 262 273 - if (!list_empty(&rnp->blocked_tasks[phase])) { 263 + if (rcu_preempted_readers(rnp)) { 274 264 spin_lock_irqsave(&rnp->lock, flags); 275 - phase = rnp->gpnum & 0x1; /* re-read under lock. */ 265 + phase = rnp->gpnum & 0x1; 276 266 lp = &rnp->blocked_tasks[phase]; 277 267 list_for_each_entry(t, lp, rcu_node_entry) 278 268 printk(" P%d", t->pid); ··· 291 281 */ 292 282 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 293 283 { 294 - WARN_ON_ONCE(!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])); 284 + WARN_ON_ONCE(rcu_preempted_readers(rnp)); 295 285 WARN_ON_ONCE(rnp->qsmask); 296 - } 297 - 298 - /* 299 - * Check for preempted RCU readers for the specified rcu_node structure. 
300 - * If the caller needs a reliable answer, it must hold the rcu_node's 301 - * >lock. 302 - */ 303 - static int rcu_preempted_readers(struct rcu_node *rnp) 304 - { 305 - return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); 306 286 } 307 287 308 288 #ifdef CONFIG_HOTPLUG_CPU ··· 410 410 return !!per_cpu(rcu_preempt_data, cpu).nxtlist; 411 411 } 412 412 413 + /** 414 + * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 415 + */ 416 + void rcu_barrier(void) 417 + { 418 + _rcu_barrier(&rcu_preempt_state, call_rcu); 419 + } 420 + EXPORT_SYMBOL_GPL(rcu_barrier); 421 + 413 422 /* 414 423 * Initialize preemptable RCU's per-CPU data. 415 424 */ 416 425 static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 417 426 { 418 427 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); 428 + } 429 + 430 + /* 431 + * Move preemptable RCU's callbacks to ->orphan_cbs_list. 432 + */ 433 + static void rcu_preempt_send_cbs_to_orphanage(void) 434 + { 435 + rcu_send_cbs_to_orphanage(&rcu_preempt_state); 436 + } 437 + 438 + /* 439 + * Initialize preemptable RCU's state structures. 440 + */ 441 + static void __init __rcu_init_preempt(void) 442 + { 443 + RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); 419 444 } 420 445 421 446 /* ··· 486 461 { 487 462 } 488 463 464 + /* 465 + * Because preemptable RCU does not exist, there are never any preempted 466 + * RCU readers. 467 + */ 468 + static int rcu_preempted_readers(struct rcu_node *rnp) 469 + { 470 + return 0; 471 + } 472 + 489 473 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 490 474 491 475 /* ··· 515 481 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 516 482 { 517 483 WARN_ON_ONCE(rnp->qsmask); 518 - } 519 - 520 - /* 521 - * Because preemptable RCU does not exist, there are never any preempted 522 - * RCU readers. 
523 - */ 524 - static int rcu_preempted_readers(struct rcu_node *rnp) 525 - { 526 - return 0; 527 484 } 528 485 529 486 #ifdef CONFIG_HOTPLUG_CPU ··· 543 518 * Because preemptable RCU does not exist, it never has any callbacks 544 519 * to check. 545 520 */ 546 - void rcu_preempt_check_callbacks(int cpu) 521 + static void rcu_preempt_check_callbacks(int cpu) 547 522 { 548 523 } 549 524 ··· 551 526 * Because preemptable RCU does not exist, it never has any callbacks 552 527 * to process. 553 528 */ 554 - void rcu_preempt_process_callbacks(void) 529 + static void rcu_preempt_process_callbacks(void) 555 530 { 556 531 } 557 532 ··· 581 556 } 582 557 583 558 /* 559 + * Because preemptable RCU does not exist, rcu_barrier() is just 560 + * another name for rcu_barrier_sched(). 561 + */ 562 + void rcu_barrier(void) 563 + { 564 + rcu_barrier_sched(); 565 + } 566 + EXPORT_SYMBOL_GPL(rcu_barrier); 567 + 568 + /* 584 569 * Because preemptable RCU does not exist, there is no per-CPU 585 570 * data to initialize. 586 571 */ 587 572 static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 573 + { 574 + } 575 + 576 + /* 577 + * Because there is no preemptable RCU, there are no callbacks to move. 578 + */ 579 + static void rcu_preempt_send_cbs_to_orphanage(void) 580 + { 581 + } 582 + 583 + /* 584 + * Because preemptable RCU does not exist, it need not be initialized. 585 + */ 586 + static void __init __rcu_init_preempt(void) 588 587 { 589 588 } 590 589

+2 -2

kernel/rcutree_trace.c

··· 159 159 struct rcu_node *rnp; 160 160 161 161 seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x " 162 - "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", 162 + "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n", 163 163 rsp->completed, rsp->gpnum, rsp->signaled, 164 164 (long)(rsp->jiffies_force_qs - jiffies), 165 165 (int)(jiffies & 0xffff), 166 166 rsp->n_force_qs, rsp->n_force_qs_ngp, 167 167 rsp->n_force_qs - rsp->n_force_qs_ngp, 168 - rsp->n_force_qs_lh); 168 + rsp->n_force_qs_lh, rsp->orphan_qlen); 169 169 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { 170 170 if (rnp->level != level) { 171 171 seq_puts(m, "\n");