Merge branch 'rcu-v28-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

+1 -1

Documentation/RCU/checklist.txt

··· 210 210 number of updates per grace period. 211 211 212 212 9. All RCU list-traversal primitives, which include 213 - rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(), 213 + rcu_dereference(), list_for_each_entry_rcu(), 214 214 list_for_each_continue_rcu(), and list_for_each_safe_rcu(), 215 215 must be either within an RCU read-side critical section or 216 216 must be protected by appropriate update-side locks. RCU

+8 -8

Documentation/RCU/rcuref.txt

··· 29 29 } 30 30 31 31 If this list/array is made lock free using RCU as in changing the 32 - write_lock() in add() and delete() to spin_lock and changing read_lock 33 - in search_and_reference to rcu_read_lock(), the atomic_get in 34 - search_and_reference could potentially hold reference to an element which 32 + write_lock() in add() and delete() to spin_lock() and changing read_lock() 33 + in search_and_reference() to rcu_read_lock(), the atomic_inc() in 34 + search_and_reference() could potentially hold reference to an element which 35 35 has already been deleted from the list/array. Use atomic_inc_not_zero() 36 36 in this scenario as follows: 37 37 ··· 40 40 { { 41 41 alloc_object rcu_read_lock(); 42 42 ... search_for_element 43 - atomic_set(&el->rc, 1); if (atomic_inc_not_zero(&el->rc)) { 44 - write_lock(&list_lock); rcu_read_unlock(); 43 + atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) { 44 + spin_lock(&list_lock); rcu_read_unlock(); 45 45 return FAIL; 46 46 add_element } 47 47 ... ... 48 - write_unlock(&list_lock); rcu_read_unlock(); 48 + spin_unlock(&list_lock); rcu_read_unlock(); 49 49 } } 50 50 3. 4. 51 51 release_referenced() delete() 52 52 { { 53 - ... write_lock(&list_lock); 53 + ... spin_lock(&list_lock); 54 54 if (atomic_dec_and_test(&el->rc)) ... 55 55 call_rcu(&el->head, el_free); delete_element 56 - ... write_unlock(&list_lock); 56 + ... spin_unlock(&list_lock); 57 57 } ... 58 58 if (atomic_dec_and_test(&el->rc)) 59 59 call_rcu(&el->head, el_free);

-2

Documentation/RCU/whatisRCU.txt

··· 786 786 list_for_each_entry_rcu 787 787 hlist_for_each_entry_rcu 788 788 789 - list_for_each_rcu (to be deprecated in favor of 790 - list_for_each_entry_rcu) 791 789 list_for_each_continue_rcu (to be deprecated in favor of new 792 790 list_for_each_entry_continue_rcu) 793 791

+3 -1

include/linux/compiler.h

··· 190 190 * ACCESS_ONCE() in different C statements. 191 191 * 192 192 * This macro does absolutely -nothing- to prevent the CPU from reordering, 193 - * merging, or refetching absolutely anything at any time. 193 + * merging, or refetching absolutely anything at any time. Its main intended 194 + * use is to mediate communication between process-level code and irq/NMI 195 + * handlers, all running on the same CPU. 194 196 */ 195 197 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) 196 198

+27 -10

include/linux/rcuclassic.h

··· 40 40 #include <linux/cpumask.h> 41 41 #include <linux/seqlock.h> 42 42 43 + #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 44 + #define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ 45 + #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ 46 + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 43 47 44 48 /* Global control variables for rcupdate callback mechanism. */ 45 49 struct rcu_ctrlblk { 46 50 long cur; /* Current batch number. */ 47 51 long completed; /* Number of the last completed batch */ 48 - int next_pending; /* Is the next batch already waiting? */ 52 + long pending; /* Number of the last pending batch */ 53 + #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 54 + unsigned long gp_start; /* Time at which GP started in jiffies. */ 55 + unsigned long jiffies_stall; 56 + /* Time at which to check for CPU stalls. */ 57 + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 49 58 50 59 int signaled; 51 60 ··· 75 66 return (a - b) > 0; 76 67 } 77 68 78 - /* 79 - * Per-CPU data for Read-Copy UPdate. 80 - * nxtlist - new callbacks are added here 81 - * curlist - current batch for which quiescent cycle started if any 82 - */ 69 + /* Per-CPU data for Read-Copy UPdate. */ 83 70 struct rcu_data { 84 71 /* 1) quiescent state handling : */ 85 72 long quiescbatch; /* Batch # for grace period */ ··· 83 78 int qs_pending; /* core waits for quiesc state */ 84 79 85 80 /* 2) batch handling */ 86 - long batch; /* Batch # for current RCU batch */ 81 + /* 82 + * if nxtlist is not NULL, then: 83 + * batch: 84 + * The batch # for the last entry of nxtlist 85 + * [*nxttail[1], NULL = *nxttail[2]): 86 + * Entries that batch # <= batch 87 + * [*nxttail[0], *nxttail[1]): 88 + * Entries that batch # <= batch - 1 89 + * [nxtlist, *nxttail[0]): 90 + * Entries that batch # <= batch - 2 91 + * The grace period for these entries has completed, and 92 + * the other grace-period-completed entries may be moved 93 + * here temporarily in rcu_process_callbacks(). 
94 + */ 95 + long batch; 87 96 struct rcu_head *nxtlist; 88 - struct rcu_head **nxttail; 97 + struct rcu_head **nxttail[3]; 89 98 long qlen; /* # of queued callbacks */ 90 - struct rcu_head *curlist; 91 - struct rcu_head **curtail; 92 99 struct rcu_head *donelist; 93 100 struct rcu_head **donetail; 94 101 long blimit; /* Upper limit on a processed batch */

-14

include/linux/rculist.h

··· 198 198 at->prev = last; 199 199 } 200 200 201 - /** 202 - * list_for_each_rcu - iterate over an rcu-protected list 203 - * @pos: the &struct list_head to use as a loop cursor. 204 - * @head: the head for your list. 205 - * 206 - * This list-traversal primitive may safely run concurrently with 207 - * the _rcu list-mutation primitives such as list_add_rcu() 208 - * as long as the traversal is guarded by rcu_read_lock(). 209 - */ 210 - #define list_for_each_rcu(pos, head) \ 211 - for (pos = rcu_dereference((head)->next); \ 212 - prefetch(pos->next), pos != (head); \ 213 - pos = rcu_dereference(pos->next)) 214 - 215 201 #define __list_for_each_rcu(pos, head) \ 216 202 for (pos = rcu_dereference((head)->next); \ 217 203 pos != (head); \

+20

include/linux/rcupdate.h

··· 133 133 #define rcu_read_unlock_bh() __rcu_read_unlock_bh() 134 134 135 135 /** 136 + * rcu_read_lock_sched - mark the beginning of an RCU-classic critical section 137 + * 138 + * Should be used with either 139 + * - synchronize_sched() 140 + * or 141 + * - call_rcu_sched() and rcu_barrier_sched() 142 + * on the write-side to ensure proper synchronization. 143 + */ 144 + #define rcu_read_lock_sched() preempt_disable() 145 + 146 + /* 147 + * rcu_read_unlock_sched - mark the end of an RCU-classic critical section 148 + * 149 + * See rcu_read_lock_sched for more information. 150 + */ 151 + #define rcu_read_unlock_sched() preempt_enable() 152 + 153 + 154 + 155 + /** 136 156 * rcu_dereference - fetch an RCU-protected pointer in an 137 157 * RCU read-side critical section. This pointer may later 138 158 * be safely dereferenced.

+8 -3

include/linux/rcupreempt.h

··· 57 57 rdssp->sched_qs++; 58 58 } 59 59 #define rcu_bh_qsctr_inc(cpu) 60 - #define call_rcu_bh(head, rcu) call_rcu(head, rcu) 60 + 61 + /* 62 + * Someone might want to pass call_rcu_bh as a function pointer. 63 + * So this needs to just be a rename and not a macro function. 64 + * (no parentheses) 65 + */ 66 + #define call_rcu_bh call_rcu 61 67 62 68 /** 63 69 * call_rcu_sched - Queue RCU callback for invocation after sched grace period. ··· 117 111 struct softirq_action; 118 112 119 113 #ifdef CONFIG_NO_HZ 120 - DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); 121 114 122 115 static inline void rcu_enter_nohz(void) 123 116 { ··· 131 126 { 132 127 static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); 133 128 134 - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 135 129 __get_cpu_var(rcu_dyntick_sched).dynticks++; 130 + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 136 131 WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), 137 132 &rs); 138 133 }

+251 -86

kernel/rcuclassic.c

··· 47 47 #include <linux/notifier.h> 48 48 #include <linux/cpu.h> 49 49 #include <linux/mutex.h> 50 + #include <linux/time.h> 50 51 51 52 #ifdef CONFIG_DEBUG_LOCK_ALLOC 52 53 static struct lock_class_key rcu_lock_key; ··· 61 60 static struct rcu_ctrlblk rcu_ctrlblk = { 62 61 .cur = -300, 63 62 .completed = -300, 63 + .pending = -300, 64 64 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), 65 65 .cpumask = CPU_MASK_NONE, 66 66 }; 67 67 static struct rcu_ctrlblk rcu_bh_ctrlblk = { 68 68 .cur = -300, 69 69 .completed = -300, 70 + .pending = -300, 70 71 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock), 71 72 .cpumask = CPU_MASK_NONE, 72 73 }; ··· 86 83 { 87 84 int cpu; 88 85 cpumask_t cpumask; 86 + unsigned long flags; 87 + 89 88 set_need_resched(); 89 + spin_lock_irqsave(&rcp->lock, flags); 90 90 if (unlikely(!rcp->signaled)) { 91 91 rcp->signaled = 1; 92 92 /* ··· 115 109 for_each_cpu_mask_nr(cpu, cpumask) 116 110 smp_send_reschedule(cpu); 117 111 } 112 + spin_unlock_irqrestore(&rcp->lock, flags); 118 113 } 119 114 #else 120 115 static inline void force_quiescent_state(struct rcu_data *rdp, ··· 124 117 set_need_resched(); 125 118 } 126 119 #endif 120 + 121 + static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp, 122 + struct rcu_data *rdp) 123 + { 124 + long batch; 125 + 126 + head->next = NULL; 127 + smp_mb(); /* Read of rcu->cur must happen after any change by caller. */ 128 + 129 + /* 130 + * Determine the batch number of this callback. 131 + * 132 + * Using ACCESS_ONCE to avoid the following error when gcc eliminates 133 + * local variable "batch" and emits codes like this: 134 + * 1) rdp->batch = rcp->cur + 1 # gets old value 135 + * ...... 136 + * 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value 137 + * then [*nxttail[0], *nxttail[1]) may contain callbacks 138 + * that batch# = rdp->batch, see the comment of struct rcu_data. 
139 + */ 140 + batch = ACCESS_ONCE(rcp->cur) + 1; 141 + 142 + if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) { 143 + /* process callbacks */ 144 + rdp->nxttail[0] = rdp->nxttail[1]; 145 + rdp->nxttail[1] = rdp->nxttail[2]; 146 + if (rcu_batch_after(batch - 1, rdp->batch)) 147 + rdp->nxttail[0] = rdp->nxttail[2]; 148 + } 149 + 150 + rdp->batch = batch; 151 + *rdp->nxttail[2] = head; 152 + rdp->nxttail[2] = &head->next; 153 + 154 + if (unlikely(++rdp->qlen > qhimark)) { 155 + rdp->blimit = INT_MAX; 156 + force_quiescent_state(rdp, &rcu_ctrlblk); 157 + } 158 + } 159 + 160 + #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 161 + 162 + static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp) 163 + { 164 + rcp->gp_start = jiffies; 165 + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; 166 + } 167 + 168 + static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) 169 + { 170 + int cpu; 171 + long delta; 172 + unsigned long flags; 173 + 174 + /* Only let one CPU complain about others per time interval. */ 175 + 176 + spin_lock_irqsave(&rcp->lock, flags); 177 + delta = jiffies - rcp->jiffies_stall; 178 + if (delta < 2 || rcp->cur != rcp->completed) { 179 + spin_unlock_irqrestore(&rcp->lock, flags); 180 + return; 181 + } 182 + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 183 + spin_unlock_irqrestore(&rcp->lock, flags); 184 + 185 + /* OK, time to rat on our buddy... 
*/ 186 + 187 + printk(KERN_ERR "RCU detected CPU stalls:"); 188 + for_each_possible_cpu(cpu) { 189 + if (cpu_isset(cpu, rcp->cpumask)) 190 + printk(" %d", cpu); 191 + } 192 + printk(" (detected by %d, t=%ld jiffies)\n", 193 + smp_processor_id(), (long)(jiffies - rcp->gp_start)); 194 + } 195 + 196 + static void print_cpu_stall(struct rcu_ctrlblk *rcp) 197 + { 198 + unsigned long flags; 199 + 200 + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", 201 + smp_processor_id(), jiffies, 202 + jiffies - rcp->gp_start); 203 + dump_stack(); 204 + spin_lock_irqsave(&rcp->lock, flags); 205 + if ((long)(jiffies - rcp->jiffies_stall) >= 0) 206 + rcp->jiffies_stall = 207 + jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 208 + spin_unlock_irqrestore(&rcp->lock, flags); 209 + set_need_resched(); /* kick ourselves to get things going. */ 210 + } 211 + 212 + static void check_cpu_stall(struct rcu_ctrlblk *rcp) 213 + { 214 + long delta; 215 + 216 + delta = jiffies - rcp->jiffies_stall; 217 + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) { 218 + 219 + /* We haven't checked in, so go dump stack. */ 220 + print_cpu_stall(rcp); 221 + 222 + } else if (rcp->cur != rcp->completed && delta >= 2) { 223 + 224 + /* They had two jiffies to dump stack, so complain. */ 225 + print_other_cpu_stall(rcp); 226 + } 227 + } 228 + 229 + #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 230 + 231 + static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp) 232 + { 233 + } 234 + 235 + static inline void check_cpu_stall(struct rcu_ctrlblk *rcp) 236 + { 237 + } 238 + 239 + #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 127 240 128 241 /** 129 242 * call_rcu - Queue an RCU callback for invocation after a grace period.
··· 260 133 void (*func)(struct rcu_head *rcu)) 261 134 { 262 135 unsigned long flags; 263 - struct rcu_data *rdp; 264 136 265 137 head->func = func; 266 - head->next = NULL; 267 138 local_irq_save(flags); 268 - rdp = &__get_cpu_var(rcu_data); 269 - *rdp->nxttail = head; 270 - rdp->nxttail = &head->next; 271 - if (unlikely(++rdp->qlen > qhimark)) { 272 - rdp->blimit = INT_MAX; 273 - force_quiescent_state(rdp, &rcu_ctrlblk); 274 - } 139 + __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data)); 275 140 local_irq_restore(flags); 276 141 } 277 142 EXPORT_SYMBOL_GPL(call_rcu); ··· 288 169 void (*func)(struct rcu_head *rcu)) 289 170 { 290 171 unsigned long flags; 291 - struct rcu_data *rdp; 292 172 293 173 head->func = func; 294 - head->next = NULL; 295 174 local_irq_save(flags); 296 - rdp = &__get_cpu_var(rcu_bh_data); 297 - *rdp->nxttail = head; 298 - rdp->nxttail = &head->next; 299 - 300 - if (unlikely(++rdp->qlen > qhimark)) { 301 - rdp->blimit = INT_MAX; 302 - force_quiescent_state(rdp, &rcu_bh_ctrlblk); 303 - } 304 - 175 + __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); 305 176 local_irq_restore(flags); 306 177 } 307 178 EXPORT_SYMBOL_GPL(call_rcu_bh); ··· 320 211 static inline void raise_rcu_softirq(void) 321 212 { 322 213 raise_softirq(RCU_SOFTIRQ); 323 - /* 324 - * The smp_mb() here is required to ensure that this cpu's 325 - * __rcu_process_callbacks() reads the most recently updated 326 - * value of rcu->cur. 
327 - */ 328 - smp_mb(); 329 214 } 330 215 331 216 /* ··· 328 225 */ 329 226 static void rcu_do_batch(struct rcu_data *rdp) 330 227 { 228 + unsigned long flags; 331 229 struct rcu_head *next, *list; 332 230 int count = 0; 333 231 ··· 343 239 } 344 240 rdp->donelist = list; 345 241 346 - local_irq_disable(); 242 + local_irq_save(flags); 347 243 rdp->qlen -= count; 348 - local_irq_enable(); 244 + local_irq_restore(flags); 349 245 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) 350 246 rdp->blimit = blimit; 351 247 ··· 373 269 * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace 374 270 * period (if necessary). 375 271 */ 272 + 376 273 /* 377 274 * Register a new batch of callbacks, and start it up if there is currently no 378 275 * active batch and the batch to be registered has not already occurred. ··· 381 276 */ 382 277 static void rcu_start_batch(struct rcu_ctrlblk *rcp) 383 278 { 384 - if (rcp->next_pending && 279 + if (rcp->cur != rcp->pending && 385 280 rcp->completed == rcp->cur) { 386 - rcp->next_pending = 0; 387 - /* 388 - * next_pending == 0 must be visible in 389 - * __rcu_process_callbacks() before it can see new value of cur. 390 - */ 391 - smp_wmb(); 392 281 rcp->cur++; 282 + record_gp_stall_check_time(rcp); 393 283 394 284 /* 395 285 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a ··· 422 322 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, 423 323 struct rcu_data *rdp) 424 324 { 325 + unsigned long flags; 326 + 425 327 if (rdp->quiescbatch != rcp->cur) { 426 328 /* start new grace period: */ 427 329 rdp->qs_pending = 1; ··· 447 345 return; 448 346 rdp->qs_pending = 0; 449 347 450 - spin_lock(&rcp->lock); 348 + spin_lock_irqsave(&rcp->lock, flags); 451 349 /* 452 350 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync 453 351 * during cpu startup. Ignore the quiescent state. 
··· 455 353 if (likely(rdp->quiescbatch == rcp->cur)) 456 354 cpu_quiet(rdp->cpu, rcp); 457 355 458 - spin_unlock(&rcp->lock); 356 + spin_unlock_irqrestore(&rcp->lock, flags); 459 357 } 460 358 461 359 ··· 466 364 * which is dead and hence not processing interrupts. 467 365 */ 468 366 static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, 469 - struct rcu_head **tail) 367 + struct rcu_head **tail, long batch) 470 368 { 471 - local_irq_disable(); 472 - *this_rdp->nxttail = list; 473 - if (list) 474 - this_rdp->nxttail = tail; 475 - local_irq_enable(); 369 + unsigned long flags; 370 + 371 + if (list) { 372 + local_irq_save(flags); 373 + this_rdp->batch = batch; 374 + *this_rdp->nxttail[2] = list; 375 + this_rdp->nxttail[2] = tail; 376 + local_irq_restore(flags); 377 + } 476 378 } 477 379 478 380 static void __rcu_offline_cpu(struct rcu_data *this_rdp, 479 381 struct rcu_ctrlblk *rcp, struct rcu_data *rdp) 480 382 { 481 - /* if the cpu going offline owns the grace period 383 + unsigned long flags; 384 + 385 + /* 386 + * if the cpu going offline owns the grace period 482 387 * we can block indefinitely waiting for it, so flush 483 388 * it here 484 389 */ 485 - spin_lock_bh(&rcp->lock); 390 + spin_lock_irqsave(&rcp->lock, flags); 486 391 if (rcp->cur != rcp->completed) 487 392 cpu_quiet(rdp->cpu, rcp); 488 - spin_unlock_bh(&rcp->lock); 489 - rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); 490 - rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); 491 - rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); 393 + rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1); 394 + rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1); 395 + spin_unlock(&rcp->lock); 492 396 493 - local_irq_disable(); 494 397 this_rdp->qlen += rdp->qlen; 495 - local_irq_enable(); 398 + local_irq_restore(flags); 496 399 } 497 400 498 401 static void rcu_offline_cpu(int cpu) ··· 527 420 static void __rcu_process_callbacks(struct 
rcu_ctrlblk *rcp, 528 421 struct rcu_data *rdp) 529 422 { 530 - if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { 531 - *rdp->donetail = rdp->curlist; 532 - rdp->donetail = rdp->curtail; 533 - rdp->curlist = NULL; 534 - rdp->curtail = &rdp->curlist; 535 - } 423 + unsigned long flags; 424 + long completed_snap; 536 425 537 - if (rdp->nxtlist && !rdp->curlist) { 538 - local_irq_disable(); 539 - rdp->curlist = rdp->nxtlist; 540 - rdp->curtail = rdp->nxttail; 541 - rdp->nxtlist = NULL; 542 - rdp->nxttail = &rdp->nxtlist; 543 - local_irq_enable(); 426 + if (rdp->nxtlist) { 427 + local_irq_save(flags); 428 + completed_snap = ACCESS_ONCE(rcp->completed); 544 429 545 430 /* 546 - * start the next batch of callbacks 431 + * move the other grace-period-completed entries to 432 + * [rdp->nxtlist, *rdp->nxttail[0]) temporarily 547 433 */ 434 + if (!rcu_batch_before(completed_snap, rdp->batch)) 435 + rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2]; 436 + else if (!rcu_batch_before(completed_snap, rdp->batch - 1)) 437 + rdp->nxttail[0] = rdp->nxttail[1]; 548 438 549 - /* determine batch number */ 550 - rdp->batch = rcp->cur + 1; 551 - /* see the comment and corresponding wmb() in 552 - * the rcu_start_batch() 439 + /* 440 + * the grace period for entries in 441 + * [rdp->nxtlist, *rdp->nxttail[0]) has completed and 442 + * move these entries to donelist 553 443 */ 554 - smp_rmb(); 444 + if (rdp->nxttail[0] != &rdp->nxtlist) { 445 + *rdp->donetail = rdp->nxtlist; 446 + rdp->donetail = rdp->nxttail[0]; 447 + rdp->nxtlist = *rdp->nxttail[0]; 448 + *rdp->donetail = NULL; 555 449 556 - if (!rcp->next_pending) { 450 + if (rdp->nxttail[1] == rdp->nxttail[0]) 451 + rdp->nxttail[1] = &rdp->nxtlist; 452 + if (rdp->nxttail[2] == rdp->nxttail[0]) 453 + rdp->nxttail[2] = &rdp->nxtlist; 454 + rdp->nxttail[0] = &rdp->nxtlist; 455 + } 456 + 457 + local_irq_restore(flags); 458 + 459 + if (rcu_batch_after(rdp->batch, rcp->pending)) { 460 + unsigned long flags2; 461 + 557 
462 /* and start it/schedule start if it's a new batch */ 558 - spin_lock(&rcp->lock); 559 - rcp->next_pending = 1; 560 - rcu_start_batch(rcp); 561 - spin_unlock(&rcp->lock); 463 + spin_lock_irqsave(&rcp->lock, flags2); 464 + if (rcu_batch_after(rdp->batch, rcp->pending)) { 465 + rcp->pending = rdp->batch; 466 + rcu_start_batch(rcp); 467 + } 468 + spin_unlock_irqrestore(&rcp->lock, flags2); 562 469 } 563 470 } 564 471 ··· 583 462 584 463 static void rcu_process_callbacks(struct softirq_action *unused) 585 464 { 465 + /* 466 + * Memory references from any prior RCU read-side critical sections 467 + * executed by the interrupted code must be seen before any RCU 468 + * grace-period manipulations below. 469 + */ 470 + 471 + smp_mb(); /* See above block comment. */ 472 + 586 473 __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); 587 474 __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); 475 + 476 + /* 477 + * Memory references from any later RCU read-side critical sections 478 + * executed by the interrupted code must be seen after any RCU 479 + * grace-period manipulations above. 480 + */ 481 + 482 + smp_mb(); /* See above block comment. */ 588 483 } 589 484 590 485 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) 591 486 { 592 - /* This cpu has pending rcu entries and the grace period 593 - * for them has completed. 594 - */ 595 - if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) 596 - return 1; 487 + /* Check for CPU stalls, if enabled. */ 488 + check_cpu_stall(rcp); 597 489 598 - /* This cpu has no pending entries, but there are new entries */ 599 - if (!rdp->curlist && rdp->nxtlist) 600 - return 1; 490 + if (rdp->nxtlist) { 491 + long completed_snap = ACCESS_ONCE(rcp->completed); 492 + 493 + /* 494 + * This cpu has pending rcu entries and the grace period 495 + * for them has completed.
496 + */ 497 + if (!rcu_batch_before(completed_snap, rdp->batch)) 498 + return 1; 499 + if (!rcu_batch_before(completed_snap, rdp->batch - 1) && 500 + rdp->nxttail[0] != rdp->nxttail[1]) 501 + return 1; 502 + if (rdp->nxttail[0] != &rdp->nxtlist) 503 + return 1; 504 + 505 + /* 506 + * This cpu has pending rcu entries and the new batch 507 + * for them has been neither started nor scheduled to start 508 + */ 509 + if (rcu_batch_after(rdp->batch, rcp->pending)) 510 + return 1; 511 + } 601 512 602 513 /* This cpu has finished callbacks to invoke */ 603 514 if (rdp->donelist) ··· 665 512 struct rcu_data *rdp = &per_cpu(rcu_data, cpu); 666 513 struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); 667 514 668 - return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); 515 + return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu); 669 516 } 670 517 518 + /* 519 + * Top-level function driving RCU grace-period detection, normally 520 + * invoked from the scheduler-clock interrupt. This function simply 521 + * increments counters that are read only from softirq by this same 522 + * CPU, so there are no memory barriers required.
523 + */ 671 524 void rcu_check_callbacks(int cpu, int user) 672 525 { 673 526 if (user || ··· 717 558 static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, 718 559 struct rcu_data *rdp) 719 560 { 561 + unsigned long flags; 562 + 563 + spin_lock_irqsave(&rcp->lock, flags); 720 564 memset(rdp, 0, sizeof(*rdp)); 721 - rdp->curtail = &rdp->curlist; 722 - rdp->nxttail = &rdp->nxtlist; 565 + rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist; 723 566 rdp->donetail = &rdp->donelist; 724 567 rdp->quiescbatch = rcp->completed; 725 568 rdp->qs_pending = 0; 726 569 rdp->cpu = cpu; 727 570 rdp->blimit = blimit; 571 + spin_unlock_irqrestore(&rcp->lock, flags); 728 572 } 729 573 730 574 static void __cpuinit rcu_online_cpu(int cpu) ··· 772 610 */ 773 611 void __init __rcu_init(void) 774 612 { 613 + #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 614 + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); 615 + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 775 616 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, 776 617 (void *)(long)smp_processor_id()); 777 618 /* Register notifier for non-boot CPUs */

-8

kernel/rcupreempt.c

··· 59 59 #include <linux/rcupreempt_trace.h> 60 60 61 61 /* 62 - * Macro that prevents the compiler from reordering accesses, but does 63 - * absolutely -nothing- to prevent CPUs from reordering. This is used 64 - * only to mediate communication between mainline code and hardware 65 - * interrupt and NMI handlers. 66 - */ 67 - #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) 68 - 69 - /* 70 62 * PREEMPT_RCU data structures. 71 63 */ 72 64

+6 -1

kernel/rcupreempt_trace.c

··· 308 308 309 309 static int __init rcupreempt_trace_init(void) 310 310 { 311 + int ret; 312 + 311 313 mutex_init(&rcupreempt_trace_mutex); 312 314 rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); 313 315 if (!rcupreempt_trace_buf) 314 316 return 1; 315 - return rcupreempt_debugfs_init(); 317 + ret = rcupreempt_debugfs_init(); 318 + if (ret) 319 + kfree(rcupreempt_trace_buf); 320 + return ret; 316 321 } 317 322 318 323 static void __exit rcupreempt_trace_cleanup(void)

+13

lib/Kconfig.debug

··· 597 597 Say N here if you want the RCU torture tests to start only 598 598 after being manually enabled via /proc. 599 599 600 + config RCU_CPU_STALL_DETECTOR 601 + bool "Check for stalled CPUs delaying RCU grace periods" 602 + depends on CLASSIC_RCU 603 + default n 604 + help 605 + This option causes RCU to printk information on which 606 + CPUs are delaying the current grace period, but only when 607 + the grace period extends for excessive time periods. 608 + 609 + Say Y if you want RCU to perform such checks. 610 + 611 + Say N if you are unsure. 612 + 600 613 config KPROBES_SANITY_TEST 601 614 bool "Kprobes sanity tests" 602 615 depends on DEBUG_KERNEL