Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu

Pull RCU updates from Paul E. McKenney:

- Miscellaneous fixes, perhaps most notably removing obsolete
code whose only purpose in life was to gather information for
the now-removed RCU debugfs facility. Other notable changes
include removing NO_HZ_FULL_ALL in favor of the nohz_full kernel
boot parameter, minor optimizations for expedited grace periods,
some added tracing, creating an RCU-specific workqueue using Tejun's
new WQ_MEM_RECLAIM flag, and several cleanups to code and comments.

- SRCU cleanups and optimizations.

- Torture-test updates, perhaps most notably the addition of ARMv8
support, but also including numerous cleanups and usability fixes.

Signed-off-by: Ingo Molnar <mingo@kernel.org>

+238 -242
-7
Documentation/timers/NO_HZ.txt
··· 131 131 this means that your system must have at least two CPUs in order for 132 132 CONFIG_NO_HZ_FULL=y to do anything for you. 133 133 134 - Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies 135 - that all CPUs other than the boot CPU are adaptive-ticks CPUs. This 136 - Kconfig parameter will be overridden by the "nohz_full=" boot parameter, 137 - so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and 138 - the "nohz_full=1" boot parameter is specified, the boot parameter will 139 - prevail so that only CPU 1 will be an adaptive-ticks CPU. 140 - 141 134 Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. 142 135 This is covered in the "RCU IMPLICATIONS" section below. 143 136
+6 -4
include/linux/rcupdate.h
··· 214 214 #endif 215 215 216 216 /* 217 - * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic 218 - * initialization and destruction of rcu_head on the stack. rcu_head structures 219 - * allocated dynamically in the heap or defined statically don't need any 220 - * initialization. 217 + * The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls 218 + * are needed for dynamic initialization and destruction of rcu_head 219 + * on the stack, and init_rcu_head()/destroy_rcu_head() are needed for 220 + * dynamic initialization and destruction of statically allocated rcu_head 221 + * structures. However, rcu_head structures allocated dynamically in the 222 + * heap don't need any initialization. 221 223 */ 222 224 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 223 225 void init_rcu_head(struct rcu_head *head);
+1 -1
include/linux/types.h
··· 217 217 * 218 218 * This guarantee is important for few reasons: 219 219 * - future call_rcu_lazy() will make use of lower bits in the pointer; 220 - * - the structure shares storage spacer in struct page with @compound_head, 220 + * - the structure shares storage space in struct page with @compound_head, 221 221 * which encode PageTail() in bit 0. The guarantee is needed to avoid 222 222 * false-positive PageTail(). 223 223 */
+4
include/trace/events/rcu.h
··· 179 179 * 180 180 * "snap": Captured snapshot of expedited grace period sequence number. 181 181 * "start": Started a real expedited grace period. 182 + * "reset": Started resetting the tree 183 + * "select": Started selecting the CPUs to wait on. 184 + * "selectofl": Selected CPU partially offline. 185 + * "startwait": Started waiting on selected CPUs. 182 186 * "end": Ended a real expedited grace period. 183 187 * "endwake": Woke piggybackers up. 184 188 * "done": Someone else did the expedited grace period for us.
+27 -11
kernel/rcu/rcu.h
··· 77 77 WARN_ON_ONCE(rcu_seq_state(*sp) != 1); 78 78 } 79 79 80 + /* Compute the end-of-grace-period value for the specified sequence number. */ 81 + static inline unsigned long rcu_seq_endval(unsigned long *sp) 82 + { 83 + return (*sp | RCU_SEQ_STATE_MASK) + 1; 84 + } 85 + 80 86 /* Adjust sequence number for end of update-side operation. */ 81 87 static inline void rcu_seq_end(unsigned long *sp) 82 88 { 83 89 smp_mb(); /* Ensure update-side operation before counter increment. */ 84 90 WARN_ON_ONCE(!rcu_seq_state(*sp)); 85 - WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1); 91 + WRITE_ONCE(*sp, rcu_seq_endval(sp)); 86 92 } 87 93 88 94 /* Take a snapshot of the update side's sequence number. */ ··· 301 295 * Iterate over all possible CPUs in a leaf RCU node. 302 296 */ 303 297 #define for_each_leaf_node_possible_cpu(rnp, cpu) \ 304 - for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \ 305 - cpu <= rnp->grphi; \ 306 - cpu = cpumask_next((cpu), cpu_possible_mask)) 298 + for ((cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \ 299 + (cpu) <= rnp->grphi; \ 300 + (cpu) = cpumask_next((cpu), cpu_possible_mask)) 301 + 302 + /* 303 + * Iterate over all CPUs in a leaf RCU node's specified mask. 304 + */ 305 + #define rcu_find_next_bit(rnp, cpu, mask) \ 306 + ((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu))) 307 + #define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \ 308 + for ((cpu) = rcu_find_next_bit((rnp), 0, (mask)); \ 309 + (cpu) <= rnp->grphi; \ 310 + (cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask))) 307 311 308 312 /* 309 313 * Wrappers for the rcu_node::lock acquire and release. 
··· 353 337 } while (0) 354 338 355 339 #define raw_spin_unlock_irqrestore_rcu_node(p, flags) \ 356 - raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ 340 + raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) 357 341 358 342 #define raw_spin_trylock_rcu_node(p) \ 359 343 ({ \ ··· 364 348 ___locked; \ 365 349 }) 366 350 351 + #define raw_lockdep_assert_held_rcu_node(p) \ 352 + lockdep_assert_held(&ACCESS_PRIVATE(p, lock)) 353 + 367 354 #endif /* #if defined(SRCU) || !defined(TINY_RCU) */ 368 355 369 356 #ifdef CONFIG_TINY_RCU ··· 375 356 static inline bool rcu_gp_is_expedited(void) { return false; } 376 357 static inline void rcu_expedite_gp(void) { } 377 358 static inline void rcu_unexpedite_gp(void) { } 359 + static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } 378 360 #else /* #ifdef CONFIG_TINY_RCU */ 379 361 bool rcu_gp_is_normal(void); /* Internal RCU use. */ 380 362 bool rcu_gp_is_expedited(void); /* Internal RCU use. */ 381 363 void rcu_expedite_gp(void); 382 364 void rcu_unexpedite_gp(void); 383 365 void rcupdate_announce_bootup_oddness(void); 366 + void rcu_request_urgent_qs_task(struct task_struct *t); 384 367 #endif /* #else #ifdef CONFIG_TINY_RCU */ 385 368 386 369 #define RCU_SCHEDULER_INACTIVE 0 387 370 #define RCU_SCHEDULER_INIT 1 388 371 #define RCU_SCHEDULER_RUNNING 2 389 - 390 - #ifdef CONFIG_TINY_RCU 391 - static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } 392 - #else /* #ifdef CONFIG_TINY_RCU */ 393 - void rcu_request_urgent_qs_task(struct task_struct *t); 394 - #endif /* #else #ifdef CONFIG_TINY_RCU */ 395 372 396 373 enum rcutorture_type { 397 374 RCU_FLAVOR, ··· 485 470 void rcu_force_quiescent_state(void); 486 471 void rcu_bh_force_quiescent_state(void); 487 472 void rcu_sched_force_quiescent_state(void); 473 + extern struct workqueue_struct *rcu_gp_wq; 488 474 #endif /* #else #ifdef CONFIG_TINY_RCU */ 489 475 490 476 #ifdef CONFIG_RCU_NOCB_CPU
+20 -1
kernel/rcu/rcuperf.c
··· 61 61 #define VERBOSE_PERFOUT_ERRSTRING(s) \ 62 62 do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0) 63 63 64 + /* 65 + * The intended use cases for the nreaders and nwriters module parameters 66 + * are as follows: 67 + * 68 + * 1. Specify only the nr_cpus kernel boot parameter. This will 69 + * set both nreaders and nwriters to the value specified by 70 + * nr_cpus for a mixed reader/writer test. 71 + * 72 + * 2. Specify the nr_cpus kernel boot parameter, but set 73 + * rcuperf.nreaders to zero. This will set nwriters to the 74 + * value specified by nr_cpus for an update-only test. 75 + * 76 + * 3. Specify the nr_cpus kernel boot parameter, but set 77 + * rcuperf.nwriters to zero. This will set nreaders to the 78 + * value specified by nr_cpus for a read-only test. 79 + * 80 + * Various other use cases may of course be specified. 81 + */ 82 + 64 83 torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); 65 84 torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader"); 66 85 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); 67 86 torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); 68 - torture_param(int, nreaders, 0, "Number of RCU reader threads"); 87 + torture_param(int, nreaders, -1, "Number of RCU reader threads"); 69 88 torture_param(int, nwriters, -1, "Number of RCU updater threads"); 70 89 torture_param(bool, shutdown, !IS_ENABLED(MODULE), 71 90 "Shutdown at end of performance tests.");
+41 -31
kernel/rcu/rcutorture.c
··· 909 909 int nsynctypes = 0; 910 910 911 911 VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); 912 - if (!can_expedite) { 912 + if (!can_expedite) 913 913 pr_alert("%s" TORTURE_FLAG 914 - " GP expediting controlled from boot/sysfs for %s,\n", 914 + " GP expediting controlled from boot/sysfs for %s.\n", 915 915 torture_type, cur_ops->name); 916 - pr_alert("%s" TORTURE_FLAG 917 - " Disabled dynamic grace-period expediting.\n", 918 - torture_type); 919 - } 920 916 921 917 /* Initialize synctype[] array. If none set, take default. */ 922 918 if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1) 923 919 gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true; 924 - if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) 920 + if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) { 925 921 synctype[nsynctypes++] = RTWS_COND_GET; 926 - else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) 927 - pr_alert("rcu_torture_writer: gp_cond without primitives.\n"); 928 - if (gp_exp1 && cur_ops->exp_sync) 922 + pr_info("%s: Testing conditional GPs.\n", __func__); 923 + } else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) { 924 + pr_alert("%s: gp_cond without primitives.\n", __func__); 925 + } 926 + if (gp_exp1 && cur_ops->exp_sync) { 929 927 synctype[nsynctypes++] = RTWS_EXP_SYNC; 930 - else if (gp_exp && !cur_ops->exp_sync) 931 - pr_alert("rcu_torture_writer: gp_exp without primitives.\n"); 932 - if (gp_normal1 && cur_ops->deferred_free) 928 + pr_info("%s: Testing expedited GPs.\n", __func__); 929 + } else if (gp_exp && !cur_ops->exp_sync) { 930 + pr_alert("%s: gp_exp without primitives.\n", __func__); 931 + } 932 + if (gp_normal1 && cur_ops->deferred_free) { 933 933 synctype[nsynctypes++] = RTWS_DEF_FREE; 934 - else if (gp_normal && !cur_ops->deferred_free) 935 - pr_alert("rcu_torture_writer: gp_normal without primitives.\n"); 936 - if (gp_sync1 && cur_ops->sync) 934 + pr_info("%s: Testing asynchronous GPs.\n", __func__); 935 + } else if 
(gp_normal && !cur_ops->deferred_free) { 936 + pr_alert("%s: gp_normal without primitives.\n", __func__); 937 + } 938 + if (gp_sync1 && cur_ops->sync) { 937 939 synctype[nsynctypes++] = RTWS_SYNC; 938 - else if (gp_sync && !cur_ops->sync) 939 - pr_alert("rcu_torture_writer: gp_sync without primitives.\n"); 940 + pr_info("%s: Testing normal GPs.\n", __func__); 941 + } else if (gp_sync && !cur_ops->sync) { 942 + pr_alert("%s: gp_sync without primitives.\n", __func__); 943 + } 940 944 if (WARN_ONCE(nsynctypes == 0, 941 945 "rcu_torture_writer: No update-side primitives.\n")) { 942 946 /* ··· 1015 1011 rcu_unexpedite_gp(); 1016 1012 if (++expediting > 3) 1017 1013 expediting = -expediting; 1014 + } else if (!can_expedite) { /* Disabled during boot, recheck. */ 1015 + can_expedite = !rcu_gp_is_expedited() && 1016 + !rcu_gp_is_normal(); 1018 1017 } 1019 1018 rcu_torture_writer_state = RTWS_STUTTER; 1020 1019 stutter_wait("rcu_torture_writer"); ··· 1028 1021 while (can_expedite && expediting++ < 0) 1029 1022 rcu_unexpedite_gp(); 1030 1023 WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited()); 1024 + if (!can_expedite) 1025 + pr_alert("%s" TORTURE_FLAG 1026 + " Dynamic grace-period expediting was disabled.\n", 1027 + torture_type); 1031 1028 rcu_torture_writer_state = RTWS_STOPPING; 1032 1029 torture_kthread_stopping("rcu_torture_writer"); 1033 1030 return 0; ··· 1056 1045 torture_random(&rand) % (nfakewriters * 8) == 0) { 1057 1046 cur_ops->cb_barrier(); 1058 1047 } else if (gp_normal == gp_exp) { 1059 - if (torture_random(&rand) & 0x80) 1048 + if (cur_ops->sync && torture_random(&rand) & 0x80) 1060 1049 cur_ops->sync(); 1061 - else 1050 + else if (cur_ops->exp_sync) 1062 1051 cur_ops->exp_sync(); 1063 - } else if (gp_normal) { 1052 + } else if (gp_normal && cur_ops->sync) { 1064 1053 cur_ops->sync(); 1065 - } else { 1054 + } else if (cur_ops->exp_sync) { 1066 1055 cur_ops->exp_sync(); 1067 1056 } 1068 1057 stutter_wait("rcu_torture_fakewriter"); ··· 1568 1557 
atomic_set(&barrier_cbs_count, 0); 1569 1558 atomic_set(&barrier_cbs_invoked, 0); 1570 1559 barrier_cbs_tasks = 1571 - kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]), 1560 + kcalloc(n_barrier_cbs, sizeof(barrier_cbs_tasks[0]), 1572 1561 GFP_KERNEL); 1573 1562 barrier_cbs_wq = 1574 - kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), 1575 - GFP_KERNEL); 1563 + kcalloc(n_barrier_cbs, sizeof(barrier_cbs_wq[0]), GFP_KERNEL); 1576 1564 if (barrier_cbs_tasks == NULL || !barrier_cbs_wq) 1577 1565 return -ENOMEM; 1578 1566 for (i = 0; i < n_barrier_cbs; i++) { ··· 1684 1674 * next grace period. Unlikely, but can happen. If it 1685 1675 * does happen, the debug-objects subsystem won't have splatted. 1686 1676 */ 1687 - pr_alert("rcutorture: duplicated callback was invoked.\n"); 1677 + pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME); 1688 1678 } 1689 1679 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 1690 1680 ··· 1701 1691 1702 1692 init_rcu_head_on_stack(&rh1); 1703 1693 init_rcu_head_on_stack(&rh2); 1704 - pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n"); 1694 + pr_alert("%s: WARN: Duplicate call_rcu() test starting.\n", KBUILD_MODNAME); 1705 1695 1706 1696 /* Try to queue the rh2 pair of callbacks for the same grace period. */ 1707 1697 preempt_disable(); /* Prevent preemption from interrupting test. */ ··· 1716 1706 1717 1707 /* Wait for them all to get done so we can safely return. 
*/ 1718 1708 rcu_barrier(); 1719 - pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n"); 1709 + pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME); 1720 1710 destroy_rcu_head_on_stack(&rh1); 1721 1711 destroy_rcu_head_on_stack(&rh2); 1722 1712 #else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 1723 - pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n"); 1713 + pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME); 1724 1714 #endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 1725 1715 } 1726 1716 ··· 1809 1799 if (firsterr) 1810 1800 goto unwind; 1811 1801 if (nfakewriters > 0) { 1812 - fakewriter_tasks = kzalloc(nfakewriters * 1802 + fakewriter_tasks = kcalloc(nfakewriters, 1813 1803 sizeof(fakewriter_tasks[0]), 1814 1804 GFP_KERNEL); 1815 1805 if (fakewriter_tasks == NULL) { ··· 1824 1814 if (firsterr) 1825 1815 goto unwind; 1826 1816 } 1827 - reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), 1817 + reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]), 1828 1818 GFP_KERNEL); 1829 1819 if (reader_tasks == NULL) { 1830 1820 VERBOSE_TOROUT_ERRSTRING("out of memory");
+14 -15
kernel/rcu/srcutree.c
··· 386 386 flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); 387 387 if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || 388 388 WARN_ON(srcu_readers_active(sp))) { 389 - pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); 389 + pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); 390 390 return; /* Caller forgot to stop doing call_srcu()? */ 391 391 } 392 392 free_percpu(sp->sda); ··· 439 439 struct srcu_data *sdp = this_cpu_ptr(sp->sda); 440 440 int state; 441 441 442 - lockdep_assert_held(&sp->lock); 442 + lockdep_assert_held(&ACCESS_PRIVATE(sp, lock)); 443 443 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); 444 444 rcu_segcblist_advance(&sdp->srcu_cblist, 445 445 rcu_seq_current(&sp->srcu_gp_seq)); ··· 492 492 */ 493 493 static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) 494 494 { 495 - srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq, 496 - &sdp->work, delay); 495 + srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay); 497 496 } 498 497 499 498 /* ··· 526 527 { 527 528 unsigned long cbdelay; 528 529 bool cbs; 530 + bool last_lvl; 529 531 int cpu; 530 532 unsigned long flags; 531 533 unsigned long gpseq; 532 534 int idx; 533 - int idxnext; 534 535 unsigned long mask; 535 536 struct srcu_data *sdp; 536 537 struct srcu_node *snp; ··· 554 555 555 556 /* Initiate callback invocation as needed. 
*/ 556 557 idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); 557 - idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); 558 558 rcu_for_each_node_breadth_first(sp, snp) { 559 559 spin_lock_irq_rcu_node(snp); 560 560 cbs = false; 561 - if (snp >= sp->level[rcu_num_lvls - 1]) 561 + last_lvl = snp >= sp->level[rcu_num_lvls - 1]; 562 + if (last_lvl) 562 563 cbs = snp->srcu_have_cbs[idx] == gpseq; 563 564 snp->srcu_have_cbs[idx] = gpseq; 564 565 rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1); ··· 571 572 srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); 572 573 573 574 /* Occasionally prevent srcu_data counter wrap. */ 574 - if (!(gpseq & counter_wrap_check)) 575 + if (!(gpseq & counter_wrap_check) && last_lvl) 575 576 for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { 576 577 sdp = per_cpu_ptr(sp->sda, cpu); 577 578 spin_lock_irqsave_rcu_node(sdp, flags); 578 579 if (ULONG_CMP_GE(gpseq, 579 580 sdp->srcu_gp_seq_needed + 100)) 580 581 sdp->srcu_gp_seq_needed = gpseq; 582 + if (ULONG_CMP_GE(gpseq, 583 + sdp->srcu_gp_seq_needed_exp + 100)) 584 + sdp->srcu_gp_seq_needed_exp = gpseq; 581 585 spin_unlock_irqrestore_rcu_node(sdp, flags); 582 586 } 583 587 } ··· 595 593 ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { 596 594 srcu_gp_start(sp); 597 595 spin_unlock_irq_rcu_node(sp); 598 - /* Throttle expedited grace periods: Should be rare! */ 599 - srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff 600 - ? 
0 : SRCU_INTERVAL); 596 + srcu_reschedule(sp, 0); 601 597 } else { 602 598 spin_unlock_irq_rcu_node(sp); 603 599 } ··· 626 626 spin_unlock_irqrestore_rcu_node(snp, flags); 627 627 } 628 628 spin_lock_irqsave_rcu_node(sp, flags); 629 - if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) 629 + if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) 630 630 sp->srcu_gp_seq_needed_exp = s; 631 631 spin_unlock_irqrestore_rcu_node(sp, flags); 632 632 } ··· 691 691 rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { 692 692 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); 693 693 srcu_gp_start(sp); 694 - queue_delayed_work(system_power_efficient_wq, &sp->work, 695 - srcu_get_delay(sp)); 694 + queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp)); 696 695 } 697 696 spin_unlock_irqrestore_rcu_node(sp, flags); 698 697 } ··· 1224 1225 spin_unlock_irq_rcu_node(sp); 1225 1226 1226 1227 if (pushgp) 1227 - queue_delayed_work(system_power_efficient_wq, &sp->work, delay); 1228 + queue_delayed_work(rcu_gp_wq, &sp->work, delay); 1228 1229 } 1229 1230 1230 1231 /*
+26 -46
kernel/rcu/tree.c
··· 1161 1161 */ 1162 1162 static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp) 1163 1163 { 1164 - lockdep_assert_held(&rnp->lock); 1164 + raw_lockdep_assert_held_rcu_node(rnp); 1165 1165 if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum)) 1166 1166 WRITE_ONCE(rdp->gpwrap, true); 1167 1167 if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum)) ··· 1350 1350 rsp->gp_kthread ? rsp->gp_kthread->state : ~0, 1351 1351 rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1); 1352 1352 if (rsp->gp_kthread) { 1353 + pr_err("RCU grace-period kthread stack dump:\n"); 1353 1354 sched_show_task(rsp->gp_kthread); 1354 1355 wake_up_process(rsp->gp_kthread); 1355 1356 } ··· 1629 1628 static unsigned long rcu_cbs_completed(struct rcu_state *rsp, 1630 1629 struct rcu_node *rnp) 1631 1630 { 1632 - lockdep_assert_held(&rnp->lock); 1631 + raw_lockdep_assert_held_rcu_node(rnp); 1633 1632 1634 1633 /* 1635 1634 * If RCU is idle, we just wait for the next grace period. ··· 1676 1675 bool ret = false; 1677 1676 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 1678 1677 1679 - lockdep_assert_held(&rnp->lock); 1678 + raw_lockdep_assert_held_rcu_node(rnp); 1680 1679 1681 1680 /* 1682 1681 * Pick up grace-period number for new callbacks. If this ··· 1804 1803 { 1805 1804 bool ret = false; 1806 1805 1807 - lockdep_assert_held(&rnp->lock); 1806 + raw_lockdep_assert_held_rcu_node(rnp); 1808 1807 1809 1808 /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ 1810 1809 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) ··· 1844 1843 static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1845 1844 struct rcu_data *rdp) 1846 1845 { 1847 - lockdep_assert_held(&rnp->lock); 1846 + raw_lockdep_assert_held_rcu_node(rnp); 1848 1847 1849 1848 /* If no pending (not yet ready to invoke) callbacks, nothing to do. 
*/ 1850 1849 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) ··· 1872 1871 bool ret; 1873 1872 bool need_gp; 1874 1873 1875 - lockdep_assert_held(&rnp->lock); 1874 + raw_lockdep_assert_held_rcu_node(rnp); 1876 1875 1877 1876 /* Handle the ends of any preceding grace periods first. */ 1878 1877 if (rdp->completed == rnp->completed && ··· 2297 2296 rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 2298 2297 struct rcu_data *rdp) 2299 2298 { 2300 - lockdep_assert_held(&rnp->lock); 2299 + raw_lockdep_assert_held_rcu_node(rnp); 2301 2300 if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { 2302 2301 /* 2303 2302 * Either we have not yet spawned the grace-period ··· 2359 2358 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 2360 2359 __releases(rcu_get_root(rsp)->lock) 2361 2360 { 2362 - lockdep_assert_held(&rcu_get_root(rsp)->lock); 2361 + raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp)); 2363 2362 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 2364 2363 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); 2365 2364 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); ··· 2384 2383 unsigned long oldmask = 0; 2385 2384 struct rcu_node *rnp_c; 2386 2385 2387 - lockdep_assert_held(&rnp->lock); 2386 + raw_lockdep_assert_held_rcu_node(rnp); 2388 2387 2389 2388 /* Walk up the rcu_node hierarchy. 
*/ 2390 2389 for (;;) { ··· 2448 2447 unsigned long mask; 2449 2448 struct rcu_node *rnp_p; 2450 2449 2451 - lockdep_assert_held(&rnp->lock); 2450 + raw_lockdep_assert_held_rcu_node(rnp); 2452 2451 if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || 2453 2452 rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 2454 2453 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); ··· 2593 2592 long mask; 2594 2593 struct rcu_node *rnp = rnp_leaf; 2595 2594 2596 - lockdep_assert_held(&rnp->lock); 2595 + raw_lockdep_assert_held_rcu_node(rnp); 2597 2596 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || 2598 2597 rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) 2599 2598 return; ··· 2692 2691 /* Update counts and requeue any remaining callbacks. */ 2693 2692 rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl); 2694 2693 smp_mb(); /* List handling before counting for rcu_barrier(). */ 2695 - rdp->n_cbs_invoked += count; 2696 2694 rcu_segcblist_insert_count(&rdp->cblist, &rcl); 2697 2695 2698 2696 /* Reinstate batch limit if we have worked down the excess. */ ··· 2845 2845 !raw_spin_trylock(&rnp->fqslock); 2846 2846 if (rnp_old != NULL) 2847 2847 raw_spin_unlock(&rnp_old->fqslock); 2848 - if (ret) { 2849 - rsp->n_force_qs_lh++; 2848 + if (ret) 2850 2849 return; 2851 - } 2852 2850 rnp_old = rnp; 2853 2851 } 2854 2852 /* rnp_old == rcu_get_root(rsp), rnp == NULL. */ ··· 2855 2857 raw_spin_lock_irqsave_rcu_node(rnp_old, flags); 2856 2858 raw_spin_unlock(&rnp_old->fqslock); 2857 2859 if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 2858 - rsp->n_force_qs_lh++; 2859 2860 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); 2860 2861 return; /* Someone beat us to it. */ 2861 2862 } ··· 3352 3355 { 3353 3356 struct rcu_node *rnp = rdp->mynode; 3354 3357 3355 - rdp->n_rcu_pending++; 3356 - 3357 3358 /* Check for CPU stalls, if enabled. 
*/ 3358 3359 check_cpu_stall(rsp, rdp); 3359 3360 ··· 3360 3365 return 0; 3361 3366 3362 3367 /* Is the RCU core waiting for a quiescent state from this CPU? */ 3363 - if (rcu_scheduler_fully_active && 3364 - rdp->core_needs_qs && rdp->cpu_no_qs.b.norm && 3365 - rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) { 3366 - rdp->n_rp_core_needs_qs++; 3367 - } else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) { 3368 - rdp->n_rp_report_qs++; 3368 + if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) 3369 3369 return 1; 3370 - } 3371 3370 3372 3371 /* Does this CPU have callbacks ready to invoke? */ 3373 - if (rcu_segcblist_ready_cbs(&rdp->cblist)) { 3374 - rdp->n_rp_cb_ready++; 3372 + if (rcu_segcblist_ready_cbs(&rdp->cblist)) 3375 3373 return 1; 3376 - } 3377 3374 3378 3375 /* Has RCU gone idle with this CPU needing another grace period? */ 3379 - if (cpu_needs_another_gp(rsp, rdp)) { 3380 - rdp->n_rp_cpu_needs_gp++; 3376 + if (cpu_needs_another_gp(rsp, rdp)) 3381 3377 return 1; 3382 - } 3383 3378 3384 3379 /* Has another RCU grace period completed? */ 3385 - if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ 3386 - rdp->n_rp_gp_completed++; 3380 + if (READ_ONCE(rnp->completed) != rdp->completed) /* outside lock */ 3387 3381 return 1; 3388 - } 3389 3382 3390 3383 /* Has a new RCU grace period started? */ 3391 3384 if (READ_ONCE(rnp->gpnum) != rdp->gpnum || 3392 - unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */ 3393 - rdp->n_rp_gp_started++; 3385 + unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */ 3394 3386 return 1; 3395 - } 3396 3387 3397 3388 /* Does this CPU need a deferred NOCB wakeup? 
*/ 3398 - if (rcu_nocb_need_deferred_wakeup(rdp)) { 3399 - rdp->n_rp_nocb_defer_wakeup++; 3389 + if (rcu_nocb_need_deferred_wakeup(rdp)) 3400 3390 return 1; 3401 - } 3402 3391 3403 3392 /* nothing to do */ 3404 - rdp->n_rp_need_nothing++; 3405 3393 return 0; 3406 3394 } 3407 3395 ··· 3596 3618 long mask; 3597 3619 struct rcu_node *rnp = rnp_leaf; 3598 3620 3599 - lockdep_assert_held(&rnp->lock); 3621 + raw_lockdep_assert_held_rcu_node(rnp); 3600 3622 for (;;) { 3601 3623 mask = rnp->grpmask; 3602 3624 rnp = rnp->parent; ··· 3614 3636 static void __init 3615 3637 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 3616 3638 { 3617 - unsigned long flags; 3618 3639 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 3619 - struct rcu_node *rnp = rcu_get_root(rsp); 3620 3640 3621 3641 /* Set up local state, ensuring consistent view of global state. */ 3622 - raw_spin_lock_irqsave_rcu_node(rnp, flags); 3623 3642 rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); 3624 3643 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 3625 3644 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1); ··· 3624 3649 rdp->cpu = cpu; 3625 3650 rdp->rsp = rsp; 3626 3651 rcu_boot_init_nocb_percpu_data(rdp); 3627 - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3628 3652 } 3629 3653 3630 3654 /* ··· 4167 4193 pr_cont("\n"); 4168 4194 } 4169 4195 4196 + struct workqueue_struct *rcu_gp_wq; 4197 + 4170 4198 void __init rcu_init(void) 4171 4199 { 4172 4200 int cpu; ··· 4195 4219 rcu_cpu_starting(cpu); 4196 4220 rcutree_online_cpu(cpu); 4197 4221 } 4222 + 4223 + /* Create workqueue for expedited GPs and for Tree SRCU. */ 4224 + rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); 4225 + WARN_ON(!rcu_gp_wq); 4198 4226 } 4199 4227 4200 4228 #include "tree_exp.h"
+3 -33
kernel/rcu/tree.h
··· 146 146 /* boosting for this rcu_node structure. */ 147 147 unsigned int boost_kthread_status; 148 148 /* State of boost_kthread_task for tracing. */ 149 - unsigned long n_tasks_boosted; 150 - /* Total number of tasks boosted. */ 151 - unsigned long n_exp_boosts; 152 - /* Number of tasks boosted for expedited GP. */ 153 - unsigned long n_normal_boosts; 154 - /* Number of tasks boosted for normal GP. */ 155 149 #ifdef CONFIG_RCU_NOCB_CPU 156 150 struct swait_queue_head nocb_gp_wq[2]; 157 151 /* Place for rcu_nocb_kthread() to wait GP. */ ··· 178 184 u16 s; /* Set of bits, aggregate OR here. */ 179 185 }; 180 186 181 - /* Index values for nxttail array in struct rcu_data. */ 182 - #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ 183 - #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ 184 - #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ 185 - #define RCU_NEXT_TAIL 3 186 - #define RCU_NEXT_SIZE 4 187 - 188 187 /* Per-CPU data for read-copy update. */ 189 188 struct rcu_data { 190 189 /* 1) quiescent-state and grace-period handling : */ ··· 204 217 /* different grace periods. */ 205 218 long qlen_last_fqs_check; 206 219 /* qlen at last check for QS forcing */ 207 - unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ 208 - unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */ 209 220 unsigned long n_force_qs_snap; 210 221 /* did other CPU force QS recently? */ 211 222 long blimit; /* Upper limit on a processed batch */ ··· 219 234 /* Grace period that needs help */ 220 235 /* from cond_resched(). */ 221 236 222 - /* 5) __rcu_pending() statistics. */ 223 - unsigned long n_rcu_pending; /* rcu_pending() calls since boot. 
*/ 224 - unsigned long n_rp_core_needs_qs; 225 - unsigned long n_rp_report_qs; 226 - unsigned long n_rp_cb_ready; 227 - unsigned long n_rp_cpu_needs_gp; 228 - unsigned long n_rp_gp_completed; 229 - unsigned long n_rp_gp_started; 230 - unsigned long n_rp_nocb_defer_wakeup; 231 - unsigned long n_rp_need_nothing; 232 - 233 - /* 6) _rcu_barrier(), OOM callbacks, and expediting. */ 237 + /* 5) _rcu_barrier(), OOM callbacks, and expediting. */ 234 238 struct rcu_head barrier_head; 235 239 #ifdef CONFIG_RCU_FAST_NO_HZ 236 240 struct rcu_head oom_head; ··· 230 256 atomic_long_t exp_workdone3; /* # done by others #3. */ 231 257 int exp_dynticks_snap; /* Double-check need for IPI. */ 232 258 233 - /* 7) Callback offloading. */ 259 + /* 6) Callback offloading. */ 234 260 #ifdef CONFIG_RCU_NOCB_CPU 235 261 struct rcu_head *nocb_head; /* CBs waiting for kthread. */ 236 262 struct rcu_head **nocb_tail; ··· 257 283 /* Leader CPU takes GP-end wakeups. */ 258 284 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 259 285 260 - /* 8) RCU CPU stall data. */ 286 + /* 7) RCU CPU stall data. */ 261 287 unsigned int softirq_snap; /* Snapshot of softirq activity. */ 262 288 /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */ 263 289 struct irq_work rcu_iw; /* Check for non-irq activity. */ ··· 348 374 /* kthreads, if configured. */ 349 375 unsigned long n_force_qs; /* Number of calls to */ 350 376 /* force_quiescent_state(). */ 351 - unsigned long n_force_qs_lh; /* ~Number of calls leaving */ 352 - /* due to lock unavailable. */ 353 - unsigned long n_force_qs_ngp; /* Number of calls leaving */ 354 - /* due to no GP active. */ 355 377 unsigned long gp_start; /* Time at which GP started, */ 356 378 /* but in jiffies. */ 357 379 unsigned long gp_activity; /* Time of last GP kthread */
+28 -8
kernel/rcu/tree_exp.h
··· 29 29 } 30 30 31 31 /* 32 + * Return then value that expedited-grace-period counter will have 33 + * at the end of the current grace period. 34 + */ 35 + static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp) 36 + { 37 + return rcu_seq_endval(&rsp->expedited_sequence); 38 + } 39 + 40 + /* 32 41 * Record the end of an expedited grace period. 33 42 */ 34 43 static void rcu_exp_gp_seq_end(struct rcu_state *rsp) ··· 375 366 int ret; 376 367 struct rcu_node *rnp; 377 368 369 + trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); 378 370 sync_exp_reset_tree(rsp); 371 + trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select")); 379 372 rcu_for_each_leaf_node(rsp, rnp) { 380 373 raw_spin_lock_irqsave_rcu_node(rnp, flags); 381 374 382 375 /* Each pass checks a CPU for identity, offline, and idle. */ 383 376 mask_ofl_test = 0; 384 - for_each_leaf_node_possible_cpu(rnp, cpu) { 377 + for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { 378 + unsigned long mask = leaf_node_cpu_bit(rnp, cpu); 385 379 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 380 + struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu); 381 + int snap; 386 382 387 - rdp->exp_dynticks_snap = 388 - rcu_dynticks_snap(rdp->dynticks); 389 383 if (raw_smp_processor_id() == cpu || 390 - rcu_dynticks_in_eqs(rdp->exp_dynticks_snap) || 391 - !(rnp->qsmaskinitnext & rdp->grpmask)) 392 - mask_ofl_test |= rdp->grpmask; 384 + !(rnp->qsmaskinitnext & mask)) { 385 + mask_ofl_test |= mask; 386 + } else { 387 + snap = rcu_dynticks_snap(rdtp); 388 + if (rcu_dynticks_in_eqs(snap)) 389 + mask_ofl_test |= mask; 390 + else 391 + rdp->exp_dynticks_snap = snap; 392 + } 393 393 } 394 394 mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; 395 395 ··· 412 394 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 413 395 414 396 /* IPI the remaining CPUs for expedited quiescent state. 
*/ 415 - for_each_leaf_node_possible_cpu(rnp, cpu) { 397 + for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { 416 398 unsigned long mask = leaf_node_cpu_bit(rnp, cpu); 417 399 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 418 400 ··· 435 417 (rnp->expmask & mask)) { 436 418 /* Online, so delay for a bit and try again. */ 437 419 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 420 + trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl")); 438 421 schedule_timeout_uninterruptible(1); 439 422 goto retry_ipi; 440 423 } ··· 462 443 struct rcu_node *rnp_root = rcu_get_root(rsp); 463 444 int ret; 464 445 446 + trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait")); 465 447 jiffies_stall = rcu_jiffies_till_stall_check(); 466 448 jiffies_start = jiffies; 467 449 ··· 626 606 rew.rew_rsp = rsp; 627 607 rew.rew_s = s; 628 608 INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); 629 - schedule_work(&rew.rew_work); 609 + queue_work(rcu_gp_wq, &rew.rew_work); 630 610 } 631 611 632 612 /* Wait for expedited grace period to complete. */
+22 -12
kernel/rcu/tree_plugin.h
··· 180 180 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0); 181 181 struct task_struct *t = current; 182 182 183 - lockdep_assert_held(&rnp->lock); 183 + raw_lockdep_assert_held_rcu_node(rnp); 184 184 WARN_ON_ONCE(rdp->mynode != rnp); 185 185 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); 186 186 ··· 560 560 } 561 561 t = list_entry(rnp->gp_tasks->prev, 562 562 struct task_struct, rcu_node_entry); 563 - list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 563 + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 564 + /* 565 + * We could be printing a lot while holding a spinlock. 566 + * Avoid triggering hard lockup. 567 + */ 568 + touch_nmi_watchdog(); 564 569 sched_show_task(t); 570 + } 565 571 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 566 572 } 567 573 ··· 963 957 * expedited grace period must boost all blocked tasks, including 964 958 * those blocking the pre-existing normal grace period. 965 959 */ 966 - if (rnp->exp_tasks != NULL) { 960 + if (rnp->exp_tasks != NULL) 967 961 tb = rnp->exp_tasks; 968 - rnp->n_exp_boosts++; 969 - } else { 962 + else 970 963 tb = rnp->boost_tasks; 971 - rnp->n_normal_boosts++; 972 - } 973 - rnp->n_tasks_boosted++; 974 964 975 965 /* 976 966 * We boost task t by manufacturing an rt_mutex that appears to ··· 1044 1042 { 1045 1043 struct task_struct *t; 1046 1044 1047 - lockdep_assert_held(&rnp->lock); 1045 + raw_lockdep_assert_held_rcu_node(rnp); 1048 1046 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { 1049 1047 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1050 1048 return; ··· 1679 1677 char *ticks_title; 1680 1678 unsigned long ticks_value; 1681 1679 1680 + /* 1681 + * We could be printing a lot while holding a spinlock. Avoid 1682 + * triggering hard lockup. 
1683 + */ 1684 + touch_nmi_watchdog(); 1685 + 1682 1686 if (rsp->gpnum == rdp->gpnum) { 1683 1687 ticks_title = "ticks this GP"; 1684 1688 ticks_value = rdp->ticks_this_gp; ··· 2243 2235 smp_mb__before_atomic(); /* _add after CB invocation. */ 2244 2236 atomic_long_add(-c, &rdp->nocb_q_count); 2245 2237 atomic_long_add(-cl, &rdp->nocb_q_count_lazy); 2246 - rdp->n_nocbs_invoked += c; 2247 2238 } 2248 2239 return 0; 2249 2240 } ··· 2319 2312 cpumask_and(rcu_nocb_mask, cpu_possible_mask, 2320 2313 rcu_nocb_mask); 2321 2314 } 2322 - pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n", 2323 - cpumask_pr_args(rcu_nocb_mask)); 2315 + if (cpumask_empty(rcu_nocb_mask)) 2316 + pr_info("\tOffload RCU callbacks from CPUs: (none).\n"); 2317 + else 2318 + pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n", 2319 + cpumask_pr_args(rcu_nocb_mask)); 2324 2320 if (rcu_nocb_poll) 2325 2321 pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); 2326 2322
-10
kernel/time/Kconfig
··· 113 113 114 114 endchoice 115 115 116 - config NO_HZ_FULL_ALL 117 - bool "Full dynticks system on all CPUs by default (except CPU 0)" 118 - depends on NO_HZ_FULL 119 - help 120 - If the user doesn't pass the nohz_full boot option to 121 - define the range of full dynticks CPUs, consider that all 122 - CPUs in the system are full dynticks by default. 123 - Note the boot CPU will still be kept outside the range to 124 - handle the timekeeping duty. 125 - 126 116 config NO_HZ 127 117 bool "Old Idle dynticks config" 128 118 depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
+2 -20
kernel/time/tick-sched.c
··· 405 405 return 0; 406 406 } 407 407 408 - static int tick_nohz_init_all(void) 409 - { 410 - int err = -1; 411 - 412 - #ifdef CONFIG_NO_HZ_FULL_ALL 413 - if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { 414 - WARN(1, "NO_HZ: Can't allocate full dynticks cpumask\n"); 415 - return err; 416 - } 417 - err = 0; 418 - cpumask_setall(tick_nohz_full_mask); 419 - tick_nohz_full_running = true; 420 - #endif 421 - return err; 422 - } 423 - 424 408 void __init tick_nohz_init(void) 425 409 { 426 410 int cpu, ret; 427 411 428 - if (!tick_nohz_full_running) { 429 - if (tick_nohz_init_all() < 0) 430 - return; 431 - } 412 + if (!tick_nohz_full_running) 413 + return; 432 414 433 415 /* 434 416 * Full dynticks uses irq work to drive the tick rescheduling on safe
+15 -2
tools/testing/selftests/rcutorture/bin/functions.sh
··· 136 136 qemu-system-x86_64|qemu-system-i386) 137 137 echo arch/x86/boot/bzImage 138 138 ;; 139 + qemu-system-aarch64) 140 + echo arch/arm64/boot/Image 141 + ;; 139 142 *) 140 143 echo vmlinux 141 144 ;; ··· 161 158 elif echo $u | grep -q "Intel 80386" 162 159 then 163 160 echo qemu-system-i386 161 + elif echo $u | grep -q aarch64 162 + then 163 + echo qemu-system-aarch64 164 164 elif uname -a | grep -q ppc64 165 165 then 166 166 echo qemu-system-ppc64 ··· 182 176 # Output arguments for the qemu "-append" string based on CPU type 183 177 # and the TORTURE_QEMU_INTERACTIVE environment variable. 184 178 identify_qemu_append () { 179 + local console=ttyS0 185 180 case "$1" in 186 181 qemu-system-x86_64|qemu-system-i386) 187 182 echo noapic selinux=0 initcall_debug debug 183 + ;; 184 + qemu-system-aarch64) 185 + console=ttyAMA0 188 186 ;; 189 187 esac 190 188 if test -n "$TORTURE_QEMU_INTERACTIVE" 191 189 then 192 190 echo root=/dev/sda 193 191 else 194 - echo console=ttyS0 192 + echo console=$console 195 193 fi 196 194 } 197 195 ··· 206 196 identify_qemu_args () { 207 197 case "$1" in 208 198 qemu-system-x86_64|qemu-system-i386) 199 + ;; 200 + qemu-system-aarch64) 201 + echo -machine virt,gic-version=host -cpu host 209 202 ;; 210 203 qemu-system-ppc64) 211 204 echo -enable-kvm -M pseries -nodefaults ··· 267 254 echo $2 268 255 else 269 256 case "$1" in 270 - qemu-system-x86_64|qemu-system-i386) 257 + qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64) 271 258 echo $2 -smp $3 272 259 ;; 273 260 qemu-system-ppc64)
+6 -5
tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
··· 39 39 tr -d '\015' | 40 40 awk ' 41 41 $8 == "start" { 42 - if (starttask != "") 42 + if (startseq != "") 43 43 nlost++; 44 44 starttask = $1; 45 45 starttime = $3; 46 46 startseq = $7; 47 + seqtask[startseq] = starttask; 47 48 } 48 49 49 50 $8 == "end" { 50 - if (starttask == $1 && startseq == $7) { 51 + if (startseq == $7) { 51 52 curgpdur = $3 - starttime; 52 53 gptimes[++n] = curgpdur; 53 54 gptaskcnt[starttask]++; 54 55 sum += curgpdur; 55 56 if (curgpdur > 1000) 56 57 print "Long GP " starttime "us to " $3 "us (" curgpdur "us)"; 57 - starttask = ""; 58 + startseq = ""; 58 59 } else { 59 60 # Lost a message or some such, reset. 60 - starttask = ""; 61 + startseq = ""; 61 62 nlost++; 62 63 } 63 64 } 64 65 65 - $8 == "done" { 66 + $8 == "done" && seqtask[$7] != $1 { 66 67 piggybackcnt[$1]++; 67 68 } 68 69
+2 -2
tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
··· 177 177 exit 0 178 178 fi 179 179 echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log 180 - echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd 181 - ( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & 180 + echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd 181 + ( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & 182 182 commandcompleted=0 183 183 sleep 10 # Give qemu's pid a chance to reach the file 184 184 if test -s "$resdir/qemu_pid"
+17 -5
tools/testing/selftests/rcutorture/bin/kvm.sh
··· 1 1 #!/bin/bash 2 2 # 3 - # Run a series of 14 tests under KVM. These are not particularly 4 - # well-selected or well-tuned, but are the current set. 5 - # 6 - # Edit the definitions below to set the locations of the various directories, 7 - # as well as the test duration. 3 + # Run a series of tests under KVM. By default, this series is specified 4 + # by the relevant CFLIST file, but can be overridden by the --configs 5 + # command-line argument. 8 6 # 9 7 # Usage: kvm.sh [ options ] 10 8 # ··· 42 44 TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD 43 45 TORTURE_KCONFIG_ARG="" 44 46 TORTURE_KMAKE_ARG="" 47 + TORTURE_QEMU_MEM=512 45 48 TORTURE_SHUTDOWN_GRACE=180 46 49 TORTURE_SUITE=rcu 47 50 resdir="" ··· 69 70 echo " --kconfig Kconfig-options" 70 71 echo " --kmake-arg kernel-make-arguments" 71 72 echo " --mac nn:nn:nn:nn:nn:nn" 73 + echo " --memory megabytes | nnnG" 72 74 echo " --no-initrd" 73 75 echo " --qemu-args qemu-arguments" 74 76 echo " --qemu-cmd qemu-system-..." ··· 147 147 TORTURE_QEMU_MAC=$2 148 148 shift 149 149 ;; 150 + --memory) 151 + checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error 152 + TORTURE_QEMU_MEM=$2 153 + shift 154 + ;; 150 155 --no-initrd) 151 156 TORTURE_INITRD=""; export TORTURE_INITRD 152 157 ;; ··· 179 174 checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--' 180 175 TORTURE_SUITE=$2 181 176 shift 177 + if test "$TORTURE_SUITE" = rcuperf 178 + then 179 + # If you really want jitter for rcuperf, specify 180 + # it after specifying rcuperf. (But why?) 
181 + jitter=0 182 + fi 182 183 ;; 183 184 *) 184 185 echo Unknown argument $1 ··· 299 288 TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD 300 289 TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE 301 290 TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC 291 + TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM 302 292 TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE 303 293 TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE 304 294 if ! test -e $resdir
-1
tools/testing/selftests/rcutorture/configs/rcu/TASKS03
··· 9 9 CONFIG_HZ_PERIODIC=n 10 10 CONFIG_NO_HZ_IDLE=n 11 11 CONFIG_NO_HZ_FULL=y 12 - CONFIG_NO_HZ_FULL_ALL=y 13 12 #CHECK#CONFIG_RCU_EXPERT=n
+1 -1
tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
··· 1 - rcutorture.torture_type=tasks 1 + rcutorture.torture_type=tasks nohz_full=1
-1
tools/testing/selftests/rcutorture/configs/rcu/TREE04
··· 7 7 CONFIG_HZ_PERIODIC=n 8 8 CONFIG_NO_HZ_IDLE=n 9 9 CONFIG_NO_HZ_FULL=y 10 - CONFIG_NO_HZ_FULL_ALL=y 11 10 CONFIG_RCU_FAST_NO_HZ=y 12 11 CONFIG_RCU_TRACE=y 13 12 CONFIG_HOTPLUG_CPU=n
+1 -1
tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
··· 1 - rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 1 + rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
-1
tools/testing/selftests/rcutorture/configs/rcu/TREE07
··· 7 7 CONFIG_HZ_PERIODIC=n 8 8 CONFIG_NO_HZ_IDLE=n 9 9 CONFIG_NO_HZ_FULL=y 10 - CONFIG_NO_HZ_FULL_ALL=n 11 10 CONFIG_RCU_FAST_NO_HZ=n 12 11 CONFIG_RCU_TRACE=y 13 12 CONFIG_HOTPLUG_CPU=y
+1 -23
tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
··· 20 20 # 21 21 # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 22 23 - # rcuperf_param_nreaders bootparam-string 24 - # 25 - # Adds nreaders rcuperf module parameter if not already specified. 26 - rcuperf_param_nreaders () { 27 - if ! echo "$1" | grep -q "rcuperf.nreaders" 28 - then 29 - echo rcuperf.nreaders=-1 30 - fi 31 - } 32 - 33 - # rcuperf_param_nwriters bootparam-string 34 - # 35 - # Adds nwriters rcuperf module parameter if not already specified. 36 - rcuperf_param_nwriters () { 37 - if ! echo "$1" | grep -q "rcuperf.nwriters" 38 - then 39 - echo rcuperf.nwriters=-1 40 - fi 41 - } 42 - 43 23 # per_version_boot_params bootparam-string config-file seconds 44 24 # 45 25 # Adds per-version torture-module parameters to kernels supporting them. 46 26 per_version_boot_params () { 47 - echo $1 `rcuperf_param_nreaders "$1"` \ 48 - `rcuperf_param_nwriters "$1"` \ 49 - rcuperf.shutdown=1 \ 27 + echo $1 rcuperf.shutdown=1 \ 50 28 rcuperf.verbose=1 51 29 }
+1 -1
tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
··· 1 - This document describes one way to created the rcu-test-image file 1 + This document describes one way to create the rcu-test-image file 2 2 that contains the filesystem used by the guest-OS kernel. There are 3 3 probably much better ways of doing this, and this filesystem could no 4 4 doubt be smaller. It is probably also possible to simply download