Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branches 'doc.2023.05.10a', 'fixes.2023.05.11a', 'kvfree.2023.05.10a', 'nocb.2023.05.11a', 'rcu-tasks.2023.05.10a', 'torture.2023.05.15a' and 'rcu-urgent.2023.06.06a' into HEAD

doc.2023.05.10a: Documentation updates
fixes.2023.05.11a: Miscellaneous fixes
kvfree.2023.05.10a: kvfree_rcu updates
nocb.2023.05.11a: Callback-offloading updates
rcu-tasks.2023.05.10a: Tasks RCU updates
torture.2023.05.15a: Torture-test updates
rcu-urgent.2023.06.06a: Urgent SRCU fix

+332 -271
+1 -35
Documentation/RCU/Design/Requirements/Requirements.rst

@@ -2071 +2071 @@
 
 Because RCU avoids interrupting idle CPUs, it is illegal to execute an
 RCU read-side critical section on an idle CPU. (Kernels built with
-``CONFIG_PROVE_RCU=y`` will splat if you try it.) The RCU_NONIDLE()
-macro and ``_rcuidle`` event tracing is provided to work around this
-restriction. In addition, rcu_is_watching() may be used to test
-whether or not it is currently legal to run RCU read-side critical
-sections on this CPU. I learned of the need for diagnostics on the one
-hand and RCU_NONIDLE() on the other while inspecting idle-loop code.
-Steven Rostedt supplied ``_rcuidle`` event tracing, which is used quite
-heavily in the idle loop. However, there are some restrictions on the
-code placed within RCU_NONIDLE():
-
-#. Blocking is prohibited. In practice, this is not a serious
-   restriction given that idle tasks are prohibited from blocking to
-   begin with.
-#. Although nesting RCU_NONIDLE() is permitted, they cannot nest
-   indefinitely deeply. However, given that they can be nested on the
-   order of a million deep, even on 32-bit systems, this should not be a
-   serious restriction. This nesting limit would probably be reached
-   long after the compiler OOMed or the stack overflowed.
-#. Any code path that enters RCU_NONIDLE() must sequence out of that
-   same RCU_NONIDLE(). For example, the following is grossly
-   illegal:
-
-   ::
-
-      1     RCU_NONIDLE({
-      2       do_something();
-      3       goto bad_idea;  /* BUG!!! */
-      4       do_something_else();});
-      5   bad_idea:
-
-
-   It is just as illegal to transfer control into the middle of
-   RCU_NONIDLE()'s argument. Yes, in theory, you could transfer in
-   as long as you also transferred out, but in practice you could also
-   expect to get sharply worded review comments.
+``CONFIG_PROVE_RCU=y`` will splat if you try it.)
 
 It is similarly socially unacceptable to interrupt an ``nohz_full`` CPU
 running in userspace. RCU must therefore track ``nohz_full`` userspace
-1
Documentation/RCU/whatisRCU.rst

@@ -1117 +1117 @@
 
 	RCU_LOCKDEP_WARN
 	rcu_sleep_check
-	RCU_NONIDLE
 
 All: Unchecked RCU-protected pointer access::
 
+11 -2
Documentation/admin-guide/kernel-parameters.txt

@@ -5094 +5094 @@
 
 	rcutorture.stall_cpu_block= [KNL]
 			Sleep while stalling if set. This will result
-			in warnings from preemptible RCU in addition
-			to any other stall-related activity.
+			in warnings from preemptible RCU in addition to
+			any other stall-related activity. Note that
+			in kernels built with CONFIG_PREEMPTION=n and
+			CONFIG_PREEMPT_COUNT=y, this parameter will
+			cause the CPU to pass through a quiescent state.
+			Given CONFIG_PREEMPTION=n, this will suppress
+			RCU CPU stall warnings, but will instead result
+			in scheduling-while-atomic splats.
+
+			Use of this module parameter results in splats.
+
 
 	rcutorture.stall_cpu_holdoff= [KNL]
 			Time to wait (s) after boot before inducing stall.
+10
include/linux/notifier.h

@@ -106 +106 @@
 #define RAW_NOTIFIER_INIT(name)	{				\
 		.head = NULL }
 
+#ifdef CONFIG_TREE_SRCU
+#define SRCU_NOTIFIER_INIT(name, pcpu)				\
+	{							\
+		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
+		.head = NULL,					\
+		.srcuu = __SRCU_USAGE_INIT(name.srcuu),		\
+		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
+	}
+#else
 #define SRCU_NOTIFIER_INIT(name, pcpu)				\
 	{							\
 		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
 		.head = NULL,					\
 		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
 	}
+#endif
 
 #define ATOMIC_NOTIFIER_HEAD(name)				\
 	struct atomic_notifier_head name =			\
+8 -46
include/linux/rcupdate.h

@@ -156 +156 @@
 static inline void rcu_nocb_flush_deferred_wakeup(void) { }
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
-/**
- * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
- * @a: Code that RCU needs to pay attention to.
- *
- * RCU read-side critical sections are forbidden in the inner idle loop,
- * that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU
- * will happily ignore any such read-side critical sections. However,
- * things like powertop need tracepoints in the inner idle loop.
- *
- * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU())
- * will tell RCU that it needs to pay attention, invoke its argument
- * (in this example, calling the do_something_with_RCU() function),
- * and then tell RCU to go back to ignoring this CPU. It is permissible
- * to nest RCU_NONIDLE() wrappers, but not indefinitely (but the limit is
- * on the order of a million or so, even on 32-bit systems). It is
- * not legal to block within RCU_NONIDLE(), nor is it permissible to
- * transfer control either into or out of RCU_NONIDLE()'s statement.
- */
-#define RCU_NONIDLE(a) \
-	do { \
-		ct_irq_enter_irqson(); \
-		do { a; } while (0); \
-		ct_irq_exit_irqson(); \
-	} while (0)
-
 /*
  * Note a quasi-voluntary context switch for RCU-tasks's benefit.
  * This is a macro rather than an inline function to avoid #include hell.
@@ -932 +957 @@
 
 /**
  * kfree_rcu() - kfree an object after a grace period.
- * @ptr: pointer to kfree for both single- and double-argument invocations.
- * @rhf: the name of the struct rcu_head within the type of @ptr,
- *       but only for double-argument invocations.
+ * @ptr: pointer to kfree for double-argument invocations.
+ * @rhf: the name of the struct rcu_head within the type of @ptr.
  *
  * Many rcu callbacks functions just call kfree() on the base structure.
  * These functions are trivial, but their size adds up, and furthermore
@@ -958 +984 @@
  * The BUILD_BUG_ON check must not involve any function calls, hence the
  * checks are done in macros here.
  */
-#define kfree_rcu(ptr, rhf...) kvfree_rcu(ptr, ## rhf)
+#define kfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf)
+#define kvfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf)
 
 /**
- * kvfree_rcu() - kvfree an object after a grace period.
- *
- * This macro consists of one or two arguments and it is
- * based on whether an object is head-less or not. If it
- * has a head then a semantic stays the same as it used
- * to be before:
- *
- *     kvfree_rcu(ptr, rhf);
- *
- * where @ptr is a pointer to kvfree(), @rhf is the name
- * of the rcu_head structure within the type of @ptr.
+ * kfree_rcu_mightsleep() - kfree an object after a grace period.
+ * @ptr: pointer to kfree for single-argument invocations.
  *
  * When it comes to head-less variant, only one argument
  * is passed and that is just a pointer which has to be
  * freed after a grace period. Therefore the semantic is
  *
- *     kvfree_rcu(ptr);
+ *     kfree_rcu_mightsleep(ptr);
  *
  * where @ptr is the pointer to be freed by kvfree().
  *
@@ -978 +1012 @@
  * annotation. Otherwise, please switch and embed the
  * rcu_head structure within the type of @ptr.
  */
-#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \
-	kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__)
-
+#define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
 #define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
-#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr)
 
-#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME
 #define kvfree_rcu_arg_2(ptr, rhf) \
 do { \
 	typeof (ptr) ___p = (ptr); \
+4 -4
include/linux/srcu.h

@@ -212 +212 @@
 
 	srcu_check_nmi_safety(ssp, false);
 	retval = __srcu_read_lock(ssp);
-	srcu_lock_acquire(&(ssp)->dep_map);
+	srcu_lock_acquire(&ssp->dep_map);
 	return retval;
 }
 
@@ -229 +229 @@
 
 	srcu_check_nmi_safety(ssp, true);
 	retval = __srcu_read_lock_nmisafe(ssp);
-	rcu_lock_acquire(&(ssp)->dep_map);
+	rcu_lock_acquire(&ssp->dep_map);
 	return retval;
 }
 
@@ -284 +284 @@
 {
 	WARN_ON_ONCE(idx & ~0x1);
 	srcu_check_nmi_safety(ssp, false);
-	srcu_lock_release(&(ssp)->dep_map);
+	srcu_lock_release(&ssp->dep_map);
 	__srcu_read_unlock(ssp, idx);
 }
 
@@ -300 +300 @@
 {
 	WARN_ON_ONCE(idx & ~0x1);
 	srcu_check_nmi_safety(ssp, true);
-	rcu_lock_release(&(ssp)->dep_map);
+	rcu_lock_release(&ssp->dep_map);
 	__srcu_read_unlock_nmisafe(ssp, idx);
 }
 
+22 -29
kernel/locking/locktorture.c

@@ -33 +33 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
 
-torture_param(int, nwriters_stress, -1,
-	     "Number of write-locking stress-test threads");
-torture_param(int, nreaders_stress, -1,
-	     "Number of read-locking stress-test threads");
+torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads");
+torture_param(int, nreaders_stress, -1, "Number of read-locking stress-test threads");
+torture_param(int, long_hold, 100, "Do occasional long hold of lock (ms), 0=disable");
 torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
-torture_param(int, onoff_interval, 0,
-	     "Time between CPU hotplugs (s), 0=disable");
-torture_param(int, shuffle_interval, 3,
-	     "Number of jiffies between shuffles, 0=disable");
+torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable");
+torture_param(int, shuffle_interval, 3, "Number of jiffies between shuffles, 0=disable");
 torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
-torture_param(int, stat_interval, 60,
-	     "Number of seconds between stats printk()s");
+torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s");
 torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
 torture_param(int, rt_boost, 2,
 	     "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
 torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
-torture_param(int, verbose, 1,
-	     "Enable verbose debugging printk()s");
+torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)");
 /* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */
 #define MAX_NESTED_LOCKS 8
@@ -115 +120 @@
 
 static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
 {
-	const unsigned long longdelay_ms = 100;
+	const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
 
 	/* We want a long delay occasionally to force massive contention. */
 	if (!(torture_random(trsp) %
@@ -193 +198 @@
 static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
 {
 	const unsigned long shortdelay_us = 2;
-	const unsigned long longdelay_ms = 100;
+	const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
+	unsigned long j;
 
 	/* We want a short delay mostly to emulate likely code, and
 	 * we want a long delay occasionally to force massive contention.
 	 */
-	if (!(torture_random(trsp) %
-	      (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * longdelay_ms))) {
+		j = jiffies;
 		mdelay(longdelay_ms);
+		pr_alert("%s: delay = %lu jiffies.\n", __func__, jiffies - j);
+	}
-	if (!(torture_random(trsp) %
-	      (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 200 * shortdelay_us)))
 		udelay(shortdelay_us);
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
 		torture_preempt_schedule(); /* Allow test to be preempted. */
@@ -319 +322 @@
 static void torture_rwlock_write_delay(struct torture_random_state *trsp)
 {
 	const unsigned long shortdelay_us = 2;
-	const unsigned long longdelay_ms = 100;
+	const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
 
 	/* We want a short delay mostly to emulate likely code, and
 	 * we want a long delay occasionally to force massive contention.
@@ -452 +455 @@
 
 static void torture_mutex_delay(struct torture_random_state *trsp)
 {
-	const unsigned long longdelay_ms = 100;
+	const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
 
 	/* We want a long delay occasionally to force massive contention. */
 	if (!(torture_random(trsp) %
 	      (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
 		mdelay(longdelay_ms * 5);
-	else
-		mdelay(longdelay_ms / 5);
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
 		torture_preempt_schedule(); /* Allow test to be preempted. */
 }
@@ -625 +630 @@
 static void torture_rtmutex_delay(struct torture_random_state *trsp)
 {
 	const unsigned long shortdelay_us = 2;
-	const unsigned long longdelay_ms = 100;
+	const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
 
 	/*
 	 * We want a short delay mostly to emulate likely code, and
@@ -635 +640 @@
 	      (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
 		mdelay(longdelay_ms);
 	if (!(torture_random(trsp) %
-	      (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+	      (cxt.nrealwriters_stress * 200 * shortdelay_us)))
 		udelay(shortdelay_us);
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
 		torture_preempt_schedule(); /* Allow test to be preempted. */
@@ -690 +695 @@
 
 static void torture_rwsem_write_delay(struct torture_random_state *trsp)
 {
-	const unsigned long longdelay_ms = 100;
+	const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
 
 	/* We want a long delay occasionally to force massive contention. */
 	if (!(torture_random(trsp) %
 	      (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
 		mdelay(longdelay_ms * 10);
-	else
-		mdelay(longdelay_ms / 10);
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
 		torture_preempt_schedule(); /* Allow test to be preempted. */
 }
@@ -841 +848 @@
 
 			lwsp->n_lock_acquired++;
 		}
-		cxt.cur_ops->write_delay(&rand);
 		if (!skip_main_lock) {
+			cxt.cur_ops->write_delay(&rand);
 			lock_is_write_held = false;
 			WRITE_ONCE(last_lock_release, jiffies);
 			cxt.cur_ops->writeunlock(tid);
+18
kernel/rcu/Kconfig

@@ -314 +314 @@
 	  To save power, batch RCU callbacks and flush after delay, memory
 	  pressure, or callback list growing too big.
 
+config RCU_DOUBLE_CHECK_CB_TIME
+	bool "RCU callback-batch backup time check"
+	depends on RCU_EXPERT
+	default n
+	help
+	  Use this option to provide more precise enforcement of the
+	  rcutree.rcu_resched_ns module parameter in situations where
+	  a single RCU callback might run for hundreds of microseconds,
+	  thus defeating the 32-callback batching used to amortize the
+	  cost of the fine-grained but expensive local_clock() function.
+
+	  This option rounds rcutree.rcu_resched_ns up to the next
+	  jiffy, and overrides the 32-callback batching if this limit
+	  is exceeded.
+
+	  Say Y here if you need tighter callback-limit enforcement.
+	  Say N here if you are unsure.
+
 endmenu # "RCU Subsystem"
+6
kernel/rcu/rcu.h

@@ -642 +642 @@
 static inline void show_rcu_tasks_trace_gp_kthread(void) {}
 #endif
 
+#ifdef CONFIG_TINY_RCU
+static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; }
+#else
+bool rcu_cpu_beenfullyonline(int cpu);
+#endif
+
 #endif /* __LINUX_RCU_H */
+102 -97
kernel/rcu/rcuscale.c

@@ -522 +522 @@
 		 scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
 }
 
-static void
-rcu_scale_cleanup(void)
-{
-	int i;
-	int j;
-	int ngps = 0;
-	u64 *wdp;
-	u64 *wdpp;
-
-	/*
-	 * Would like warning at start, but everything is expedited
-	 * during the mid-boot phase, so have to wait till the end.
-	 */
-	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
-		SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
-	if (rcu_gp_is_normal() && gp_exp)
-		SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
-	if (gp_exp && gp_async)
-		SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
-
-	if (torture_cleanup_begin())
-		return;
-	if (!cur_ops) {
-		torture_cleanup_end();
-		return;
-	}
-
-	if (reader_tasks) {
-		for (i = 0; i < nrealreaders; i++)
-			torture_stop_kthread(rcu_scale_reader,
-					     reader_tasks[i]);
-		kfree(reader_tasks);
-	}
-
-	if (writer_tasks) {
-		for (i = 0; i < nrealwriters; i++) {
-			torture_stop_kthread(rcu_scale_writer,
-					     writer_tasks[i]);
-			if (!writer_n_durations)
-				continue;
-			j = writer_n_durations[i];
-			pr_alert("%s%s writer %d gps: %d\n",
-				 scale_type, SCALE_FLAG, i, j);
-			ngps += j;
-		}
-		pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
-			 scale_type, SCALE_FLAG,
-			 t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
-			 t_rcu_scale_writer_finished -
-			 t_rcu_scale_writer_started,
-			 ngps,
-			 rcuscale_seq_diff(b_rcu_gp_test_finished,
-					   b_rcu_gp_test_started));
-		for (i = 0; i < nrealwriters; i++) {
-			if (!writer_durations)
-				break;
-			if (!writer_n_durations)
-				continue;
-			wdpp = writer_durations[i];
-			if (!wdpp)
-				continue;
-			for (j = 0; j < writer_n_durations[i]; j++) {
-				wdp = &wdpp[j];
-				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
-					 scale_type, SCALE_FLAG,
-					 i, j, *wdp);
-				if (j % 100 == 0)
-					schedule_timeout_uninterruptible(1);
-			}
-			kfree(writer_durations[i]);
-		}
-		kfree(writer_tasks);
-		kfree(writer_durations);
-		kfree(writer_n_durations);
-	}
-
-	/* Do torture-type-specific cleanup operations. */
-	if (cur_ops->cleanup != NULL)
-		cur_ops->cleanup();
-
-	torture_cleanup_end();
-}
-
 /*
  * Return the number if non-negative. If -1, the number of CPUs.
  * If less than -1, that much less than the number of CPUs, but
@@ -539 +622 @@
 		nr = 1;
 	}
 	return nr;
-}
-
-/*
- * RCU scalability shutdown kthread. Just waits to be awakened, then shuts
- * down system.
- */
-static int
-rcu_scale_shutdown(void *arg)
-{
-	wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
-	smp_mb(); /* Wake before output. */
-	rcu_scale_cleanup();
-	kernel_power_off();
-	return -EINVAL;
 }
 
 /*
@@ -775 +872 @@
 	torture_init_end();
 	kfree_scale_cleanup();
 	return firsterr;
+}
+
+static void
+rcu_scale_cleanup(void)
+{
+	int i;
+	int j;
+	int ngps = 0;
+	u64 *wdp;
+	u64 *wdpp;
+
+	/*
+	 * Would like warning at start, but everything is expedited
+	 * during the mid-boot phase, so have to wait till the end.
+	 */
+	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
+		SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
+	if (rcu_gp_is_normal() && gp_exp)
+		SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
+	if (gp_exp && gp_async)
+		SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
+
+	if (kfree_rcu_test) {
+		kfree_scale_cleanup();
+		return;
+	}
+
+	if (torture_cleanup_begin())
+		return;
+	if (!cur_ops) {
+		torture_cleanup_end();
+		return;
+	}
+
+	if (reader_tasks) {
+		for (i = 0; i < nrealreaders; i++)
+			torture_stop_kthread(rcu_scale_reader,
+					     reader_tasks[i]);
+		kfree(reader_tasks);
+	}
+
+	if (writer_tasks) {
+		for (i = 0; i < nrealwriters; i++) {
+			torture_stop_kthread(rcu_scale_writer,
+					     writer_tasks[i]);
+			if (!writer_n_durations)
+				continue;
+			j = writer_n_durations[i];
+			pr_alert("%s%s writer %d gps: %d\n",
+				 scale_type, SCALE_FLAG, i, j);
+			ngps += j;
+		}
+		pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
+			 scale_type, SCALE_FLAG,
+			 t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
+			 t_rcu_scale_writer_finished -
+			 t_rcu_scale_writer_started,
+			 ngps,
+			 rcuscale_seq_diff(b_rcu_gp_test_finished,
+					   b_rcu_gp_test_started));
+		for (i = 0; i < nrealwriters; i++) {
+			if (!writer_durations)
+				break;
+			if (!writer_n_durations)
+				continue;
+			wdpp = writer_durations[i];
+			if (!wdpp)
+				continue;
+			for (j = 0; j < writer_n_durations[i]; j++) {
+				wdp = &wdpp[j];
+				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+					 scale_type, SCALE_FLAG,
+					 i, j, *wdp);
+				if (j % 100 == 0)
+					schedule_timeout_uninterruptible(1);
+			}
+			kfree(writer_durations[i]);
+		}
+		kfree(writer_tasks);
+		kfree(writer_durations);
+		kfree(writer_n_durations);
+	}
+
+	/* Do torture-type-specific cleanup operations. */
+	if (cur_ops->cleanup != NULL)
+		cur_ops->cleanup();
+
+	torture_cleanup_end();
+}
+
+/*
+ * RCU scalability shutdown kthread. Just waits to be awakened, then shuts
+ * down system.
+ */
+static int
+rcu_scale_shutdown(void *arg)
+{
+	wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
+	smp_mb(); /* Wake before output. */
+	rcu_scale_cleanup();
+	kernel_power_off();
+	return -EINVAL;
 }
 
 static int __init
+8 -4
kernel/rcu/tasks.h

@@ -241 +241 @@
 	if (rcu_task_enqueue_lim < 0) {
 		rcu_task_enqueue_lim = 1;
 		rcu_task_cb_adjust = true;
-		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
 	} else if (rcu_task_enqueue_lim == 0) {
 		rcu_task_enqueue_lim = 1;
 	}
@@ -271 +272 @@
 		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
 	}
 	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
-	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
+
+	pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name,
+		data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
 }
@@ -464 +463 @@
 {
 	int cpu;
 	int cpunext;
+	int cpuwq;
 	unsigned long flags;
 	int len;
 	struct rcu_head *rhp;
@@ -475 +473 @@
 	cpunext = cpu * 2 + 1;
 	if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
 		rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
-		queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
+		cpuwq = rcu_cpu_beenfullyonline(cpunext) ? cpunext : WORK_CPU_UNBOUND;
+		queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work);
 		cpunext++;
 		if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
 			rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
-			queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
+			cpuwq = rcu_cpu_beenfullyonline(cpunext) ? cpunext : WORK_CPU_UNBOUND;
+			queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work);
 		}
 	}
 
+86 -41
kernel/rcu/tree.c

@@ -2046 +2046 @@
 		rcu_report_qs_rdp(rdp);
 }
 
+/* Return true if callback-invocation time limit exceeded. */
+static bool rcu_do_batch_check_time(long count, long tlimit,
+				    bool jlimit_check, unsigned long jlimit)
+{
+	// Invoke local_clock() only once per 32 consecutive callbacks.
+	return unlikely(tlimit) &&
+	       (!likely(count & 31) ||
+		(IS_ENABLED(CONFIG_RCU_DOUBLE_CHECK_CB_TIME) &&
+		 jlimit_check && time_after(jiffies, jlimit))) &&
+	       local_clock() >= tlimit;
+}
+
 /*
  * Invoke any RCU callbacks that have made it to the end of their grace
  * period. Throttle as specified by rdp->blimit.
  */
 static void rcu_do_batch(struct rcu_data *rdp)
 {
+	long bl;
+	long count = 0;
 	int div;
 	bool __maybe_unused empty;
 	unsigned long flags;
-	struct rcu_head *rhp;
+	unsigned long jlimit;
+	bool jlimit_check = false;
+	long pending;
 	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
-	long bl, count = 0;
-	long pending, tlimit = 0;
+	struct rcu_head *rhp;
+	long tlimit = 0;
 
 	/* If no callbacks are ready, just return. */
 	if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
@@ -2098 +2082 @@
 	div = READ_ONCE(rcu_divisor);
 	div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
 	bl = max(rdp->blimit, pending >> div);
-	if (in_serving_softirq() && unlikely(bl > 100)) {
+	if ((in_serving_softirq() || rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING) &&
+	    (IS_ENABLED(CONFIG_RCU_DOUBLE_CHECK_CB_TIME) || unlikely(bl > 100))) {
+		const long npj = NSEC_PER_SEC / HZ;
 		long rrn = READ_ONCE(rcu_resched_ns);
 
 		rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
 		tlimit = local_clock() + rrn;
+		jlimit = jiffies + (rrn + npj + 1) / npj;
+		jlimit_check = true;
 	}
 	trace_rcu_batch_start(rcu_state.name,
 			      rcu_segcblist_n_cbs(&rdp->cblist), bl);
@@ -2146 +2126 @@
 			 * Make sure we don't spend too much time here and deprive other
 			 * softirq vectors of CPU cycles.
 			 */
-			if (unlikely(tlimit)) {
-				/* only call local_clock() every 32 callbacks */
-				if (likely((count & 31) || local_clock() < tlimit))
-					continue;
-				/* Exceeded the time limit, so leave. */
+			if (rcu_do_batch_check_time(count, tlimit, jlimit_check, jlimit))
 				break;
-			}
 		} else {
-			// In rcuoc context, so no worries about depriving
-			// other softirq vectors of CPU cycles.
+			// In rcuc/rcuoc context, so no worries about
+			// depriving other softirq vectors of CPU cycles.
 			local_bh_enable();
 			lockdep_assert_irqs_enabled();
 			cond_resched_tasks_rcu_qs();
 			lockdep_assert_irqs_enabled();
 			local_bh_disable();
+			// But rcuc kthreads can delay quiescent-state
+			// reporting, so check time limits for them.
+			if (rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING &&
+			    rcu_do_batch_check_time(count, tlimit, jlimit_check, jlimit)) {
+				rdp->rcu_cpu_has_work = 1;
+				break;
+			}
 		}
 	}
 
@@ -2481 +2459 @@
 		*statusp = RCU_KTHREAD_RUNNING;
 		local_irq_disable();
 		work = *workp;
-		*workp = 0;
+		WRITE_ONCE(*workp, 0);
 		local_irq_enable();
 		if (work)
 			rcu_core();
 		local_bh_enable();
-		if (*workp == 0) {
+		if (!READ_ONCE(*workp)) {
 			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
 			*statusp = RCU_KTHREAD_WAITING;
 			return;
@@ -2778 +2756 @@
  */
 struct kvfree_rcu_bulk_data {
 	struct list_head list;
-	unsigned long gp_snap;
+	struct rcu_gp_oldstate gp_snap;
 	unsigned long nr_records;
 	void *records[];
 };
@@ -2795 +2773 @@
  * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
  * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
  * @head_free: List of kfree_rcu() objects waiting for a grace period
+ * @head_free_gp_snap: Grace-period snapshot to check for attempted premature frees.
  * @bulk_head_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
  * @krcp: Pointer to @kfree_rcu_cpu structure
  */
@@ -2803 +2780 @@
 struct kfree_rcu_cpu_work {
 	struct rcu_work rcu_work;
 	struct rcu_head *head_free;
+	struct rcu_gp_oldstate head_free_gp_snap;
 	struct list_head bulk_head_free[FREE_N_CHANNELS];
 	struct kfree_rcu_cpu *krcp;
 };
@@ -2924 +2900 @@
 	struct llist_node *page_list, *pos, *n;
 	int freed = 0;
 
+	if (!rcu_min_cached_objs)
+		return 0;
+
 	raw_spin_lock_irqsave(&krcp->lock, flags);
 	page_list = llist_del_all(&krcp->bkvcache);
 	WRITE_ONCE(krcp->nr_bkv_objs, 0);
@@ -2947 +2920 @@
 	unsigned long flags;
 	int i;
 
-	debug_rcu_bhead_unqueue(bnode);
+	if (!WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&bnode->gp_snap))) {
+		debug_rcu_bhead_unqueue(bnode);
+		rcu_lock_acquire(&rcu_callback_map);
+		if (idx == 0) { // kmalloc() / kfree().
+			trace_rcu_invoke_kfree_bulk_callback(
+				rcu_state.name, bnode->nr_records,
+				bnode->records);
 
-	rcu_lock_acquire(&rcu_callback_map);
-	if (idx == 0) { // kmalloc() / kfree().
-		trace_rcu_invoke_kfree_bulk_callback(
-			rcu_state.name, bnode->nr_records,
-			bnode->records);
+			kfree_bulk(bnode->nr_records, bnode->records);
+		} else { // vmalloc() / vfree().
+			for (i = 0; i < bnode->nr_records; i++) {
+				trace_rcu_invoke_kvfree_callback(
+					rcu_state.name, bnode->records[i], 0);
 
-		kfree_bulk(bnode->nr_records, bnode->records);
-	} else { // vmalloc() / vfree().
-		for (i = 0; i < bnode->nr_records; i++) {
-			trace_rcu_invoke_kvfree_callback(
-				rcu_state.name, bnode->records[i], 0);
-
-			vfree(bnode->records[i]);
+				vfree(bnode->records[i]);
+			}
 		}
+		rcu_lock_release(&rcu_callback_map);
 	}
-	rcu_lock_release(&rcu_callback_map);
 
 	raw_spin_lock_irqsave(&krcp->lock, flags);
 	if (put_cached_bnode(krcp, bnode))
@@ -3012 +2984 @@
 	struct rcu_head *head;
 	struct kfree_rcu_cpu *krcp;
 	struct kfree_rcu_cpu_work *krwp;
+	struct rcu_gp_oldstate head_gp_snap;
 	int i;
 
 	krwp = container_of(to_rcu_work(work),
@@ -3027 +2998 @@
 	// Channel 3.
 	head = krwp->head_free;
 	krwp->head_free = NULL;
+	head_gp_snap = krwp->head_free_gp_snap;
 	raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
 	// Handle the first two channels.
@@ -3044 +3014 @@
 	 * queued on a linked list through their rcu_head structures.
 	 * This list is named "Channel 3".
 	 */
-	kvfree_rcu_list(head);
+	if (head && !WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&head_gp_snap)))
+		kvfree_rcu_list(head);
 }
 
 static bool
@@ -3112 +3081 @@
 		INIT_LIST_HEAD(&bulk_ready[i]);
 
 		list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) {
-			if (!poll_state_synchronize_rcu(bnode->gp_snap))
+			if (!poll_state_synchronize_rcu_full(&bnode->gp_snap))
 				break;
 
 			atomic_sub(bnode->nr_records, &krcp->bulk_count[i]);
@@ -3177 +3146 @@
 		// objects queued on the linked list.
 		if (!krwp->head_free) {
 			krwp->head_free = krcp->head;
+			get_state_synchronize_rcu_full(&krwp->head_free_gp_snap);
 			atomic_set(&krcp->head_count, 0);
 			WRITE_ONCE(krcp->head, NULL);
 		}
@@ -3226 +3194 @@
 	nr_pages = atomic_read(&krcp->backoff_page_cache_fill) ?
 		1 : rcu_min_cached_objs;
 
-	for (i = 0; i < nr_pages; i++) {
+	for (i = READ_ONCE(krcp->nr_bkv_objs); i < nr_pages; i++) {
 		bnode = (struct kvfree_rcu_bulk_data *)
 			__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
@@ -3250 +3218 @@
 static void
 run_page_cache_worker(struct kfree_rcu_cpu *krcp)
 {
+	// If cache disabled, bail out.
+	if (!rcu_min_cached_objs)
+		return;
+
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
 	    !atomic_xchg(&krcp->work_in_progress, 1)) {
 		if (atomic_read(&krcp->backoff_page_cache_fill)) {
@@ -3308 +3272 @@
 		// scenarios.
 		bnode = (struct kvfree_rcu_bulk_data *)
 			__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
-		*krcp = krc_this_cpu_lock(flags);
+		raw_spin_lock_irqsave(&(*krcp)->lock, *flags);
 	}
 
 	if (!bnode)
@@ -3321 +3285 @@
 
 	// Finally insert and update the GP for this page.
 	bnode->records[bnode->nr_records++] = ptr;
-	bnode->gp_snap = get_state_synchronize_rcu();
+	get_state_synchronize_rcu_full(&bnode->gp_snap);
 	atomic_inc(&(*krcp)->bulk_count[idx]);
 
 	return true;
@@ -4319 +4283 @@
 	 */
 	rnp = rdp->mynode;
 	raw_spin_lock_rcu_node(rnp);		/* irqs already disabled. */
-	rdp->beenonline = true;	 /* We have now been online. */
 	rdp->gp_seq = READ_ONCE(rnp->gp_seq);
 	rdp->gp_seq_needed = rdp->gp_seq;
 	rdp->cpu_no_qs.b.norm = true;
@@ -4343 +4308 @@
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 
 	rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
+}
+
+/*
+ * Has the specified (known valid) CPU ever been fully online?
+ */
+bool rcu_cpu_beenfullyonline(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+	return smp_load_acquire(&rdp->beenonline);
 }
 
 /*
@@ -4413 +4368 @@
  * Note that this function is special in that it is invoked directly
  * from the incoming CPU rather than from the cpuhp_step mechanism.
  * This is because this function must be invoked at a precise location.
+ * This incoming CPU must not have enabled interrupts yet.
  */
 void rcu_cpu_starting(unsigned int cpu)
 {
-	unsigned long flags;
 	unsigned long mask;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	bool newcpu;
 
+	lockdep_assert_irqs_disabled();
 	rdp = per_cpu_ptr(&rcu_data, cpu);
 	if (rdp->cpu_started)
 		return;
@@ -4430 +4384 @@
 
 	rnp = rdp->mynode;
 	mask = rdp->grpmask;
-	local_irq_save(flags);
 	arch_spin_lock(&rcu_state.ofl_lock);
 	rcu_dynticks_eqs_online();
 	raw_spin_lock(&rcu_state.barrier_lock);
@@ -4448 +4403 @@
 	/* An incoming CPU should never be blocking a grace period. */
 	if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
 		/* rcu_report_qs_rnp() *really* wants some flags to restore */
-		unsigned long flags2;
+		unsigned long flags;
 
-		local_irq_save(flags2);
+		local_irq_save(flags);
 		rcu_disable_urgency_upon_qs(rdp);
 		/* Report QS -after- changing ->qsmaskinitnext! */
-		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2);
+		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
 	} else {
 		raw_spin_unlock_rcu_node(rnp);
 	}
 	arch_spin_unlock(&rcu_state.ofl_lock);
-	local_irq_restore(flags);
+	smp_store_release(&rdp->beenonline, true);
 	smp_mb(); /* Ensure RCU read-side usage follows above initialization.
*/ 4463 4418 } 4464 4419
+1 -1
kernel/rcu/tree_exp.h
···
 				"O."[!!cpu_online(cpu)],
 				"o."[!!(rdp->grpmask & rnp->expmaskinit)],
 				"N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
-				"D."[!!(rdp->cpu_no_qs.b.exp)]);
+				"D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
 		}
 	}
 	pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
+49 -7
kernel/rcu/tree_nocb.h
···
 	int cpu;
 	unsigned long count = 0;

+	if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask)))
+		return 0;
+
+	/*  Protect rcu_nocb_mask against concurrent (de-)offloading. */
+	if (!mutex_trylock(&rcu_state.barrier_mutex))
+		return 0;
+
 	/* Snapshot count of all CPUs */
-	for_each_possible_cpu(cpu) {
+	for_each_cpu(cpu, rcu_nocb_mask) {
 		struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

 		count += READ_ONCE(rdp->lazy_len);
 	}
+
+	mutex_unlock(&rcu_state.barrier_mutex);

 	return count ? count : SHRINK_EMPTY;
 }
···
 	unsigned long flags;
 	unsigned long count = 0;

-	/* Snapshot count of all CPUs */
-	for_each_possible_cpu(cpu) {
-		struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
-		int _count = READ_ONCE(rdp->lazy_len);
+	if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask)))
+		return 0;
+	/*
+	 * Protect against concurrent (de-)offloading. Otherwise nocb locking
+	 * may be ignored or imbalanced.
+	 */
+	if (!mutex_trylock(&rcu_state.barrier_mutex)) {
+		/*
+		 * But really don't insist if barrier_mutex is contended since we
+		 * can't guarantee that it will never engage in a dependency
+		 * chain involving memory allocation. The lock is seldom contended
+		 * anyway.
+		 */
+		return 0;
+	}

-		if (_count == 0)
+	/* Snapshot count of all CPUs */
+	for_each_cpu(cpu, rcu_nocb_mask) {
+		struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+		int _count;
+
+		if (WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)))
 			continue;
+
+		if (!READ_ONCE(rdp->lazy_len))
+			continue;
+
 		rcu_nocb_lock_irqsave(rdp, flags);
-		WRITE_ONCE(rdp->lazy_len, 0);
+		/*
+		 * Recheck under the nocb lock. Since we are not holding the bypass
+		 * lock we may still race with increments from the enqueuer but still
+		 * we know for sure if there is at least one lazy callback.
+		 */
+		_count = READ_ONCE(rdp->lazy_len);
+		if (!_count) {
+			rcu_nocb_unlock_irqrestore(rdp, flags);
+			continue;
+		}
+		WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
 		rcu_nocb_unlock_irqrestore(rdp, flags);
 		wake_nocb_gp(rdp, false);
 		sc->nr_to_scan -= _count;
···
 		if (sc->nr_to_scan <= 0)
 			break;
 	}
+
+	mutex_unlock(&rcu_state.barrier_mutex);
+
 	return count ? count : SHRINK_STOP;
 }
+3 -1
kernel/rcu/tree_plugin.h
···
 	 * GP should not be able to end until we report, so there should be
 	 * no need to check for a subsequent expedited GP.  (Though we are
 	 * still in a quiescent state in any case.)
+	 *
+	 * Interrupts are disabled, so ->cpu_no_qs.b.exp cannot change.
 	 */
 	if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
 		rcu_report_exp_rdp(rdp);
···
 {
 	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

-	if (rdp->cpu_no_qs.b.exp)
+	if (READ_ONCE(rdp->cpu_no_qs.b.exp))
 		rcu_report_exp_rdp(rdp);
 }
+1 -1
tools/testing/selftests/rcutorture/bin/functions.sh
···
 	echo -machine virt,gic-version=host -cpu host
 	;;
 qemu-system-ppc64)
-	echo -enable-kvm -M pseries -nodefaults
+	echo -M pseries -nodefaults
 	echo -device spapr-vscsi
 	if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
 	then
+1 -1
tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
···
 rcutree.gp_cleanup_delay=3
 rcutree.kthread_prio=2
 threadirqs
-tree.use_softirq=0
+rcutree.use_softirq=0
+1 -1
tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
···
 rcutree.gp_cleanup_delay=3
 rcutree.kthread_prio=2
 threadirqs
-tree.use_softirq=0
+rcutree.use_softirq=0