Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branches 'docs.2022.04.20a', 'fixes.2022.04.20a', 'nocb.2022.04.11b', 'rcu-tasks.2022.04.11b', 'srcu.2022.05.03a', 'torture.2022.04.11b', 'torture-tasks.2022.04.20a' and 'torturescript.2022.04.20a' into HEAD

docs.2022.04.20a: Documentation updates.
fixes.2022.04.20a: Miscellaneous fixes.
nocb.2022.04.11b: Callback-offloading updates.
rcu-tasks.2022.04.11b: RCU-tasks updates.
srcu.2022.05.03a: Put SRCU on a memory diet.
torture.2022.04.11b: Torture-test updates.
torture-tasks.2022.04.20a: Avoid torture testing changing RCU configuration.
torturescript.2022.04.20a: Torture-test scripting updates.

+1084 -374
+70 -3
Documentation/admin-guide/kernel-parameters.txt
··· 4955 4955 number avoids disturbing real-time workloads, 4956 4956 but lengthens grace periods. 4957 4957 4958 + rcupdate.rcu_task_stall_info= [KNL] 4959 + Set initial timeout in jiffies for RCU task stall 4960 + informational messages, which give some indication 4961 + of the problem for those not patient enough to 4962 + wait for ten minutes. Informational messages are 4963 + only printed prior to the stall-warning message 4964 + for a given grace period. Disable with a value 4965 + less than or equal to zero. Defaults to ten 4966 + seconds. A change in value does not take effect 4967 + until the beginning of the next grace period. 4968 + 4969 + rcupdate.rcu_task_stall_info_mult= [KNL] 4970 + Multiplier for time interval between successive 4971 + RCU task stall informational messages for a given 4972 + RCU tasks grace period. This value is clamped 4973 + to one through ten, inclusive. It defaults to 4974 + the value three, so that the first informational 4975 + message is printed 10 seconds into the grace 4976 + period, the second at 40 seconds, the third at 4977 + 160 seconds, and then the stall warning at 600 4978 + seconds would prevent a fourth at 640 seconds. 4979 + 4958 4980 rcupdate.rcu_task_stall_timeout= [KNL] 4959 - Set timeout in jiffies for RCU task stall warning 4960 - messages. Disable with a value less than or equal 4961 - to zero. 4981 + Set timeout in jiffies for RCU task stall 4982 + warning messages. Disable with a value less 4983 + than or equal to zero. Defaults to ten minutes. 4984 + A change in value does not take effect until 4985 + the beginning of the next grace period. 4962 4986 4963 4987 rcupdate.rcu_self_test= [KNL] 4964 4988 Run the RCU early boot self tests ··· 5401 5377 smart2= [HW] 5402 5378 Format: <io1>[,<io2>[,...,<io8>]] 5403 5379 5380 + smp.csd_lock_timeout= [KNL] 5381 + Specify the period of time in milliseconds 5382 + that smp_call_function() and friends will wait 5383 + for a CPU to release the CSD lock. 
This is 5384 + useful when diagnosing bugs involving CPUs 5385 + disabling interrupts for extended periods 5386 + of time. Defaults to 5,000 milliseconds, and 5387 + setting a value of zero disables this feature. 5388 + This feature may be more efficiently disabled 5389 + using the csdlock_debug- kernel parameter. 5390 + 5404 5391 smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices 5405 5392 smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port 5406 5393 smsc-ircc2.ircc_sir= [HW] SIR base I/O port ··· 5643 5608 off: Disable mitigation and remove 5644 5609 performance impact to RDRAND and RDSEED 5645 5610 5611 + srcutree.big_cpu_lim [KNL] 5612 + Specifies the number of CPUs constituting a 5613 + large system, such that srcu_struct structures 5614 + should immediately allocate an srcu_node array. 5615 + This kernel-boot parameter defaults to 128, 5616 + but takes effect only when the low-order four 5617 + bits of srcutree.convert_to_big is equal to 3 5618 + (decide at boot). 5619 + 5620 + srcutree.convert_to_big [KNL] 5621 + Specifies under what conditions an SRCU tree 5622 + srcu_struct structure will be converted to big 5623 + form, that is, with an rcu_node tree: 5624 + 5625 + 0: Never. 5626 + 1: At init_srcu_struct() time. 5627 + 2: When rcutorture decides to. 5628 + 3: Decide at boot time (default). 5629 + 0x1X: Above plus if high contention. 5630 + 5631 + Either way, the srcu_node tree will be sized based 5632 + on the actual runtime number of CPUs (nr_cpu_ids) 5633 + instead of the compile-time CONFIG_NR_CPUS. 5634 + 5646 5635 srcutree.counter_wrap_check [KNL] 5647 5636 Specifies how frequently to check for 5648 5637 grace-period sequence counter wrap for the ··· 5683 5624 grace period will be considered for automatic 5684 5625 expediting. Set to zero to disable automatic 5685 5626 expediting. 
5627 + 5628 + srcutree.small_contention_lim [KNL] 5629 + Specifies the number of update-side contention 5630 + events per jiffy will be tolerated before 5631 + initiating a conversion of an srcu_struct 5632 + structure to big form. Note that the value of 5633 + srcutree.convert_to_big must have the 0x10 bit 5634 + set for contention-based conversions to occur. 5686 5635 5687 5636 ssbd= [ARM64,HW] 5688 5637 Speculative Store Bypass Disable control
+1
arch/Kconfig
··· 35 35 depends on MODULES 36 36 depends on HAVE_KPROBES 37 37 select KALLSYMS 38 + select TASKS_RCU if PREEMPTION 38 39 help 39 40 Kprobes allows you to trap at almost any kernel address and 40 41 execute a callback function. register_kprobe() establishes
+1
include/linux/rcupdate.h
··· 196 196 void exit_tasks_rcu_start(void); 197 197 void exit_tasks_rcu_finish(void); 198 198 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ 199 + #define rcu_tasks_classic_qs(t, preempt) do { } while (0) 199 200 #define rcu_tasks_qs(t, preempt) do { } while (0) 200 201 #define rcu_note_voluntary_context_switch(t) do { } while (0) 201 202 #define call_rcu_tasks call_rcu
+41
include/linux/sched.h
··· 2117 2117 #endif 2118 2118 } 2119 2119 2120 + #ifdef CONFIG_PREEMPT_DYNAMIC 2121 + 2122 + extern bool preempt_model_none(void); 2123 + extern bool preempt_model_voluntary(void); 2124 + extern bool preempt_model_full(void); 2125 + 2126 + #else 2127 + 2128 + static inline bool preempt_model_none(void) 2129 + { 2130 + return IS_ENABLED(CONFIG_PREEMPT_NONE); 2131 + } 2132 + static inline bool preempt_model_voluntary(void) 2133 + { 2134 + return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY); 2135 + } 2136 + static inline bool preempt_model_full(void) 2137 + { 2138 + return IS_ENABLED(CONFIG_PREEMPT); 2139 + } 2140 + 2141 + #endif 2142 + 2143 + static inline bool preempt_model_rt(void) 2144 + { 2145 + return IS_ENABLED(CONFIG_PREEMPT_RT); 2146 + } 2147 + 2148 + /* 2149 + * Does the preemption model allow non-cooperative preemption? 2150 + * 2151 + * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with 2152 + * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the 2153 + * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the 2154 + * PREEMPT_NONE model. 2155 + */ 2156 + static inline bool preempt_model_preemptible(void) 2157 + { 2158 + return preempt_model_full() || preempt_model_rt(); 2159 + } 2160 + 2120 2161 /* 2121 2162 * Does a critical section need to be broken due to another 2122 2163 * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
+25 -7
include/linux/srcutree.h
··· 47 47 */ 48 48 struct srcu_node { 49 49 spinlock_t __private lock; 50 - unsigned long srcu_have_cbs[4]; /* GP seq for children */ 51 - /* having CBs, but only */ 52 - /* is > ->srcu_gq_seq. */ 53 - unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs */ 54 - /* have CBs for given GP? */ 50 + unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */ 51 + /* if greater than ->srcu_gq_seq. */ 52 + unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */ 55 53 unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ 56 54 struct srcu_node *srcu_parent; /* Next up in tree. */ 57 55 int grplo; /* Least CPU for node. */ ··· 60 62 * Per-SRCU-domain structure, similar in function to rcu_state. 61 63 */ 62 64 struct srcu_struct { 63 - struct srcu_node node[NUM_RCU_NODES]; /* Combining tree. */ 65 + struct srcu_node *node; /* Combining tree. */ 64 66 struct srcu_node *level[RCU_NUM_LVLS + 1]; 65 67 /* First node at each level. */ 68 + int srcu_size_state; /* Small-to-big transition state. */ 66 69 struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ 67 - spinlock_t __private lock; /* Protect counters */ 70 + spinlock_t __private lock; /* Protect counters and size state. */ 68 71 struct mutex srcu_gp_mutex; /* Serialize GP work. */ 69 72 unsigned int srcu_idx; /* Current rdr array element. */ 70 73 unsigned long srcu_gp_seq; /* Grace-period seq #. */ 71 74 unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ 72 75 unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ 76 + unsigned long srcu_gp_start; /* Last GP start timestamp (jiffies) */ 73 77 unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */ 78 + unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */ 79 + unsigned long srcu_n_lock_retries; /* Contention events in current interval. */ 80 + unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. 
*/ 74 81 struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ 82 + bool sda_is_static; /* May ->sda be passed to free_percpu()? */ 75 83 unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ 76 84 struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */ 77 85 struct completion srcu_barrier_completion; ··· 85 81 atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */ 86 82 /* callback for the barrier */ 87 83 /* operation. */ 84 + unsigned long reschedule_jiffies; 85 + unsigned long reschedule_count; 88 86 struct delayed_work work; 89 87 struct lockdep_map dep_map; 90 88 }; 89 + 90 + /* Values for size state variable (->srcu_size_state). */ 91 + #define SRCU_SIZE_SMALL 0 92 + #define SRCU_SIZE_ALLOC 1 93 + #define SRCU_SIZE_WAIT_BARRIER 2 94 + #define SRCU_SIZE_WAIT_CALL 3 95 + #define SRCU_SIZE_WAIT_CBS1 4 96 + #define SRCU_SIZE_WAIT_CBS2 5 97 + #define SRCU_SIZE_WAIT_CBS3 6 98 + #define SRCU_SIZE_WAIT_CBS4 7 99 + #define SRCU_SIZE_BIG 8 91 100 92 101 /* Values for state variable (bottom bits of ->srcu_gp_seq). */ 93 102 #define SRCU_STATE_IDLE 0 ··· 138 121 #ifdef MODULE 139 122 # define __DEFINE_SRCU(name, is_static) \ 140 123 is_static struct srcu_struct name; \ 124 + extern struct srcu_struct * const __srcu_struct_##name; \ 141 125 struct srcu_struct * const __srcu_struct_##name \ 142 126 __section("___srcu_struct_ptrs") = &name 143 127 #else
+1 -1
include/linux/torture.h
··· 118 118 _torture_stop_kthread("Stopping " #n " task", &(tp)) 119 119 120 120 #ifdef CONFIG_PREEMPTION 121 - #define torture_preempt_schedule() preempt_schedule() 121 + #define torture_preempt_schedule() __preempt_schedule() 122 122 #else 123 123 #define torture_preempt_schedule() do { } while (0) 124 124 #endif
+1
kernel/bpf/Kconfig
··· 27 27 bool "Enable bpf() system call" 28 28 select BPF 29 29 select IRQ_WORK 30 + select TASKS_RCU if PREEMPTION 30 31 select TASKS_TRACE_RCU 31 32 select BINARY_PRINTF 32 33 select NET_SOCK_MSG if NET
+44 -19
kernel/rcu/Kconfig
··· 77 77 This option enables generic infrastructure code supporting 78 78 task-based RCU implementations. Not for manual selection. 79 79 80 - config TASKS_RCU 81 - def_bool PREEMPTION 80 + config FORCE_TASKS_RCU 81 + bool "Force selection of TASKS_RCU" 82 + depends on RCU_EXPERT 83 + select TASKS_RCU 84 + default n 82 85 help 83 - This option enables a task-based RCU implementation that uses 84 - only voluntary context switch (not preemption!), idle, and 85 - user-mode execution as quiescent states. Not for manual selection. 86 + This option force-enables a task-based RCU implementation 87 + that uses only voluntary context switch (not preemption!), 88 + idle, and user-mode execution as quiescent states. Not for 89 + manual selection in most cases. 90 + 91 + config TASKS_RCU 92 + bool 93 + default n 94 + select IRQ_WORK 95 + 96 + config FORCE_TASKS_RUDE_RCU 97 + bool "Force selection of Tasks Rude RCU" 98 + depends on RCU_EXPERT 99 + select TASKS_RUDE_RCU 100 + default n 101 + help 102 + This option force-enables a task-based RCU implementation 103 + that uses only context switch (including preemption) and 104 + user-mode execution as quiescent states. It forces IPIs and 105 + context switches on all online CPUs, including idle ones, 106 + so use with caution. Not for manual selection in most cases. 86 107 87 108 config TASKS_RUDE_RCU 88 - def_bool 0 89 - help 90 - This option enables a task-based RCU implementation that uses 91 - only context switch (including preemption) and user-mode 92 - execution as quiescent states. It forces IPIs and context 93 - switches on all online CPUs, including idle ones, so use 94 - with caution. 
95 - 96 - config TASKS_TRACE_RCU 97 - def_bool 0 109 + bool 110 + default n 98 111 select IRQ_WORK 112 + 113 + config FORCE_TASKS_TRACE_RCU 114 + bool "Force selection of Tasks Trace RCU" 115 + depends on RCU_EXPERT 116 + select TASKS_TRACE_RCU 117 + default n 99 118 help 100 119 This option enables a task-based RCU implementation that uses 101 120 explicit rcu_read_lock_trace() read-side markers, and allows 102 - these readers to appear in the idle loop as well as on the CPU 103 - hotplug code paths. It can force IPIs on online CPUs, including 104 - idle ones, so use with caution. 121 + these readers to appear in the idle loop as well as on the 122 + CPU hotplug code paths. It can force IPIs on online CPUs, 123 + including idle ones, so use with caution. Not for manual 124 + selection in most cases. 125 + 126 + config TASKS_TRACE_RCU 127 + bool 128 + default n 129 + select IRQ_WORK 105 130 106 131 config RCU_STALL_COMMON 107 132 def_bool TREE_RCU ··· 250 225 251 226 config TASKS_TRACE_RCU_READ_MB 252 227 bool "Tasks Trace RCU readers use memory barriers in user and idle" 253 - depends on RCU_EXPERT 228 + depends on RCU_EXPERT && TASKS_TRACE_RCU 254 229 default PREEMPT_RT || NR_CPUS < 8 255 230 help 256 231 Use this option to further reduce the number of IPIs sent
-9
kernel/rcu/Kconfig.debug
··· 28 28 depends on DEBUG_KERNEL 29 29 select TORTURE_TEST 30 30 select SRCU 31 - select TASKS_RCU 32 - select TASKS_RUDE_RCU 33 - select TASKS_TRACE_RCU 34 31 default n 35 32 help 36 33 This option provides a kernel module that runs performance ··· 44 47 depends on DEBUG_KERNEL 45 48 select TORTURE_TEST 46 49 select SRCU 47 - select TASKS_RCU 48 - select TASKS_RUDE_RCU 49 - select TASKS_TRACE_RCU 50 50 default n 51 51 help 52 52 This option provides a kernel module that runs torture tests ··· 60 66 depends on DEBUG_KERNEL 61 67 select TORTURE_TEST 62 68 select SRCU 63 - select TASKS_RCU 64 - select TASKS_RUDE_RCU 65 - select TASKS_TRACE_RCU 66 69 default n 67 70 help 68 71 This option provides a kernel module that runs performance tests
+4 -2
kernel/rcu/rcu.h
··· 523 523 static inline void show_rcu_gp_kthreads(void) { } 524 524 static inline int rcu_get_gp_kthreads_prio(void) { return 0; } 525 525 static inline void rcu_fwd_progress_check(unsigned long j) { } 526 + static inline void rcu_gp_slow_register(atomic_t *rgssp) { } 527 + static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { } 526 528 #else /* #ifdef CONFIG_TINY_RCU */ 527 529 bool rcu_dynticks_zero_in_eqs(int cpu, int *vp); 528 530 unsigned long rcu_get_gp_seq(void); ··· 537 535 void rcu_force_quiescent_state(void); 538 536 extern struct workqueue_struct *rcu_gp_wq; 539 537 extern struct workqueue_struct *rcu_par_gp_wq; 538 + void rcu_gp_slow_register(atomic_t *rgssp); 539 + void rcu_gp_slow_unregister(atomic_t *rgssp); 540 540 #endif /* #else #ifdef CONFIG_TINY_RCU */ 541 541 542 542 #ifdef CONFIG_RCU_NOCB_CPU 543 - bool rcu_is_nocb_cpu(int cpu); 544 543 void rcu_bind_current_to_nocb(void); 545 544 #else 546 - static inline bool rcu_is_nocb_cpu(int cpu) { return false; } 547 545 static inline void rcu_bind_current_to_nocb(void) { } 548 546 #endif 549 547
+4 -4
kernel/rcu/rcu_segcblist.c
··· 505 505 WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]); 506 506 507 507 /* 508 - * Callbacks moved, so clean up the misordered ->tails[] pointers 509 - * that now point into the middle of the list of ready-to-invoke 510 - * callbacks. The overall effect is to copy down the later pointers 511 - * into the gap that was created by the now-ready segments. 508 + * Callbacks moved, so there might be an empty RCU_WAIT_TAIL 509 + * and a non-empty RCU_NEXT_READY_TAIL. If so, copy the 510 + * RCU_NEXT_READY_TAIL segment to fill the RCU_WAIT_TAIL gap 511 + * created by the now-ready-to-invoke segments. 512 512 */ 513 513 for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { 514 514 if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
+21 -1
kernel/rcu/rcuscale.c
··· 268 268 .name = "srcud" 269 269 }; 270 270 271 + #ifdef CONFIG_TASKS_RCU 272 + 271 273 /* 272 274 * Definitions for RCU-tasks scalability testing. 273 275 */ ··· 296 294 .exp_sync = synchronize_rcu_tasks, 297 295 .name = "tasks" 298 296 }; 297 + 298 + #define TASKS_OPS &tasks_ops, 299 + 300 + #else // #ifdef CONFIG_TASKS_RCU 301 + 302 + #define TASKS_OPS 303 + 304 + #endif // #else // #ifdef CONFIG_TASKS_RCU 305 + 306 + #ifdef CONFIG_TASKS_TRACE_RCU 299 307 300 308 /* 301 309 * Definitions for RCU-tasks-trace scalability testing. ··· 335 323 .exp_sync = synchronize_rcu_tasks_trace, 336 324 .name = "tasks-tracing" 337 325 }; 326 + 327 + #define TASKS_TRACING_OPS &tasks_tracing_ops, 328 + 329 + #else // #ifdef CONFIG_TASKS_TRACE_RCU 330 + 331 + #define TASKS_TRACING_OPS 332 + 333 + #endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU 338 334 339 335 static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old) 340 336 { ··· 817 797 long i; 818 798 int firsterr = 0; 819 799 static struct rcu_scale_ops *scale_ops[] = { 820 - &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, &tasks_tracing_ops 800 + &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS 821 801 }; 822 802 823 803 if (!torture_init_begin(scale_type, verbose))
+89 -40
kernel/rcu/rcutorture.c
··· 738 738 }; 739 739 740 740 /* 741 + * Definitions for trivial CONFIG_PREEMPT=n-only torture testing. 742 + * This implementation does not necessarily work well with CPU hotplug. 743 + */ 744 + 745 + static void synchronize_rcu_trivial(void) 746 + { 747 + int cpu; 748 + 749 + for_each_online_cpu(cpu) { 750 + rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu)); 751 + WARN_ON_ONCE(raw_smp_processor_id() != cpu); 752 + } 753 + } 754 + 755 + static int rcu_torture_read_lock_trivial(void) __acquires(RCU) 756 + { 757 + preempt_disable(); 758 + return 0; 759 + } 760 + 761 + static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU) 762 + { 763 + preempt_enable(); 764 + } 765 + 766 + static struct rcu_torture_ops trivial_ops = { 767 + .ttype = RCU_TRIVIAL_FLAVOR, 768 + .init = rcu_sync_torture_init, 769 + .readlock = rcu_torture_read_lock_trivial, 770 + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 771 + .readunlock = rcu_torture_read_unlock_trivial, 772 + .readlock_held = torture_readlock_not_held, 773 + .get_gp_seq = rcu_no_completed, 774 + .sync = synchronize_rcu_trivial, 775 + .exp_sync = synchronize_rcu_trivial, 776 + .fqs = NULL, 777 + .stats = NULL, 778 + .irq_capable = 1, 779 + .name = "trivial" 780 + }; 781 + 782 + #ifdef CONFIG_TASKS_RCU 783 + 784 + /* 741 785 * Definitions for RCU-tasks torture testing. 742 786 */ 743 787 ··· 824 780 .name = "tasks" 825 781 }; 826 782 827 - /* 828 - * Definitions for trivial CONFIG_PREEMPT=n-only torture testing. 829 - * This implementation does not necessarily work well with CPU hotplug. 
830 - */ 783 + #define TASKS_OPS &tasks_ops, 831 784 832 - static void synchronize_rcu_trivial(void) 833 - { 834 - int cpu; 785 + #else // #ifdef CONFIG_TASKS_RCU 835 786 836 - for_each_online_cpu(cpu) { 837 - rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu)); 838 - WARN_ON_ONCE(raw_smp_processor_id() != cpu); 839 - } 840 - } 787 + #define TASKS_OPS 841 788 842 - static int rcu_torture_read_lock_trivial(void) __acquires(RCU) 843 - { 844 - preempt_disable(); 845 - return 0; 846 - } 789 + #endif // #else #ifdef CONFIG_TASKS_RCU 847 790 848 - static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU) 849 - { 850 - preempt_enable(); 851 - } 852 791 853 - static struct rcu_torture_ops trivial_ops = { 854 - .ttype = RCU_TRIVIAL_FLAVOR, 855 - .init = rcu_sync_torture_init, 856 - .readlock = rcu_torture_read_lock_trivial, 857 - .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 858 - .readunlock = rcu_torture_read_unlock_trivial, 859 - .readlock_held = torture_readlock_not_held, 860 - .get_gp_seq = rcu_no_completed, 861 - .sync = synchronize_rcu_trivial, 862 - .exp_sync = synchronize_rcu_trivial, 863 - .fqs = NULL, 864 - .stats = NULL, 865 - .irq_capable = 1, 866 - .name = "trivial" 867 - }; 792 + #ifdef CONFIG_TASKS_RUDE_RCU 868 793 869 794 /* 870 795 * Definitions for rude RCU-tasks torture testing. ··· 863 850 .irq_capable = 1, 864 851 .name = "tasks-rude" 865 852 }; 853 + 854 + #define TASKS_RUDE_OPS &tasks_rude_ops, 855 + 856 + #else // #ifdef CONFIG_TASKS_RUDE_RCU 857 + 858 + #define TASKS_RUDE_OPS 859 + 860 + #endif // #else #ifdef CONFIG_TASKS_RUDE_RCU 861 + 862 + 863 + #ifdef CONFIG_TASKS_TRACE_RCU 866 864 867 865 /* 868 866 * Definitions for tracing RCU-tasks torture testing. 
··· 916 892 .slow_gps = 1, 917 893 .name = "tasks-tracing" 918 894 }; 895 + 896 + #define TASKS_TRACING_OPS &tasks_tracing_ops, 897 + 898 + #else // #ifdef CONFIG_TASKS_TRACE_RCU 899 + 900 + #define TASKS_TRACING_OPS 901 + 902 + #endif // #else #ifdef CONFIG_TASKS_TRACE_RCU 903 + 919 904 920 905 static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old) 921 906 { ··· 1211 1178 " GP expediting controlled from boot/sysfs for %s.\n", 1212 1179 torture_type, cur_ops->name); 1213 1180 if (WARN_ONCE(nsynctypes == 0, 1214 - "rcu_torture_writer: No update-side primitives.\n")) { 1181 + "%s: No update-side primitives.\n", __func__)) { 1215 1182 /* 1216 1183 * No updates primitives, so don't try updating. 1217 1184 * The resulting test won't be testing much, hence the ··· 1219 1186 */ 1220 1187 rcu_torture_writer_state = RTWS_STOPPING; 1221 1188 torture_kthread_stopping("rcu_torture_writer"); 1189 + return 0; 1222 1190 } 1223 1191 1224 1192 do { ··· 1355 1321 1356 1322 VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started"); 1357 1323 set_user_nice(current, MAX_NICE); 1324 + 1325 + if (WARN_ONCE(nsynctypes == 0, 1326 + "%s: No update-side primitives.\n", __func__)) { 1327 + /* 1328 + * No updates primitives, so don't try updating. 1329 + * The resulting test won't be testing much, hence the 1330 + * above WARN_ONCE(). 
1331 + */ 1332 + torture_kthread_stopping("rcu_torture_fakewriter"); 1333 + return 0; 1334 + } 1358 1335 1359 1336 do { 1360 1337 torture_hrtimeout_jiffies(torture_random(&rand) % 10, &rand); ··· 2961 2916 pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier); 2962 2917 cur_ops->cb_barrier(); 2963 2918 } 2919 + rcu_gp_slow_unregister(NULL); 2964 2920 return; 2965 2921 } 2966 2922 if (!cur_ops) { 2967 2923 torture_cleanup_end(); 2924 + rcu_gp_slow_unregister(NULL); 2968 2925 return; 2969 2926 } 2970 2927 ··· 3063 3016 else 3064 3017 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); 3065 3018 torture_cleanup_end(); 3019 + rcu_gp_slow_unregister(&rcu_fwd_cb_nodelay); 3066 3020 } 3067 3021 3068 3022 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD ··· 3144 3096 int flags = 0; 3145 3097 unsigned long gp_seq = 0; 3146 3098 static struct rcu_torture_ops *torture_ops[] = { 3147 - &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, 3148 - &busted_srcud_ops, &tasks_ops, &tasks_rude_ops, 3149 - &tasks_tracing_ops, &trivial_ops, 3099 + &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops, 3100 + TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS 3101 + &trivial_ops, 3150 3102 }; 3151 3103 3152 3104 if (!torture_init_begin(torture_type, verbose)) ··· 3368 3320 if (object_debug) 3369 3321 rcu_test_debug_objects(); 3370 3322 torture_init_end(); 3323 + rcu_gp_slow_register(&rcu_fwd_cb_nodelay); 3371 3324 return 0; 3372 3325 3373 3326 unwind:
+21 -1
kernel/rcu/refscale.c
··· 207 207 .name = "srcu" 208 208 }; 209 209 210 + #ifdef CONFIG_TASKS_RCU 211 + 210 212 // Definitions for RCU Tasks ref scale testing: Empty read markers. 211 213 // These definitions also work for RCU Rude readers. 212 214 static void rcu_tasks_ref_scale_read_section(const int nloops) ··· 233 231 .delaysection = rcu_tasks_ref_scale_delay_section, 234 232 .name = "rcu-tasks" 235 233 }; 234 + 235 + #define RCU_TASKS_OPS &rcu_tasks_ops, 236 + 237 + #else // #ifdef CONFIG_TASKS_RCU 238 + 239 + #define RCU_TASKS_OPS 240 + 241 + #endif // #else // #ifdef CONFIG_TASKS_RCU 242 + 243 + #ifdef CONFIG_TASKS_TRACE_RCU 236 244 237 245 // Definitions for RCU Tasks Trace ref scale testing. 238 246 static void rcu_trace_ref_scale_read_section(const int nloops) ··· 272 260 .delaysection = rcu_trace_ref_scale_delay_section, 273 261 .name = "rcu-trace" 274 262 }; 263 + 264 + #define RCU_TRACE_OPS &rcu_trace_ops, 265 + 266 + #else // #ifdef CONFIG_TASKS_TRACE_RCU 267 + 268 + #define RCU_TRACE_OPS 269 + 270 + #endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU 275 271 276 272 // Definitions for reference count 277 273 static atomic_t refcnt; ··· 810 790 long i; 811 791 int firsterr = 0; 812 792 static struct ref_scale_ops *scale_ops[] = { 813 - &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops, 793 + &rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops, 814 794 &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops, 815 795 }; 816 796
+468 -163
kernel/rcu/srcutree.c
··· 24 24 #include <linux/smp.h> 25 25 #include <linux/delay.h> 26 26 #include <linux/module.h> 27 + #include <linux/slab.h> 27 28 #include <linux/srcu.h> 28 29 29 30 #include "rcu.h" ··· 39 38 static ulong counter_wrap_check = (ULONG_MAX >> 2); 40 39 module_param(counter_wrap_check, ulong, 0444); 41 40 41 + /* 42 + * Control conversion to SRCU_SIZE_BIG: 43 + * 0: Don't convert at all. 44 + * 1: Convert at init_srcu_struct() time. 45 + * 2: Convert when rcutorture invokes srcu_torture_stats_print(). 46 + * 3: Decide at boot time based on system shape (default). 47 + * 0x1x: Convert when excessive contention encountered. 48 + */ 49 + #define SRCU_SIZING_NONE 0 50 + #define SRCU_SIZING_INIT 1 51 + #define SRCU_SIZING_TORTURE 2 52 + #define SRCU_SIZING_AUTO 3 53 + #define SRCU_SIZING_CONTEND 0x10 54 + #define SRCU_SIZING_IS(x) ((convert_to_big & ~SRCU_SIZING_CONTEND) == x) 55 + #define SRCU_SIZING_IS_NONE() (SRCU_SIZING_IS(SRCU_SIZING_NONE)) 56 + #define SRCU_SIZING_IS_INIT() (SRCU_SIZING_IS(SRCU_SIZING_INIT)) 57 + #define SRCU_SIZING_IS_TORTURE() (SRCU_SIZING_IS(SRCU_SIZING_TORTURE)) 58 + #define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND) 59 + static int convert_to_big = SRCU_SIZING_AUTO; 60 + module_param(convert_to_big, int, 0444); 61 + 62 + /* Number of CPUs to trigger init_srcu_struct()-time transition to big. */ 63 + static int big_cpu_lim __read_mostly = 128; 64 + module_param(big_cpu_lim, int, 0444); 65 + 66 + /* Contention events per jiffy to initiate transition to big. */ 67 + static int small_contention_lim __read_mostly = 100; 68 + module_param(small_contention_lim, int, 0444); 69 + 42 70 /* Early-boot callback-management, so early that no lock is required! */ 43 71 static LIST_HEAD(srcu_boot_list); 44 72 static bool __read_mostly srcu_init_done; ··· 78 48 static void srcu_delay_timer(struct timer_list *t); 79 49 80 50 /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). 
*/ 81 - #define spin_lock_rcu_node(p) \ 82 - do { \ 83 - spin_lock(&ACCESS_PRIVATE(p, lock)); \ 84 - smp_mb__after_unlock_lock(); \ 51 + #define spin_lock_rcu_node(p) \ 52 + do { \ 53 + spin_lock(&ACCESS_PRIVATE(p, lock)); \ 54 + smp_mb__after_unlock_lock(); \ 85 55 } while (0) 86 56 87 57 #define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) 88 58 89 - #define spin_lock_irq_rcu_node(p) \ 90 - do { \ 91 - spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ 92 - smp_mb__after_unlock_lock(); \ 59 + #define spin_lock_irq_rcu_node(p) \ 60 + do { \ 61 + spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ 62 + smp_mb__after_unlock_lock(); \ 93 63 } while (0) 94 64 95 - #define spin_unlock_irq_rcu_node(p) \ 65 + #define spin_unlock_irq_rcu_node(p) \ 96 66 spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) 97 67 98 - #define spin_lock_irqsave_rcu_node(p, flags) \ 99 - do { \ 100 - spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ 101 - smp_mb__after_unlock_lock(); \ 68 + #define spin_lock_irqsave_rcu_node(p, flags) \ 69 + do { \ 70 + spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ 71 + smp_mb__after_unlock_lock(); \ 102 72 } while (0) 103 73 104 - #define spin_unlock_irqrestore_rcu_node(p, flags) \ 105 - spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ 74 + #define spin_trylock_irqsave_rcu_node(p, flags) \ 75 + ({ \ 76 + bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ 77 + \ 78 + if (___locked) \ 79 + smp_mb__after_unlock_lock(); \ 80 + ___locked; \ 81 + }) 82 + 83 + #define spin_unlock_irqrestore_rcu_node(p, flags) \ 84 + spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ 106 85 107 86 /* 108 - * Initialize SRCU combining tree. Note that statically allocated 87 + * Initialize SRCU per-CPU data. Note that statically allocated 109 88 * srcu_struct structures might already have srcu_read_lock() and 110 89 * srcu_read_unlock() running against them. 
So if the is_static parameter 111 90 * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. 112 91 */ 113 - static void init_srcu_struct_nodes(struct srcu_struct *ssp) 92 + static void init_srcu_struct_data(struct srcu_struct *ssp) 93 + { 94 + int cpu; 95 + struct srcu_data *sdp; 96 + 97 + /* 98 + * Initialize the per-CPU srcu_data array, which feeds into the 99 + * leaves of the srcu_node tree. 100 + */ 101 + WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != 102 + ARRAY_SIZE(sdp->srcu_unlock_count)); 103 + for_each_possible_cpu(cpu) { 104 + sdp = per_cpu_ptr(ssp->sda, cpu); 105 + spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); 106 + rcu_segcblist_init(&sdp->srcu_cblist); 107 + sdp->srcu_cblist_invoking = false; 108 + sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq; 109 + sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq; 110 + sdp->mynode = NULL; 111 + sdp->cpu = cpu; 112 + INIT_WORK(&sdp->work, srcu_invoke_callbacks); 113 + timer_setup(&sdp->delay_work, srcu_delay_timer, 0); 114 + sdp->ssp = ssp; 115 + } 116 + } 117 + 118 + /* Invalid seq state, used during snp node initialization */ 119 + #define SRCU_SNP_INIT_SEQ 0x2 120 + 121 + /* 122 + * Check whether sequence number corresponding to snp node, 123 + * is invalid. 124 + */ 125 + static inline bool srcu_invl_snp_seq(unsigned long s) 126 + { 127 + return rcu_seq_state(s) == SRCU_SNP_INIT_SEQ; 128 + } 129 + 130 + /* 131 + * Allocated and initialize SRCU combining tree. Returns @true if 132 + * allocation succeeded and @false otherwise. 133 + */ 134 + static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) 114 135 { 115 136 int cpu; 116 137 int i; ··· 173 92 174 93 /* Initialize geometry if it has not already been initialized. */ 175 94 rcu_init_geometry(); 95 + ssp->node = kcalloc(rcu_num_nodes, sizeof(*ssp->node), gfp_flags); 96 + if (!ssp->node) 97 + return false; 176 98 177 99 /* Work out the overall tree geometry. 
*/ 178 100 ssp->level[0] = &ssp->node[0]; ··· 189 105 WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != 190 106 ARRAY_SIZE(snp->srcu_data_have_cbs)); 191 107 for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { 192 - snp->srcu_have_cbs[i] = 0; 108 + snp->srcu_have_cbs[i] = SRCU_SNP_INIT_SEQ; 193 109 snp->srcu_data_have_cbs[i] = 0; 194 110 } 195 - snp->srcu_gp_seq_needed_exp = 0; 111 + snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ; 196 112 snp->grplo = -1; 197 113 snp->grphi = -1; 198 114 if (snp == &ssp->node[0]) { ··· 213 129 * Initialize the per-CPU srcu_data array, which feeds into the 214 130 * leaves of the srcu_node tree. 215 131 */ 216 - WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != 217 - ARRAY_SIZE(sdp->srcu_unlock_count)); 218 132 level = rcu_num_lvls - 1; 219 133 snp_first = ssp->level[level]; 220 134 for_each_possible_cpu(cpu) { 221 135 sdp = per_cpu_ptr(ssp->sda, cpu); 222 - spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); 223 - rcu_segcblist_init(&sdp->srcu_cblist); 224 - sdp->srcu_cblist_invoking = false; 225 - sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq; 226 - sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq; 227 136 sdp->mynode = &snp_first[cpu / levelspread[level]]; 228 137 for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { 229 138 if (snp->grplo < 0) 230 139 snp->grplo = cpu; 231 140 snp->grphi = cpu; 232 141 } 233 - sdp->cpu = cpu; 234 - INIT_WORK(&sdp->work, srcu_invoke_callbacks); 235 - timer_setup(&sdp->delay_work, srcu_delay_timer, 0); 236 - sdp->ssp = ssp; 237 142 sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); 238 143 } 144 + smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_WAIT_BARRIER); 145 + return true; 239 146 } 240 147 241 148 /* 242 149 * Initialize non-compile-time initialized fields, including the 243 - * associated srcu_node and srcu_data structures. The is_static 244 - * parameter is passed through to init_srcu_struct_nodes(), and 245 - * also tells us that ->sda has already been wired up to srcu_data. 
150 + * associated srcu_node and srcu_data structures. The is_static parameter 151 + * tells us that ->sda has already been wired up to srcu_data. 246 152 */ 247 153 static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) 248 154 { 155 + ssp->srcu_size_state = SRCU_SIZE_SMALL; 156 + ssp->node = NULL; 249 157 mutex_init(&ssp->srcu_cb_mutex); 250 158 mutex_init(&ssp->srcu_gp_mutex); 251 159 ssp->srcu_idx = 0; ··· 246 170 mutex_init(&ssp->srcu_barrier_mutex); 247 171 atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); 248 172 INIT_DELAYED_WORK(&ssp->work, process_srcu); 173 + ssp->sda_is_static = is_static; 249 174 if (!is_static) 250 175 ssp->sda = alloc_percpu(struct srcu_data); 251 176 if (!ssp->sda) 252 177 return -ENOMEM; 253 - init_srcu_struct_nodes(ssp); 178 + init_srcu_struct_data(ssp); 254 179 ssp->srcu_gp_seq_needed_exp = 0; 255 180 ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); 181 + if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { 182 + if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) { 183 + if (!ssp->sda_is_static) { 184 + free_percpu(ssp->sda); 185 + ssp->sda = NULL; 186 + return -ENOMEM; 187 + } 188 + } else { 189 + WRITE_ONCE(ssp->srcu_size_state, SRCU_SIZE_BIG); 190 + } 191 + } 256 192 smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ 257 193 return 0; 258 194 } ··· 300 212 EXPORT_SYMBOL_GPL(init_srcu_struct); 301 213 302 214 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 215 + 216 + /* 217 + * Initiate a transition to SRCU_SIZE_BIG with lock held. 218 + */ 219 + static void __srcu_transition_to_big(struct srcu_struct *ssp) 220 + { 221 + lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); 222 + smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC); 223 + } 224 + 225 + /* 226 + * Initiate an idempotent transition to SRCU_SIZE_BIG. 
227 + */ 228 + static void srcu_transition_to_big(struct srcu_struct *ssp) 229 + { 230 + unsigned long flags; 231 + 232 + /* Double-checked locking on ->srcu_size_state. */ 233 + if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) 234 + return; 235 + spin_lock_irqsave_rcu_node(ssp, flags); 236 + if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) { 237 + spin_unlock_irqrestore_rcu_node(ssp, flags); 238 + return; 239 + } 240 + __srcu_transition_to_big(ssp); 241 + spin_unlock_irqrestore_rcu_node(ssp, flags); 242 + } 243 + 244 + /* 245 + * Check to see if the just-encountered contention event justifies 246 + * a transition to SRCU_SIZE_BIG. 247 + */ 248 + static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) 249 + { 250 + unsigned long j; 251 + 252 + if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_size_state) 253 + return; 254 + j = jiffies; 255 + if (ssp->srcu_size_jiffies != j) { 256 + ssp->srcu_size_jiffies = j; 257 + ssp->srcu_n_lock_retries = 0; 258 + } 259 + if (++ssp->srcu_n_lock_retries <= small_contention_lim) 260 + return; 261 + __srcu_transition_to_big(ssp); 262 + } 263 + 264 + /* 265 + * Acquire the specified srcu_data structure's ->lock, but check for 266 + * excessive contention, which results in initiation of a transition 267 + * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module 268 + * parameter permits this.
269 + */ 270 + static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) 271 + { 272 + struct srcu_struct *ssp = sdp->ssp; 273 + 274 + if (spin_trylock_irqsave_rcu_node(sdp, *flags)) 275 + return; 276 + spin_lock_irqsave_rcu_node(ssp, *flags); 277 + spin_lock_irqsave_check_contention(ssp); 278 + spin_unlock_irqrestore_rcu_node(ssp, *flags); 279 + spin_lock_irqsave_rcu_node(sdp, *flags); 280 + } 281 + 282 + /* 283 + * Acquire the specified srcu_struct structure's ->lock, but check for 284 + * excessive contention, which results in initiation of a transition 285 + * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module 286 + * parameter permits this. 287 + */ 288 + static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) 289 + { 290 + if (spin_trylock_irqsave_rcu_node(ssp, *flags)) 291 + return; 292 + spin_lock_irqsave_rcu_node(ssp, *flags); 293 + spin_lock_irqsave_check_contention(ssp); 294 + } 303 295 304 296 /* 305 297 * First-use initialization of statically allocated srcu_struct ··· 511 343 return sum; 512 344 } 513 345 514 - #define SRCU_INTERVAL 1 346 + #define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending. 347 + #define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers. 348 + #define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances. 349 + #define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances. 
515 350 516 351 /* 517 352 * Return grace-period delay, zero if there are expedited grace ··· 522 351 */ 523 352 static unsigned long srcu_get_delay(struct srcu_struct *ssp) 524 353 { 525 - if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), 526 - READ_ONCE(ssp->srcu_gp_seq_needed_exp))) 527 - return 0; 528 - return SRCU_INTERVAL; 354 + unsigned long jbase = SRCU_INTERVAL; 355 + 356 + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) 357 + jbase = 0; 358 + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) 359 + jbase += jiffies - READ_ONCE(ssp->srcu_gp_start); 360 + if (!jbase) { 361 + WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); 362 + if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE) 363 + jbase = 1; 364 + } 365 + return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase; 529 366 } 530 367 531 368 /** ··· 561 382 return; /* Forgot srcu_barrier(), so just leak it! */ 562 383 } 563 384 if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || 385 + WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) || 564 386 WARN_ON(srcu_readers_active(ssp))) { 565 - pr_info("%s: Active srcu_struct %p state: %d\n", 566 - __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))); 387 + pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n", 388 + __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)), 389 + rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed); 567 390 return; /* Caller forgot to stop doing call_srcu()? 
*/ 568 391 } 569 - free_percpu(ssp->sda); 570 - ssp->sda = NULL; 392 + if (!ssp->sda_is_static) { 393 + free_percpu(ssp->sda); 394 + ssp->sda = NULL; 395 + } 396 + kfree(ssp->node); 397 + ssp->node = NULL; 398 + ssp->srcu_size_state = SRCU_SIZE_SMALL; 571 399 } 572 400 EXPORT_SYMBOL_GPL(cleanup_srcu_struct); 573 401 ··· 620 434 */ 621 435 static void srcu_gp_start(struct srcu_struct *ssp) 622 436 { 623 - struct srcu_data *sdp = this_cpu_ptr(ssp->sda); 437 + struct srcu_data *sdp; 624 438 int state; 625 439 440 + if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) 441 + sdp = per_cpu_ptr(ssp->sda, 0); 442 + else 443 + sdp = this_cpu_ptr(ssp->sda); 626 444 lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); 627 445 WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); 628 446 spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ ··· 635 445 (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, 636 446 rcu_seq_snap(&ssp->srcu_gp_seq)); 637 447 spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ 448 + WRITE_ONCE(ssp->srcu_gp_start, jiffies); 449 + WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0); 638 450 smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ 639 451 rcu_seq_start(&ssp->srcu_gp_seq); 640 452 state = rcu_seq_state(ssp->srcu_gp_seq); ··· 709 517 int idx; 710 518 unsigned long mask; 711 519 struct srcu_data *sdp; 520 + unsigned long sgsne; 712 521 struct srcu_node *snp; 522 + int ss_state; 713 523 714 524 /* Prevent more than one additional grace period. 
*/ 715 525 mutex_lock(&ssp->srcu_cb_mutex); ··· 720 526 spin_lock_irq_rcu_node(ssp); 721 527 idx = rcu_seq_state(ssp->srcu_gp_seq); 722 528 WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); 723 - cbdelay = srcu_get_delay(ssp); 529 + cbdelay = !!srcu_get_delay(ssp); 724 530 WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns()); 725 531 rcu_seq_end(&ssp->srcu_gp_seq); 726 532 gpseq = rcu_seq_current(&ssp->srcu_gp_seq); ··· 731 537 /* A new grace period can start at this point. But only one. */ 732 538 733 539 /* Initiate callback invocation as needed. */ 734 - idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); 735 - srcu_for_each_node_breadth_first(ssp, snp) { 736 - spin_lock_irq_rcu_node(snp); 737 - cbs = false; 738 - last_lvl = snp >= ssp->level[rcu_num_lvls - 1]; 739 - if (last_lvl) 740 - cbs = snp->srcu_have_cbs[idx] == gpseq; 741 - snp->srcu_have_cbs[idx] = gpseq; 742 - rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1); 743 - if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq)) 744 - WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq); 745 - mask = snp->srcu_data_have_cbs[idx]; 746 - snp->srcu_data_have_cbs[idx] = 0; 747 - spin_unlock_irq_rcu_node(snp); 748 - if (cbs) 749 - srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); 750 - 751 - /* Occasionally prevent srcu_data counter wrap. 
*/ 752 - if (!(gpseq & counter_wrap_check) && last_lvl) 753 - for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { 754 - sdp = per_cpu_ptr(ssp->sda, cpu); 755 - spin_lock_irqsave_rcu_node(sdp, flags); 756 - if (ULONG_CMP_GE(gpseq, 757 - sdp->srcu_gp_seq_needed + 100)) 758 - sdp->srcu_gp_seq_needed = gpseq; 759 - if (ULONG_CMP_GE(gpseq, 760 - sdp->srcu_gp_seq_needed_exp + 100)) 761 - sdp->srcu_gp_seq_needed_exp = gpseq; 762 - spin_unlock_irqrestore_rcu_node(sdp, flags); 763 - } 540 + ss_state = smp_load_acquire(&ssp->srcu_size_state); 541 + if (ss_state < SRCU_SIZE_WAIT_BARRIER) { 542 + srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, 0), cbdelay); 543 + } else { 544 + idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); 545 + srcu_for_each_node_breadth_first(ssp, snp) { 546 + spin_lock_irq_rcu_node(snp); 547 + cbs = false; 548 + last_lvl = snp >= ssp->level[rcu_num_lvls - 1]; 549 + if (last_lvl) 550 + cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq; 551 + snp->srcu_have_cbs[idx] = gpseq; 552 + rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1); 553 + sgsne = snp->srcu_gp_seq_needed_exp; 554 + if (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, gpseq)) 555 + WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq); 556 + if (ss_state < SRCU_SIZE_BIG) 557 + mask = ~0; 558 + else 559 + mask = snp->srcu_data_have_cbs[idx]; 560 + snp->srcu_data_have_cbs[idx] = 0; 561 + spin_unlock_irq_rcu_node(snp); 562 + if (cbs) 563 + srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); 564 + } 764 565 } 566 + 567 + /* Occasionally prevent srcu_data counter wrap. 
*/ 568 + if (!(gpseq & counter_wrap_check)) 569 + for_each_possible_cpu(cpu) { 570 + sdp = per_cpu_ptr(ssp->sda, cpu); 571 + spin_lock_irqsave_rcu_node(sdp, flags); 572 + if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) 573 + sdp->srcu_gp_seq_needed = gpseq; 574 + if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100)) 575 + sdp->srcu_gp_seq_needed_exp = gpseq; 576 + spin_unlock_irqrestore_rcu_node(sdp, flags); 577 + } 765 578 766 579 /* Callback initiation done, allow grace periods after next. */ 767 580 mutex_unlock(&ssp->srcu_cb_mutex); ··· 784 583 } else { 785 584 spin_unlock_irq_rcu_node(ssp); 786 585 } 586 + 587 + /* Transition to big if needed. */ 588 + if (ss_state != SRCU_SIZE_SMALL && ss_state != SRCU_SIZE_BIG) { 589 + if (ss_state == SRCU_SIZE_ALLOC) 590 + init_srcu_struct_nodes(ssp, GFP_KERNEL); 591 + else 592 + smp_store_release(&ssp->srcu_size_state, ss_state + 1); 593 + } 787 594 } 788 595 789 596 /* ··· 805 596 unsigned long s) 806 597 { 807 598 unsigned long flags; 599 + unsigned long sgsne; 808 600 809 - for (; snp != NULL; snp = snp->srcu_parent) { 810 - if (rcu_seq_done(&ssp->srcu_gp_seq, s) || 811 - ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) 812 - return; 813 - spin_lock_irqsave_rcu_node(snp, flags); 814 - if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { 601 + if (snp) 602 + for (; snp != NULL; snp = snp->srcu_parent) { 603 + sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp); 604 + if (rcu_seq_done(&ssp->srcu_gp_seq, s) || 605 + (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s))) 606 + return; 607 + spin_lock_irqsave_rcu_node(snp, flags); 608 + sgsne = snp->srcu_gp_seq_needed_exp; 609 + if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) { 610 + spin_unlock_irqrestore_rcu_node(snp, flags); 611 + return; 612 + } 613 + WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); 815 614 spin_unlock_irqrestore_rcu_node(snp, flags); 816 - return; 817 615 } 818 - WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); 819 - 
spin_unlock_irqrestore_rcu_node(snp, flags); 820 - } 821 - spin_lock_irqsave_rcu_node(ssp, flags); 616 + spin_lock_irqsave_ssp_contention(ssp, &flags); 822 617 if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) 823 618 WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s); 824 619 spin_unlock_irqrestore_rcu_node(ssp, flags); ··· 843 630 { 844 631 unsigned long flags; 845 632 int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs); 846 - struct srcu_node *snp = sdp->mynode; 633 + unsigned long sgsne; 634 + struct srcu_node *snp; 635 + struct srcu_node *snp_leaf; 847 636 unsigned long snp_seq; 848 637 849 - /* Each pass through the loop does one level of the srcu_node tree. */ 850 - for (; snp != NULL; snp = snp->srcu_parent) { 851 - if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode) 852 - return; /* GP already done and CBs recorded. */ 853 - spin_lock_irqsave_rcu_node(snp, flags); 854 - if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { 638 + /* Ensure that snp node tree is fully initialized before traversing it */ 639 + if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) 640 + snp_leaf = NULL; 641 + else 642 + snp_leaf = sdp->mynode; 643 + 644 + if (snp_leaf) 645 + /* Each pass through the loop does one level of the srcu_node tree. */ 646 + for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) { 647 + if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != snp_leaf) 648 + return; /* GP already done and CBs recorded. */ 649 + spin_lock_irqsave_rcu_node(snp, flags); 855 650 snp_seq = snp->srcu_have_cbs[idx]; 856 - if (snp == sdp->mynode && snp_seq == s) 857 - snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 858 - spin_unlock_irqrestore_rcu_node(snp, flags); 859 - if (snp == sdp->mynode && snp_seq != s) { 860 - srcu_schedule_cbs_sdp(sdp, do_norm 861 - ? 
SRCU_INTERVAL 862 - : 0); 651 + if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) { 652 + if (snp == snp_leaf && snp_seq == s) 653 + snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 654 + spin_unlock_irqrestore_rcu_node(snp, flags); 655 + if (snp == snp_leaf && snp_seq != s) { 656 + srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0); 657 + return; 658 + } 659 + if (!do_norm) 660 + srcu_funnel_exp_start(ssp, snp, s); 863 661 return; 864 662 } 865 - if (!do_norm) 866 - srcu_funnel_exp_start(ssp, snp, s); 867 - return; 663 + snp->srcu_have_cbs[idx] = s; 664 + if (snp == snp_leaf) 665 + snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 666 + sgsne = snp->srcu_gp_seq_needed_exp; 667 + if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s))) 668 + WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); 669 + spin_unlock_irqrestore_rcu_node(snp, flags); 868 670 } 869 - snp->srcu_have_cbs[idx] = s; 870 - if (snp == sdp->mynode) 871 - snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 872 - if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) 873 - WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); 874 - spin_unlock_irqrestore_rcu_node(snp, flags); 875 - } 876 671 877 672 /* Top of tree, must ensure the grace period will be started. */ 878 - spin_lock_irqsave_rcu_node(ssp, flags); 673 + spin_lock_irqsave_ssp_contention(ssp, &flags); 879 674 if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) { 880 675 /* 881 676 * Record need for grace period s. Pair with load ··· 899 678 rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) { 900 679 WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); 901 680 srcu_gp_start(ssp); 681 + 682 + // And how can that list_add() in the "else" clause 683 + // possibly be safe for concurrent execution? Well, 684 + // it isn't. And it does not have to be. After all, it 685 + // can only be executed during early boot when there is only 686 + // the one boot CPU running with interrupts still disabled. 
902 687 if (likely(srcu_init_done)) 903 688 queue_delayed_work(rcu_gp_wq, &ssp->work, 904 - srcu_get_delay(ssp)); 689 + !!srcu_get_delay(ssp)); 905 690 else if (list_empty(&ssp->work.work.entry)) 906 691 list_add(&ssp->work.work.entry, &srcu_boot_list); 907 692 } ··· 1041 814 bool needgp = false; 1042 815 unsigned long s; 1043 816 struct srcu_data *sdp; 817 + struct srcu_node *sdp_mynode; 818 + int ss_state; 1044 819 1045 820 check_init_srcu_struct(ssp); 1046 821 idx = srcu_read_lock(ssp); 1047 - sdp = raw_cpu_ptr(ssp->sda); 1048 - spin_lock_irqsave_rcu_node(sdp, flags); 822 + ss_state = smp_load_acquire(&ssp->srcu_size_state); 823 + if (ss_state < SRCU_SIZE_WAIT_CALL) 824 + sdp = per_cpu_ptr(ssp->sda, 0); 825 + else 826 + sdp = raw_cpu_ptr(ssp->sda); 827 + spin_lock_irqsave_sdp_contention(sdp, &flags); 1049 828 if (rhp) 1050 829 rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); 1051 830 rcu_segcblist_advance(&sdp->srcu_cblist, ··· 1067 834 needexp = true; 1068 835 } 1069 836 spin_unlock_irqrestore_rcu_node(sdp, flags); 837 + 838 + /* Ensure that snp node tree is fully initialized before traversing it */ 839 + if (ss_state < SRCU_SIZE_WAIT_BARRIER) 840 + sdp_mynode = NULL; 841 + else 842 + sdp_mynode = sdp->mynode; 843 + 1070 844 if (needgp) 1071 845 srcu_funnel_gp_start(ssp, sdp, s, do_norm); 1072 846 else if (needexp) 1073 - srcu_funnel_exp_start(ssp, sdp->mynode, s); 847 + srcu_funnel_exp_start(ssp, sdp_mynode, s); 1074 848 srcu_read_unlock(ssp, idx); 1075 849 return s; 1076 850 } ··· 1337 1097 complete(&ssp->srcu_barrier_completion); 1338 1098 } 1339 1099 1100 + /* 1101 + * Enqueue an srcu_barrier() callback on the specified srcu_data 1102 + * structure's ->cblist. but only if that ->cblist already has at least one 1103 + * callback enqueued. 
Note that if a CPU already has callbacks enqueued, 1104 + * it must have already registered the need for a future grace period, 1105 + * so all we need do is enqueue a callback that will use the same grace 1106 + * period as the last callback already in the queue. 1107 + */ 1108 + static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) 1109 + { 1110 + spin_lock_irq_rcu_node(sdp); 1111 + atomic_inc(&ssp->srcu_barrier_cpu_cnt); 1112 + sdp->srcu_barrier_head.func = srcu_barrier_cb; 1113 + debug_rcu_head_queue(&sdp->srcu_barrier_head); 1114 + if (!rcu_segcblist_entrain(&sdp->srcu_cblist, 1115 + &sdp->srcu_barrier_head)) { 1116 + debug_rcu_head_unqueue(&sdp->srcu_barrier_head); 1117 + atomic_dec(&ssp->srcu_barrier_cpu_cnt); 1118 + } 1119 + spin_unlock_irq_rcu_node(sdp); 1120 + } 1121 + 1340 1122 /** 1341 1123 * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. 1342 1124 * @ssp: srcu_struct on which to wait for in-flight callbacks. ··· 1366 1104 void srcu_barrier(struct srcu_struct *ssp) 1367 1105 { 1368 1106 int cpu; 1369 - struct srcu_data *sdp; 1107 + int idx; 1370 1108 unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq); 1371 1109 1372 1110 check_init_srcu_struct(ssp); ··· 1382 1120 /* Initial count prevents reaching zero until all CBs are posted. */ 1383 1121 atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); 1384 1122 1385 - /* 1386 - * Each pass through this loop enqueues a callback, but only 1387 - * on CPUs already having callbacks enqueued. Note that if 1388 - * a CPU already has callbacks enqueue, it must have already 1389 - * registered the need for a future grace period, so all we 1390 - * need do is enqueue a callback that will use the same
1392 - */ 1393 - for_each_possible_cpu(cpu) { 1394 - sdp = per_cpu_ptr(ssp->sda, cpu); 1395 - spin_lock_irq_rcu_node(sdp); 1396 - atomic_inc(&ssp->srcu_barrier_cpu_cnt); 1397 - sdp->srcu_barrier_head.func = srcu_barrier_cb; 1398 - debug_rcu_head_queue(&sdp->srcu_barrier_head); 1399 - if (!rcu_segcblist_entrain(&sdp->srcu_cblist, 1400 - &sdp->srcu_barrier_head)) { 1401 - debug_rcu_head_unqueue(&sdp->srcu_barrier_head); 1402 - atomic_dec(&ssp->srcu_barrier_cpu_cnt); 1403 - } 1404 - spin_unlock_irq_rcu_node(sdp); 1405 - } 1123 + idx = srcu_read_lock(ssp); 1124 + if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) 1125 + srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0)); 1126 + else 1127 + for_each_possible_cpu(cpu) 1128 + srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu)); 1129 + srcu_read_unlock(ssp, idx); 1406 1130 1407 1131 /* Remove the initial count, at which point reaching zero can happen. */ 1408 1132 if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) ··· 1462 1214 srcu_flip(ssp); 1463 1215 spin_lock_irq_rcu_node(ssp); 1464 1216 rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2); 1217 + ssp->srcu_n_exp_nodelay = 0; 1465 1218 spin_unlock_irq_rcu_node(ssp); 1466 1219 } 1467 1220 ··· 1477 1228 mutex_unlock(&ssp->srcu_gp_mutex); 1478 1229 return; /* readers present, retry later. */ 1479 1230 } 1231 + ssp->srcu_n_exp_nodelay = 0; 1480 1232 srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. 
*/ 1481 1233 } 1482 1234 } ··· 1568 1318 */ 1569 1319 static void process_srcu(struct work_struct *work) 1570 1320 { 1321 + unsigned long curdelay; 1322 + unsigned long j; 1571 1323 struct srcu_struct *ssp; 1572 1324 1573 1325 ssp = container_of(work, struct srcu_struct, work.work); 1574 1326 1575 1327 srcu_advance_state(ssp); 1576 - srcu_reschedule(ssp, srcu_get_delay(ssp)); 1328 + curdelay = srcu_get_delay(ssp); 1329 + if (curdelay) { 1330 + WRITE_ONCE(ssp->reschedule_count, 0); 1331 + } else { 1332 + j = jiffies; 1333 + if (READ_ONCE(ssp->reschedule_jiffies) == j) { 1334 + WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1); 1335 + if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY) 1336 + curdelay = 1; 1337 + } else { 1338 + WRITE_ONCE(ssp->reschedule_count, 1); 1339 + WRITE_ONCE(ssp->reschedule_jiffies, j); 1340 + } 1341 + } 1342 + srcu_reschedule(ssp, curdelay); 1577 1343 } 1578 1344 1579 1345 void srcutorture_get_gp_data(enum rcutorture_type test_type, ··· 1603 1337 } 1604 1338 EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); 1605 1339 1340 + static const char * const srcu_size_state_name[] = { 1341 + "SRCU_SIZE_SMALL", 1342 + "SRCU_SIZE_ALLOC", 1343 + "SRCU_SIZE_WAIT_BARRIER", 1344 + "SRCU_SIZE_WAIT_CALL", 1345 + "SRCU_SIZE_WAIT_CBS1", 1346 + "SRCU_SIZE_WAIT_CBS2", 1347 + "SRCU_SIZE_WAIT_CBS3", 1348 + "SRCU_SIZE_WAIT_CBS4", 1349 + "SRCU_SIZE_BIG", 1350 + "SRCU_SIZE_???", 1351 + }; 1352 + 1606 1353 void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) 1607 1354 { 1608 1355 int cpu; 1609 1356 int idx; 1610 1357 unsigned long s0 = 0, s1 = 0; 1358 + int ss_state = READ_ONCE(ssp->srcu_size_state); 1359 + int ss_state_idx = ss_state; 1611 1360 1612 1361 idx = ssp->srcu_idx & 0x1; 1613 - pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):", 1614 - tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx); 1615 - for_each_possible_cpu(cpu) { 1616 - unsigned long l0, l1; 1617 - unsigned long u0, u1; 1618 - long c0, c1; 1619 - struct 
srcu_data *sdp; 1362 + if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name)) 1363 + ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1; 1364 + pr_alert("%s%s Tree SRCU g%ld state %d (%s)", 1365 + tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), ss_state, 1366 + srcu_size_state_name[ss_state_idx]); 1367 + if (!ssp->sda) { 1368 + // Called after cleanup_srcu_struct(), perhaps. 1369 + pr_cont(" No per-CPU srcu_data structures (->sda == NULL).\n"); 1370 + } else { 1371 + pr_cont(" per-CPU(idx=%d):", idx); 1372 + for_each_possible_cpu(cpu) { 1373 + unsigned long l0, l1; 1374 + unsigned long u0, u1; 1375 + long c0, c1; 1376 + struct srcu_data *sdp; 1620 1377 1621 - sdp = per_cpu_ptr(ssp->sda, cpu); 1622 - u0 = data_race(sdp->srcu_unlock_count[!idx]); 1623 - u1 = data_race(sdp->srcu_unlock_count[idx]); 1378 + sdp = per_cpu_ptr(ssp->sda, cpu); 1379 + u0 = data_race(sdp->srcu_unlock_count[!idx]); 1380 + u1 = data_race(sdp->srcu_unlock_count[idx]); 1624 1381 1625 - /* 1626 - * Make sure that a lock is always counted if the corresponding 1627 - * unlock is counted. 1628 - */ 1629 - smp_rmb(); 1382 + /* 1383 + * Make sure that a lock is always counted if the corresponding 1384 + * unlock is counted. 
1385 + */ 1386 + smp_rmb(); 1630 1387 1631 - l0 = data_race(sdp->srcu_lock_count[!idx]); 1632 - l1 = data_race(sdp->srcu_lock_count[idx]); 1388 + l0 = data_race(sdp->srcu_lock_count[!idx]); 1389 + l1 = data_race(sdp->srcu_lock_count[idx]); 1633 1390 1634 - c0 = l0 - u0; 1635 - c1 = l1 - u1; 1636 - pr_cont(" %d(%ld,%ld %c)", 1637 - cpu, c0, c1, 1638 - "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]); 1639 - s0 += c0; 1640 - s1 += c1; 1391 + c0 = l0 - u0; 1392 + c1 = l1 - u1; 1393 + pr_cont(" %d(%ld,%ld %c)", 1394 + cpu, c0, c1, 1395 + "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]); 1396 + s0 += c0; 1397 + s1 += c1; 1398 + } 1399 + pr_cont(" T(%ld,%ld)\n", s0, s1); 1641 1400 } 1642 - pr_cont(" T(%ld,%ld)\n", s0, s1); 1401 + if (SRCU_SIZING_IS_TORTURE()) 1402 + srcu_transition_to_big(ssp); 1643 1403 } 1644 1404 EXPORT_SYMBOL_GPL(srcu_torture_stats_print); 1645 1405 ··· 1682 1390 { 1683 1391 struct srcu_struct *ssp; 1684 1392 1393 + /* Decide on srcu_struct-size strategy. */ 1394 + if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) { 1395 + if (nr_cpu_ids >= big_cpu_lim) { 1396 + convert_to_big = SRCU_SIZING_INIT; // Don't bother waiting for contention. 1397 + pr_info("%s: Setting srcu_struct sizes to big.\n", __func__); 1398 + } else { 1399 + convert_to_big = SRCU_SIZING_NONE | SRCU_SIZING_CONTEND; 1400 + pr_info("%s: Setting srcu_struct sizes based on contention.\n", __func__); 1401 + } 1402 + } 1403 + 1685 1404 /* 1686 1405 * Once that is set, call_srcu() can follow the normal path and 1687 1406 * queue delayed work. This must follow RCU workqueues creation ··· 1703 1400 ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, 1704 1401 work.work.entry); 1705 1402 list_del_init(&ssp->work.work.entry); 1403 + if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && ssp->srcu_size_state == SRCU_SIZE_SMALL) 1404 + ssp->srcu_size_state = SRCU_SIZE_ALLOC; 1706 1405 queue_work(rcu_gp_wq, &ssp->work.work); 1707 1406 } 1708 1407 }
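The reworked srcu_get_delay() in this diff starts from the base interval, drops to zero when an expedited grace period is pending, lengthens the delay by the age of an in-progress grace period, rate-limits consecutive no-delay passes, and clamps the result. A standalone sketch of that arithmetic (srcu_delay_sketch and its parameters are illustrative names, not kernel API; the real code reads these values with READ_ONCE()/WRITE_ONCE() from the srcu_struct):

```c
#include <assert.h>

#define SRCU_INTERVAL 1		/* Base delay if no expedited GPs pending. */
#define SRCU_MAX_INTERVAL 10	/* Maximum incremental delay from slow readers. */
#define SRCU_MAX_NODELAY_PHASE 1 /* Max per-GP-phase consecutive no-delay passes. */

/* Hedged sketch of the srcu_get_delay() computation added above. */
static unsigned long srcu_delay_sketch(int exp_pending, unsigned long gp_age_jiffies,
				       unsigned long *n_exp_nodelay)
{
	unsigned long jbase = SRCU_INTERVAL;

	if (exp_pending)
		jbase = 0;			/* expedited work wants no delay */
	jbase += gp_age_jiffies;		/* slow readers lengthen the delay */
	if (!jbase) {
		if (++*n_exp_nodelay > SRCU_MAX_NODELAY_PHASE)
			jbase = 1;		/* stop monopolizing the CPU */
	}
	return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
}
```

The clamp is why srcu_gp_end() uses `!!srcu_get_delay(ssp)`: callers that only need a boolean "delay or not" collapse the jiffies count to 0 or 1.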
+1 -1
kernel/rcu/sync.c
··· 111 111 * a slowpath during the update. After this function returns, all 112 112 * subsequent calls to rcu_sync_is_idle() will return false, which 113 113 * tells readers to stay off their fastpaths. A later call to 114 - * rcu_sync_exit() re-enables reader slowpaths. 114 + * rcu_sync_exit() re-enables reader fastpaths. 115 115 * 116 116 * When called in isolation, rcu_sync_enter() must wait for a grace 117 117 * period, however, closely spaced calls to rcu_sync_enter() can
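The one-word sync.c fix matters because it describes the gate pattern rcu_sync implements: while any updater sits between rcu_sync_enter() and rcu_sync_exit(), readers must take their slowpath, and it is exit (not enter) that re-enables fastpaths. A minimal sketch of that gate (rcu_sync_sketch, sync_enter, etc. are illustrative; the real code also waits for grace periods before flipping state):

```c
#include <assert.h>
#include <stdbool.h>

/* Hedged sketch: gp_count stands in for rcu_sync's state machine. */
struct rcu_sync_sketch {
	int gp_count;	/* updaters currently between enter and exit */
};

static bool sync_is_idle(struct rcu_sync_sketch *rsp)
{
	return rsp->gp_count == 0;	/* idle => readers may use fastpath */
}

static void sync_enter(struct rcu_sync_sketch *rsp)
{
	rsp->gp_count++;	/* real code also waits for a grace period here */
}

static void sync_exit(struct rcu_sync_sketch *rsp)
{
	rsp->gp_count--;	/* real code defers fastpath re-enable past a GP */
}
```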
+72 -17
kernel/rcu/tasks.h
··· 46 46 47 47 /** 48 48 * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism. 49 - * @cbs_wq: Wait queue allowing new callback to get kthread's attention. 49 + * @cbs_wait: RCU wait allowing a new callback to get kthread's attention. 50 50 * @cbs_gbl_lock: Lock protecting callback list. 51 51 * @kthread_ptr: This flavor's grace-period/callback-invocation kthread. 52 52 * @gp_func: This flavor's grace-period-wait function. ··· 77 77 * @kname: This flavor's kthread name. 78 78 */ 79 79 struct rcu_tasks { 80 - struct wait_queue_head cbs_wq; 80 + struct rcuwait cbs_wait; 81 81 raw_spinlock_t cbs_gbl_lock; 82 82 int gp_state; 83 83 int gp_sleep; ··· 113 113 #define DEFINE_RCU_TASKS(rt_name, gp, call, n) \ 114 114 static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \ 115 115 .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock), \ 116 - .rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup), \ 116 + .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), \ 117 117 }; \ 118 118 static struct rcu_tasks rt_name = \ 119 119 { \ 120 - .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \ 120 + .cbs_wait = __RCUWAIT_INITIALIZER(rt_name.wait), \ 121 121 .cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock), \ 122 122 .gp_func = gp, \ 123 123 .call_func = call, \ ··· 143 143 #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) 144 144 static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT; 145 145 module_param(rcu_task_stall_timeout, int, 0644); 146 + #define RCU_TASK_STALL_INFO (HZ * 10) 147 + static int rcu_task_stall_info __read_mostly = RCU_TASK_STALL_INFO; 148 + module_param(rcu_task_stall_info, int, 0644); 149 + static int rcu_task_stall_info_mult __read_mostly = 3; 150 + module_param(rcu_task_stall_info_mult, int, 0444); 146 151 147 152 static int rcu_task_enqueue_lim __read_mostly = -1; 148 153 module_param(rcu_task_enqueue_lim, int, 0444); ··· 266 261 struct rcu_tasks_percpu *rtpcp = 
container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work); 267 262 268 263 rtp = rtpcp->rtpp; 269 - wake_up(&rtp->cbs_wq); 264 + rcuwait_wake_up(&rtp->cbs_wait); 270 265 } 271 266 272 267 // Enqueue a callback for the specified flavor of Tasks RCU. 273 268 static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, 274 269 struct rcu_tasks *rtp) 275 270 { 271 + int chosen_cpu; 276 272 unsigned long flags; 273 + int ideal_cpu; 277 274 unsigned long j; 278 275 bool needadjust = false; 279 276 bool needwake; ··· 285 278 rhp->func = func; 286 279 local_irq_save(flags); 287 280 rcu_read_lock(); 288 - rtpcp = per_cpu_ptr(rtp->rtpcpu, 289 - smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift)); 281 + ideal_cpu = smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift); 282 + chosen_cpu = cpumask_next(ideal_cpu - 1, cpu_possible_mask); 283 + rtpcp = per_cpu_ptr(rtp->rtpcpu, chosen_cpu); 290 284 if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled. 291 285 raw_spin_lock_rcu_node(rtpcp); // irqs already disabled. 292 286 j = jiffies; ··· 468 460 } 469 461 } 470 462 471 - if (rcu_segcblist_empty(&rtpcp->cblist)) 463 + if (rcu_segcblist_empty(&rtpcp->cblist) || !cpu_possible(cpu)) 472 464 return; 473 465 raw_spin_lock_irqsave_rcu_node(rtpcp, flags); 474 466 rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq)); ··· 517 509 set_tasks_gp_state(rtp, RTGS_WAIT_CBS); 518 510 519 511 /* If there were none, wait a bit and start over. */ 520 - wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp))); 512 + rcuwait_wait_event(&rtp->cbs_wait, 513 + (needgpcb = rcu_tasks_need_gpcb(rtp)), 514 + TASK_IDLE); 521 515 522 516 if (needgpcb & 0x2) { 523 517 // Wait for one grace period. 
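The call_rcu_tasks_generic() hunk above computes `ideal_cpu = smp_processor_id() >> percpu_enqueue_shift` and then rounds it up via `cpumask_next(ideal_cpu - 1, cpu_possible_mask)`, so a callback never lands on a shard whose CPU is not possible. A userspace sketch of that selection (choose_shard and the possible[] array are illustrative stand-ins for the cpumask machinery):

```c
#include <assert.h>

/* Hedged sketch: possible[] is a sorted list of possible CPU ids,
 * standing in for cpu_possible_mask. Returns the first possible CPU
 * >= ideal, mirroring cpumask_next(ideal - 1, cpu_possible_mask). */
static int choose_shard(int this_cpu, int enqueue_shift,
			const int *possible, int nr)
{
	int ideal = this_cpu >> enqueue_shift;
	int i;

	for (i = 0; i < nr; i++)
		if (possible[i] >= ideal)
			return possible[i];
	return -1;	/* no possible CPU at or above ideal */
}
```

This pairs with the `!cpu_possible(cpu)` check added to rcu_tasks_need_gpcb(): callbacks are only ever enqueued on, and harvested from, possible CPUs.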
··· 558 548 static void __init rcu_tasks_bootup_oddness(void) 559 549 { 560 550 #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) 551 + int rtsimc; 552 + 561 553 if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT) 562 554 pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout); 555 + rtsimc = clamp(rcu_task_stall_info_mult, 1, 10); 556 + if (rtsimc != rcu_task_stall_info_mult) { 557 + pr_info("\tTasks-RCU CPU stall info multiplier clamped to %d (rcu_task_stall_info_mult).\n", rtsimc); 558 + rcu_task_stall_info_mult = rtsimc; 559 + } 563 560 #endif /* #ifdef CONFIG_TASKS_RCU */ 564 561 #ifdef CONFIG_TASKS_RCU 565 562 pr_info("\tTrampoline variant of Tasks RCU enabled.\n"); ··· 585 568 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */ 586 569 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s) 587 570 { 588 - struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, 0); // for_each... 571 + int cpu; 572 + bool havecbs = false; 573 + 574 + for_each_possible_cpu(cpu) { 575 + struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); 576 + 577 + if (!data_race(rcu_segcblist_empty(&rtpcp->cblist))) { 578 + havecbs = true; 579 + break; 580 + } 581 + } 589 582 pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n", 590 583 rtp->kname, 591 584 tasks_gp_state_getname(rtp), data_race(rtp->gp_state), ··· 603 576 data_race(rcu_seq_current(&rtp->tasks_gp_seq)), 604 577 data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis), 605 578 ".k"[!!data_race(rtp->kthread_ptr)], 606 - ".C"[!data_race(rcu_segcblist_empty(&rtpcp->cblist))], 579 + ".C"[havecbs], 607 580 s); 608 581 } 609 582 #endif // #ifndef CONFIG_TINY_RCU ··· 619 592 /* Wait for one RCU-tasks grace period. 
*/ 620 593 static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) 621 594 { 622 - struct task_struct *g, *t; 623 - unsigned long lastreport; 624 - LIST_HEAD(holdouts); 595 + struct task_struct *g; 625 596 int fract; 597 + LIST_HEAD(holdouts); 598 + unsigned long j; 599 + unsigned long lastinfo; 600 + unsigned long lastreport; 601 + bool reported = false; 602 + int rtsi; 603 + struct task_struct *t; 626 604 627 605 set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP); 628 606 rtp->pregp_func(); ··· 653 621 * is empty, we are done. 654 622 */ 655 623 lastreport = jiffies; 624 + lastinfo = lastreport; 625 + rtsi = READ_ONCE(rcu_task_stall_info); 656 626 657 627 // Start off with initial wait and slowly back off to 1 HZ wait. 658 628 fract = rtp->init_fract; 659 629 660 630 while (!list_empty(&holdouts)) { 631 + ktime_t exp; 661 632 bool firstreport; 662 633 bool needreport; 663 634 int rtst; 664 635 665 - /* Slowly back off waiting for holdouts */ 636 + // Slowly back off waiting for holdouts 666 637 set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS); 667 - schedule_timeout_idle(fract); 638 + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 639 + schedule_timeout_idle(fract); 640 + } else { 641 + exp = jiffies_to_nsecs(fract); 642 + __set_current_state(TASK_IDLE); 643 + schedule_hrtimeout_range(&exp, jiffies_to_nsecs(HZ / 2), HRTIMER_MODE_REL_HARD); 644 + } 668 645 669 646 if (fract < HZ) 670 647 fract++; 671 648 672 649 rtst = READ_ONCE(rcu_task_stall_timeout); 673 650 needreport = rtst > 0 && time_after(jiffies, lastreport + rtst); 674 - if (needreport) 651 + if (needreport) { 675 652 lastreport = jiffies; 653 + reported = true; 654 + } 676 655 firstreport = true; 677 656 WARN_ON(signal_pending(current)); 678 657 set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS); 679 658 rtp->holdouts_func(&holdouts, needreport, &firstreport); 659 + 660 + // Print pre-stall informational messages if needed. 
661 + j = jiffies; 662 + if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) { 663 + lastinfo = j; 664 + rtsi = rtsi * rcu_task_stall_info_mult; 665 + pr_info("%s: %s grace period %lu is %lu jiffies old.\n", 666 + __func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start); 667 + } 680 668 } 681 669 682 670 set_tasks_gp_state(rtp, RTGS_POST_GP); ··· 1002 950 // Wait for one rude RCU-tasks grace period. 1003 951 static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp) 1004 952 { 953 + if (num_online_cpus() <= 1) 954 + return; // Fastpath for only one CPU. 955 + 1005 956 rtp->n_ipis += cpumask_weight(cpu_online_mask); 1006 957 schedule_on_each_cpu(rcu_tasks_be_rude); 1007 958 }
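The call_rcu_tasks_generic() hunk above stops indexing the per-CPU callback queues directly with the shifted CPU number: it computes an `ideal_cpu` and then uses `cpumask_next()` to round up to a CPU that actually exists in `cpu_possible_mask`, since on sparse possible masks the shift can land on an impossible CPU. A minimal userspace sketch of that rounding, modeling the possible mask as a 64-bit word (`next_possible()` and `choose_queue_cpu()` are hypothetical stand-ins, not kernel APIs):

```c
#include <stdint.h>

/* Model of cpu_possible_mask: bit n set means CPU n exists. */
static int next_possible(uint64_t mask, int ideal_cpu)
{
	/* Like cpumask_next(ideal_cpu - 1, mask): smallest set bit >= ideal_cpu. */
	for (int cpu = ideal_cpu; cpu < 64; cpu++)
		if (mask & (1ULL << cpu))
			return cpu;
	return -1; /* the kernel would return a value >= nr_cpu_ids here */
}

/* Shift the enqueuing CPU down, then round up to a possible CPU. */
static int choose_queue_cpu(uint64_t possible, int this_cpu, int shift)
{
	int ideal_cpu = this_cpu >> shift;

	return next_possible(possible, ideal_cpu);
}
```

With possible CPUs {0, 2, 3} and a shift of 2, CPU 5 maps to ideal CPU 1, which rounds up to the possible CPU 2; this is why the companion hunk also skips queues on `!cpu_possible(cpu)`.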
+78 -17
kernel/rcu/tree.c
··· 1679 1679 rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */ 1680 1680 if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap) 1681 1681 WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); 1682 + if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap)) 1683 + WRITE_ONCE(rdp->last_sched_clock, jiffies); 1682 1684 WRITE_ONCE(rdp->gpwrap, false); 1683 1685 rcu_gpnum_ovf(rnp, rdp); 1684 1686 return ret; ··· 1707 1705 rcu_gp_kthread_wake(); 1708 1706 } 1709 1707 1708 + static atomic_t *rcu_gp_slow_suppress; 1709 + 1710 + /* Register a counter to suppress debugging grace-period delays. */ 1711 + void rcu_gp_slow_register(atomic_t *rgssp) 1712 + { 1713 + WARN_ON_ONCE(rcu_gp_slow_suppress); 1714 + 1715 + WRITE_ONCE(rcu_gp_slow_suppress, rgssp); 1716 + } 1717 + EXPORT_SYMBOL_GPL(rcu_gp_slow_register); 1718 + 1719 + /* Unregister a counter, with NULL for not caring which. */ 1720 + void rcu_gp_slow_unregister(atomic_t *rgssp) 1721 + { 1722 + WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress); 1723 + 1724 + WRITE_ONCE(rcu_gp_slow_suppress, NULL); 1725 + } 1726 + EXPORT_SYMBOL_GPL(rcu_gp_slow_unregister); 1727 + 1728 + static bool rcu_gp_slow_is_suppressed(void) 1729 + { 1730 + atomic_t *rgssp = READ_ONCE(rcu_gp_slow_suppress); 1731 + 1732 + return rgssp && atomic_read(rgssp); 1733 + } 1734 + 1710 1735 static void rcu_gp_slow(int delay) 1711 1736 { 1712 - if (delay > 0 && 1713 - !(rcu_seq_ctr(rcu_state.gp_seq) % 1714 - (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) 1737 + if (!rcu_gp_slow_is_suppressed() && delay > 0 && 1738 + !(rcu_seq_ctr(rcu_state.gp_seq) % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) 1715 1739 schedule_timeout_idle(delay); 1716 1740 } 1717 1741 ··· 2124 2096 /* Advance CBs to reduce false positives below. 
*/ 2125 2097 offloaded = rcu_rdp_is_offloaded(rdp); 2126 2098 if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) { 2099 + 2100 + // We get here if a grace period was needed (“needgp”) 2101 + // and the above call to rcu_accelerate_cbs() did not set 2102 + // the RCU_GP_FLAG_INIT bit in ->gp_state (which records 2103 + // the need for another grace period).  The purpose 2104 + // of the “offloaded” check is to avoid invoking 2105 + // rcu_accelerate_cbs() on an offloaded CPU because we do not 2106 + // hold the ->nocb_lock needed to safely access an offloaded 2107 + // ->cblist.  We do not want to acquire that lock because 2108 + // it can be heavily contended during callback floods. 2109 + 2127 2110 WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT); 2128 2111 WRITE_ONCE(rcu_state.gp_req_activity, jiffies); 2129 - trace_rcu_grace_period(rcu_state.name, 2130 - rcu_state.gp_seq, 2131 - TPS("newreq")); 2112 + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq")); 2132 2113 } else { 2133 - WRITE_ONCE(rcu_state.gp_flags, 2134 - rcu_state.gp_flags & RCU_GP_FLAG_INIT); 2114 + 2115 + // We get here either if there is no need for an 2116 + // additional grace period or if rcu_accelerate_cbs() has 2117 + // already set the RCU_GP_FLAG_INIT bit in ->gp_flags.  2118 + // So all we need to do is to clear all of the other 2119 + // ->gp_flags bits. 
2120 + 2121 + WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags & RCU_GP_FLAG_INIT); 2135 2122 } 2136 2123 raw_spin_unlock_irq_rcu_node(rnp); 2137 2124 ··· 2652 2609 */ 2653 2610 void rcu_sched_clock_irq(int user) 2654 2611 { 2612 + unsigned long j; 2613 + 2614 + if (IS_ENABLED(CONFIG_PROVE_RCU)) { 2615 + j = jiffies; 2616 + WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock))); 2617 + __this_cpu_write(rcu_data.last_sched_clock, j); 2618 + } 2655 2619 trace_rcu_utilization(TPS("Start scheduler-tick")); 2656 2620 lockdep_assert_irqs_disabled(); 2657 2621 raw_cpu_inc(rcu_data.ticks_this_gp); ··· 2674 2624 rcu_flavor_sched_clock_irq(user); 2675 2625 if (rcu_pending(user)) 2676 2626 invoke_rcu_core(); 2627 + if (user) 2628 + rcu_tasks_classic_qs(current, false); 2677 2629 lockdep_assert_irqs_disabled(); 2678 2630 2679 2631 trace_rcu_utilization(TPS("End scheduler-tick")); ··· 3769 3717 { 3770 3718 int ret; 3771 3719 3772 - if (IS_ENABLED(CONFIG_PREEMPTION)) 3720 + // Invoking preempt_model_*() too early gets a splat. 3721 + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE || 3722 + preempt_model_full() || preempt_model_rt()) 3773 3723 return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE; 3774 3724 might_sleep(); /* Check for RCU read-side critical section. 
*/ 3775 3725 preempt_disable(); ··· 4233 4179 rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; 4234 4180 rdp->rcu_onl_gp_seq = rcu_state.gp_seq; 4235 4181 rdp->rcu_onl_gp_flags = RCU_GP_CLEANED; 4182 + rdp->last_sched_clock = jiffies; 4236 4183 rdp->cpu = cpu; 4237 4184 rcu_boot_init_nocb_percpu_data(rdp); 4238 4185 } ··· 4535 4480 struct rcu_node *rnp; 4536 4481 struct sched_param sp; 4537 4482 struct task_struct *t; 4483 + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); 4538 4484 4539 4485 rcu_scheduler_fully_active = 1; 4540 4486 t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name); ··· 4553 4497 smp_store_release(&rcu_state.gp_kthread, t); /* ^^^ */ 4554 4498 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 4555 4499 wake_up_process(t); 4556 - rcu_spawn_nocb_kthreads(); 4557 - rcu_spawn_boost_kthreads(); 4500 + /* This is a pre-SMP initcall, we expect a single CPU */ 4501 + WARN_ON(num_online_cpus() > 1); 4502 + /* 4503 + * Those kthreads couldn't be created on rcu_init() -> rcutree_prepare_cpu() 4504 + * due to rcu_scheduler_fully_active. 4505 + */ 4506 + rcu_spawn_cpu_nocb_kthread(smp_processor_id()); 4507 + rcu_spawn_one_boost_kthread(rdp->mynode); 4558 4508 rcu_spawn_core_kthreads(); 4559 4509 return 0; 4560 4510 } ··· 4844 4782 4845 4783 void __init rcu_init(void) 4846 4784 { 4847 - int cpu; 4785 + int cpu = smp_processor_id(); 4848 4786 4849 4787 rcu_early_boot_tests(); 4850 4788 ··· 4864 4802 * or the scheduler are operational. 4865 4803 */ 4866 4804 pm_notifier(rcu_pm_notify, 0); 4867 - for_each_online_cpu(cpu) { 4868 - rcutree_prepare_cpu(cpu); 4869 - rcu_cpu_starting(cpu); 4870 - rcutree_online_cpu(cpu); 4871 - } 4805 + WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot. 4806 + rcutree_prepare_cpu(cpu); 4807 + rcu_cpu_starting(cpu); 4808 + rcutree_online_cpu(cpu); 4872 4809 4873 4810 /* Create workqueue for Tree SRCU and for expedited GPs. */ 4874 4811 rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
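The rcu_gp_slow_register()/rcu_gp_slow_unregister() pair added above lets a torture test hand RCU an `atomic_t`; while that counter is nonzero, rcu_gp_slow() skips its debugging grace-period delay. A userspace sketch of the same register/suppress pattern, using C11 atomics in place of the kernel's `atomic_t` (all names here are illustrative, not kernel symbols):

```c
#include <stdatomic.h>
#include <stddef.h>

static atomic_int *gp_slow_suppress; /* NULL: no suppression counter registered */

static void gp_slow_register(atomic_int *p)  { gp_slow_suppress = p; }
static void gp_slow_unregister(void)         { gp_slow_suppress = NULL; }

static int gp_slow_is_suppressed(void)
{
	atomic_int *p = gp_slow_suppress;

	return p && atomic_load(p);
}

/* Returns nonzero when the debugging delay would actually be taken. */
static int gp_would_delay(int delay)
{
	return !gp_slow_is_suppressed() && delay > 0;
}
```

The point of the indirection is that the suppressor (rcutorture) can flip its own counter without RCU exporting a writable flag.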
+2 -2
kernel/rcu/tree.h
··· 254 254 unsigned long rcu_onl_gp_seq; /* ->gp_seq at last online. */ 255 255 short rcu_onl_gp_flags; /* ->gp_flags at last online. */ 256 256 unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ 257 + unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */ 257 258 258 259 int cpu; 259 260 }; ··· 365 364 arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; 366 365 /* Synchronize offline with */ 367 366 /* GP pre-initialization. */ 367 + int nocb_is_setup; /* nocb is setup from boot */ 368 368 }; 369 369 370 370 /* Values for rcu_state structure's gp_flags field. */ ··· 423 421 static bool rcu_is_callbacks_kthread(void); 424 422 static void rcu_cpu_kthread_setup(unsigned int cpu); 425 423 static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp); 426 - static void __init rcu_spawn_boost_kthreads(void); 427 424 static bool rcu_preempt_has_tasks(struct rcu_node *rnp); 428 425 static bool rcu_preempt_need_deferred_qs(struct task_struct *t); 429 426 static void rcu_preempt_deferred_qs(struct task_struct *t); ··· 440 439 static bool do_nocb_deferred_wakeup(struct rcu_data *rdp); 441 440 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); 442 441 static void rcu_spawn_cpu_nocb_kthread(int cpu); 443 - static void __init rcu_spawn_nocb_kthreads(void); 444 442 static void show_rcu_nocb_state(struct rcu_data *rdp); 445 443 static void rcu_nocb_lock(struct rcu_data *rdp); 446 444 static void rcu_nocb_unlock(struct rcu_data *rdp);
+4 -35
kernel/rcu/tree_nocb.h
··· 60 60 * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. 61 61 * If the list is invalid, a warning is emitted and all CPUs are offloaded. 62 62 */ 63 - 64 - static bool rcu_nocb_is_setup; 65 - 66 63 static int __init rcu_nocb_setup(char *str) 67 64 { 68 65 alloc_bootmem_cpumask_var(&rcu_nocb_mask); ··· 69 72 cpumask_setall(rcu_nocb_mask); 70 73 } 71 74 } 72 - rcu_nocb_is_setup = true; 75 + rcu_state.nocb_is_setup = true; 73 76 return 1; 74 77 } 75 78 __setup("rcu_nocbs", rcu_nocb_setup); ··· 210 213 { 211 214 init_swait_queue_head(&rnp->nocb_gp_wq[0]); 212 215 init_swait_queue_head(&rnp->nocb_gp_wq[1]); 213 - } 214 - 215 - /* Is the specified CPU a no-CBs CPU? */ 216 - bool rcu_is_nocb_cpu(int cpu) 217 - { 218 - if (cpumask_available(rcu_nocb_mask)) 219 - return cpumask_test_cpu(cpu, rcu_nocb_mask); 220 - return false; 221 216 } 222 217 223 218 static bool __wake_nocb_gp(struct rcu_data *rdp_gp, ··· 1169 1180 return; 1170 1181 } 1171 1182 } 1172 - rcu_nocb_is_setup = true; 1183 + rcu_state.nocb_is_setup = true; 1173 1184 } 1174 1185 1175 - if (!rcu_nocb_is_setup) 1186 + if (!rcu_state.nocb_is_setup) 1176 1187 return; 1177 1188 1178 1189 #if defined(CONFIG_NO_HZ_FULL) ··· 1230 1241 struct task_struct *t; 1231 1242 struct sched_param sp; 1232 1243 1233 - if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup) 1244 + if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup) 1234 1245 return; 1235 1246 1236 1247 /* If there already is an rcuo kthread, then nothing to do. */ ··· 1264 1275 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1265 1276 WRITE_ONCE(rdp->nocb_cb_kthread, t); 1266 1277 WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread); 1267 - } 1268 - 1269 - /* 1270 - * Once the scheduler is running, spawn rcuo kthreads for all online 1271 - * no-CBs CPUs. This assumes that the early_initcall()s happen before 1272 - * non-boot CPUs come online -- if this changes, we will need to add 1273 - * some mutual exclusion. 
1274 - */ 1275 - static void __init rcu_spawn_nocb_kthreads(void) 1276 - { 1277 - int cpu; 1278 - 1279 - if (rcu_nocb_is_setup) { 1280 - for_each_online_cpu(cpu) 1281 - rcu_spawn_cpu_nocb_kthread(cpu); 1282 - } 1283 1278 } 1284 1279 1285 1280 /* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */ ··· 1519 1546 } 1520 1547 1521 1548 static void rcu_spawn_cpu_nocb_kthread(int cpu) 1522 - { 1523 - } 1524 - 1525 - static void __init rcu_spawn_nocb_kthreads(void) 1526 1549 { 1527 1550 } 1528 1551
+10 -18
kernel/rcu/tree_plugin.h
··· 486 486 t->rcu_read_unlock_special.s = 0; 487 487 if (special.b.need_qs) { 488 488 if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) { 489 + rdp->cpu_no_qs.b.norm = false; 489 490 rcu_report_qs_rdp(rdp); 490 491 udelay(rcu_unlock_delay); 491 492 } else { ··· 661 660 expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) { 662 661 // Get scheduler to re-evaluate and call hooks. 663 662 // If !IRQ_WORK, FQS scan will eventually IPI. 664 - init_irq_work(&rdp->defer_qs_iw, rcu_preempt_deferred_qs_handler); 663 + if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && 664 + IS_ENABLED(CONFIG_PREEMPT_RT)) 665 + rdp->defer_qs_iw = IRQ_WORK_INIT_HARD( 666 + rcu_preempt_deferred_qs_handler); 667 + else 668 + init_irq_work(&rdp->defer_qs_iw, 669 + rcu_preempt_deferred_qs_handler); 665 670 rdp->defer_qs_iw_pending = true; 666 671 irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu); 667 672 } ··· 1131 1124 __releases(rnp->lock) 1132 1125 { 1133 1126 raw_lockdep_assert_held_rcu_node(rnp); 1134 - if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { 1127 + if (!rnp->boost_kthread_task || 1128 + (!rcu_preempt_blocked_readers_cgp(rnp) && !rnp->exp_tasks)) { 1135 1129 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1136 1130 return; 1137 1131 } ··· 1234 1226 free_cpumask_var(cm); 1235 1227 } 1236 1228 1237 - /* 1238 - * Spawn boost kthreads -- called as soon as the scheduler is running. 
1239 - */ 1240 - static void __init rcu_spawn_boost_kthreads(void) 1241 - { 1242 - struct rcu_node *rnp; 1243 - 1244 - rcu_for_each_leaf_node(rnp) 1245 - if (rcu_rnp_online_cpus(rnp)) 1246 - rcu_spawn_one_boost_kthread(rnp); 1247 - } 1248 - 1249 1229 #else /* #ifdef CONFIG_RCU_BOOST */ 1250 1230 1251 1231 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) ··· 1256 1260 } 1257 1261 1258 1262 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1259 - { 1260 - } 1261 - 1262 - static void __init rcu_spawn_boost_kthreads(void) 1263 1263 { 1264 1264 } 1265 1265
+4 -4
kernel/rcu/tree_stall.h
··· 565 565 566 566 for_each_possible_cpu(cpu) 567 567 totqlen += rcu_get_n_cbs_cpu(cpu); 568 - pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", 568 + pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n", 569 569 smp_processor_id(), (long)(jiffies - gps), 570 - (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); 570 + (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus); 571 571 if (ndetected) { 572 572 rcu_dump_cpu_stacks(); 573 573 ··· 626 626 raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); 627 627 for_each_possible_cpu(cpu) 628 628 totqlen += rcu_get_n_cbs_cpu(cpu); 629 - pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n", 629 + pr_cont("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n", 630 630 jiffies - gps, 631 - (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); 631 + (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus); 632 632 633 633 rcu_check_gp_kthread_expired_fqs_timer(); 634 634 rcu_check_gp_kthread_starvation();
+3 -2
kernel/scftorture.c
··· 267 267 } 268 268 this_cpu_inc(scf_invoked_count); 269 269 if (longwait <= 0) { 270 - if (!(r & 0xffc0)) 270 + if (!(r & 0xffc0)) { 271 271 udelay(r & 0x3f); 272 - goto out; 272 + goto out; 273 + } 273 274 } 274 275 if (r & 0xfff) 275 276 goto out;
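The scftorture hunk above fixes a classic missing-braces bug: the `goto out` was indented under the inner `if` but not governed by it, so every `longwait <= 0` pass bailed out early instead of only the short-delay ones. A compilable sketch of the before/after control flow (the helpers just report which path was taken; `udelay()` is elided as a comment):

```c
/* Buggy shape: the early exit runs whenever longwait <= 0. */
static int took_early_exit_buggy(int r, int longwait)
{
	if (longwait <= 0) {
		if (!(r & 0xffc0))
			; /* udelay(r & 0x3f) */
		return 1; /* the unbraced "goto out" */
	}
	return 0;
}

/* Fixed shape: the early exit is taken only on the short-delay branch. */
static int took_early_exit_fixed(int r, int longwait)
{
	if (longwait <= 0) {
		if (!(r & 0xffc0)) {
			; /* udelay(r & 0x3f) */
			return 1; /* goto out */
		}
	}
	return 0;
}
```

Before the fix, the longer delays selected by `r & 0xfff` further down were unreachable whenever `longwait <= 0`.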
+12
kernel/sched/core.c
··· 8409 8409 } 8410 8410 } 8411 8411 8412 + #define PREEMPT_MODEL_ACCESSOR(mode) \ 8413 + bool preempt_model_##mode(void) \ 8414 + { \ 8415 + WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \ 8416 + return preempt_dynamic_mode == preempt_dynamic_##mode; \ 8417 + } \ 8418 + EXPORT_SYMBOL_GPL(preempt_model_##mode) 8419 + 8420 + PREEMPT_MODEL_ACCESSOR(none); 8421 + PREEMPT_MODEL_ACCESSOR(voluntary); 8422 + PREEMPT_MODEL_ACCESSOR(full); 8423 + 8412 8424 #else /* !CONFIG_PREEMPT_DYNAMIC */ 8413 8425 8414 8426 static inline void preempt_dynamic_init(void) { }
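The PREEMPT_MODEL_ACCESSOR macro added to kernel/sched/core.c above stamps out one `preempt_model_<mode>()` predicate per preemption mode via token pasting, so PREEMPT_DYNAMIC kernels can answer the question at runtime rather than via `IS_ENABLED()`. The same generation trick in a userspace sketch (the enum and variable names are illustrative, and the kernel versions additionally WARN if queried before the mode is defined):

```c
enum preempt_mode { preempt_mode_none, preempt_mode_voluntary, preempt_mode_full };

static enum preempt_mode current_mode = preempt_mode_none;

#define PREEMPT_MODEL_ACCESSOR(mode)                 \
	static int preempt_model_##mode(void)        \
	{                                            \
		return current_mode == preempt_mode_##mode; \
	}

PREEMPT_MODEL_ACCESSOR(none)
PREEMPT_MODEL_ACCESSOR(voluntary)
PREEMPT_MODEL_ACCESSOR(full)
```

This is what lets the rcu_read_lock_held_common() change in tree.c replace a compile-time `IS_ENABLED(CONFIG_PREEMPTION)` test with `preempt_model_full() || preempt_model_rt()`.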
+5 -2
kernel/smp.c
··· 183 183 static DEFINE_PER_CPU(void *, cur_csd_info); 184 184 static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local); 185 185 186 - #define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC) 186 + static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */ 187 + module_param(csd_lock_timeout, ulong, 0444); 188 + 187 189 static atomic_t csd_bug_count = ATOMIC_INIT(0); 188 190 static u64 cfd_seq; 189 191 ··· 331 329 u64 ts2, ts_delta; 332 330 call_single_data_t *cpu_cur_csd; 333 331 unsigned int flags = READ_ONCE(csd->node.u_flags); 332 + unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC; 334 333 335 334 if (!(flags & CSD_FLAG_LOCK)) { 336 335 if (!unlikely(*bug_id)) ··· 344 341 345 342 ts2 = sched_clock(); 346 343 ts_delta = ts2 - *ts1; 347 - if (likely(ts_delta <= CSD_LOCK_TIMEOUT)) 344 + if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0)) 348 345 return false; 349 346 350 347 firsttime = !*bug_id;
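The kernel/smp.c hunk above turns the fixed 5-second CSD-lock timeout into a module parameter given in milliseconds, converts it to nanoseconds at each check, and treats 0 as "never report". A sketch of the resulting predicate (NSEC_PER_MSEC is 1,000,000; the function name is illustrative):

```c
#include <stdbool.h>

#define NSEC_PER_MSEC 1000000ULL

/* True when a CSD-lock wait of delta_ns should be reported as hung. */
static bool csd_lock_timed_out(unsigned long long delta_ns,
			       unsigned long timeout_ms)
{
	unsigned long long timeout_ns = timeout_ms * NSEC_PER_MSEC;

	if (timeout_ns == 0)	/* smp.csd_lock_timeout=0 disables the check */
		return false;
	return delta_ns > timeout_ns;
}
```

The default of 5000 ms preserves the old 5 * NSEC_PER_SEC behavior exactly.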
+1
kernel/trace/Kconfig
··· 144 144 select BINARY_PRINTF 145 145 select EVENT_TRACING 146 146 select TRACE_CLOCK 147 + select TASKS_RCU if PREEMPTION 147 148 148 149 config GENERIC_TRACER 149 150 bool
+1 -1
tools/testing/selftests/rcutorture/bin/functions.sh
··· 301 301 echo $2 -smp $3 302 302 ;; 303 303 qemu-system-ppc64) 304 - nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`" 304 + nt="`lscpu | sed -n 's/^Thread(s) per core:\s*//p'`" 305 305 echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt 306 306 ;; 307 307 esac
+1 -1
tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
··· 36 36 then 37 37 egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags 38 38 files="$files $i.diags $i" 39 - elif ! test -f ${scenariobasedir}/vmlinux 39 + elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run" 40 40 then 41 41 echo No ${scenariobasedir}/vmlinux file > $i.diags 42 42 files="$files $i.diags $i"
+6 -1
tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
··· 33 33 TORTURE_SUITE="`cat $i/../torture_suite`" 34 34 configfile=`echo $i | sed -e 's,^.*/,,'` 35 35 rm -f $i/console.log.*.diags 36 - kvm-recheck-${TORTURE_SUITE}.sh $i 36 + case "${TORTURE_SUITE}" in 37 + X*) 38 + ;; 39 + *) 40 + kvm-recheck-${TORTURE_SUITE}.sh $i 41 + esac 37 42 if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137 38 43 then 39 44 echo QEMU error, output:
+7 -7
tools/testing/selftests/rcutorture/bin/kvm-remote.sh
··· 138 138 # Check first to avoid the need for cleanup for system-name typos 139 139 for i in $systems 140 140 do 141 - ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`" 142 - echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log" 141 + ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`" 143 142 ret=$? 144 143 if test "$ret" -ne 0 145 144 then 146 145 echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log" 147 146 exit 4 148 147 fi 148 + echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log" 149 149 done 150 150 151 151 # Download and expand the tarball on all systems. ··· 153 153 for i in $systems 154 154 do 155 155 echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log" 156 - cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" 156 + cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -" 157 157 ret=$? 158 158 tries=0 159 159 while test "$ret" -ne 0 160 160 do 161 161 echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log" 162 162 sleep 60 163 - cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" 163 + cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -" 164 164 ret=$? 165 165 if test "$ret" -ne 0 166 166 then ··· 185 185 186 186 while : 187 187 do 188 - ssh $1 "test -f \"$2\"" 188 + ssh -o BatchMode=yes $1 "test -f \"$2\"" 189 189 ret=$? 190 190 if test "$ret" -eq 255 191 191 then ··· 228 228 then 229 229 continue # System still running last test, skip. 230 230 fi 231 - ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2 231 + ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2 232 232 ret=$? 
233 233 if test "$ret" -ne 0 234 234 then ··· 267 267 sleep 30 268 268 done 269 269 echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" 270 - ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) 270 + ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) 271 271 done 272 272 273 273 ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
+8 -2
tools/testing/selftests/rcutorture/bin/kvm.sh
··· 44 44 TORTURE_KCONFIG_KCSAN_ARG="" 45 45 TORTURE_KMAKE_ARG="" 46 46 TORTURE_QEMU_MEM=512 47 + torture_qemu_mem_default=1 47 48 TORTURE_REMOTE= 48 49 TORTURE_SHUTDOWN_GRACE=180 49 50 TORTURE_SUITE=rcu ··· 87 86 echo " --remote" 88 87 echo " --results absolute-pathname" 89 88 echo " --shutdown-grace seconds" 90 - echo " --torture lock|rcu|rcuscale|refscale|scf" 89 + echo " --torture lock|rcu|rcuscale|refscale|scf|X*" 91 90 echo " --trust-make" 92 91 exit 1 93 92 } ··· 181 180 ;; 182 181 --kasan) 183 182 TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG 183 + if test -n "$torture_qemu_mem_default" 184 + then 185 + TORTURE_QEMU_MEM=2G 186 + fi 184 187 ;; 185 188 --kconfig|--kconfigs) 186 189 checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$' ··· 207 202 --memory) 208 203 checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error 209 204 TORTURE_QEMU_MEM=$2 205 + torture_qemu_mem_default= 210 206 shift 211 207 ;; 212 208 --no-initrd) ··· 237 231 shift 238 232 ;; 239 233 --torture) 240 - checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--' 234 + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\|X.*\)$' '^--' 241 235 TORTURE_SUITE=$2 242 236 TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`" 243 237 shift
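The kvm.sh change above raises the default guest memory to 2G when `--kasan` is given, but only if the user has not already passed `--memory` (tracked by the new `torture_qemu_mem_default` flag, which `--memory` clears). The precedence logic, sketched in C with illustrative names (`user_mem` models the `--memory` argument, NULL meaning the flag was never given):

```c
#include <string.h>

static const char *pick_qemu_mem(const char *user_mem, int kasan)
{
	if (user_mem)		/* an explicit --memory always wins */
		return user_mem;
	if (kasan)		/* KASAN's shadow memory needs extra RAM */
		return "2G";
	return "512";		/* historical default */
}
```

The ordering matters: testing `torture_qemu_mem_default` inside the `--kasan` branch is what keeps a user-supplied `--memory 1G` from being silently overridden.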
+24 -5
tools/testing/selftests/rcutorture/bin/torture.sh
··· 54 54 do_kasan=yes 55 55 do_kcsan=no 56 56 do_clocksourcewd=yes 57 + do_rt=yes 57 58 58 59 # doyesno - Helper function for yes/no arguments 59 60 function doyesno () { ··· 83 82 echo " --do-rcuscale / --do-no-rcuscale" 84 83 echo " --do-rcutorture / --do-no-rcutorture" 85 84 echo " --do-refscale / --do-no-refscale" 85 + echo " --do-rt / --do-no-rt" 86 86 echo " --do-scftorture / --do-no-scftorture" 87 87 echo " --duration [ <minutes> | <hours>h | <days>d ]" 88 88 echo " --kcsan-kmake-arg kernel-make-arguments" ··· 120 118 do_scftorture=yes 121 119 do_rcuscale=yes 122 120 do_refscale=yes 121 + do_rt=yes 123 122 do_kvfree=yes 124 123 do_kasan=yes 125 124 do_kcsan=yes ··· 151 148 do_scftorture=no 152 149 do_rcuscale=no 153 150 do_refscale=no 151 + do_rt=no 154 152 do_kvfree=no 155 153 do_kasan=no 156 154 do_kcsan=no ··· 165 161 ;; 166 162 --do-refscale|--do-no-refscale) 167 163 do_refscale=`doyesno "$1" --do-refscale` 164 + ;; 165 + --do-rt|--do-no-rt) 166 + do_rt=`doyesno "$1" --do-rt` 168 167 ;; 169 168 --do-scftorture|--do-no-scftorture) 170 169 do_scftorture=`doyesno "$1" --do-scftorture` ··· 329 322 echo " --- make clean" > "$amcdir/Make.out" 2>&1 330 323 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 331 324 echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1 325 + cp .config $amcdir 332 326 make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1 333 327 echo " --- make " >> "$amcdir/Make.out" 2>&1 334 328 make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 ··· 358 350 359 351 if test "$do_scftorture" = "yes" 360 352 then 361 - torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot" 362 - torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make 353 + torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS 
torture.disable_onoff_at_boot csdlock_debug=1" 354 + torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make 355 + fi 356 + 357 + if test "$do_rt" = "yes" 358 + then 359 + # With all post-boot grace periods forced to normal. 360 + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1" 361 + torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make 362 + 363 + # With all post-boot grace periods forced to expedited. 364 + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1" 365 + torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make 363 366 fi 364 367 365 368 if test "$do_refscale" = yes ··· 382 363 for prim in $primlist 383 364 do 384 365 torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" 385 - torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make 366 + torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 
torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make 386 367 done 387 368 388 369 if test "$do_rcuscale" = yes ··· 394 375 for prim in $primlist 395 376 do 396 377 torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" 397 - torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make 378 + torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make 398 379 done 399 380 400 381 if test "$do_kvfree" = "yes" 401 382 then 402 383 torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot" 403 - torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make 384 + torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make 404 385 fi 405 386 406 387 if test "$do_clocksourcewd" = "yes"
+2
tools/testing/selftests/rcutorture/configs/rcu/RUDE01
··· 8 8 CONFIG_PROVE_LOCKING=y 9 9 #CHECK#CONFIG_PROVE_RCU=y 10 10 CONFIG_RCU_EXPERT=y 11 + CONFIG_FORCE_TASKS_RUDE_RCU=y 12 + #CHECK#CONFIG_TASKS_RUDE_RCU=y
+2
tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
··· 6 6 CONFIG_PREEMPT_VOLUNTARY=n 7 7 CONFIG_PREEMPT=n 8 8 #CHECK#CONFIG_RCU_EXPERT=n 9 + CONFIG_KPROBES=n 10 + CONFIG_FTRACE=n
+1
tools/testing/selftests/rcutorture/configs/rcu/TASKS01
··· 7 7 CONFIG_DEBUG_LOCK_ALLOC=y 8 8 CONFIG_PROVE_LOCKING=y 9 9 #CHECK#CONFIG_PROVE_RCU=y 10 + CONFIG_TASKS_RCU=y 10 11 CONFIG_RCU_EXPERT=y
+4
tools/testing/selftests/rcutorture/configs/rcu/TASKS02
··· 2 2 CONFIG_PREEMPT_NONE=y 3 3 CONFIG_PREEMPT_VOLUNTARY=n 4 4 CONFIG_PREEMPT=n 5 + CONFIG_PREEMPT_DYNAMIC=n 6 + #CHECK#CONFIG_TASKS_RCU=y 7 + CONFIG_FORCE_TASKS_RCU=y 8 + CONFIG_RCU_EXPERT=y
+1
tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
··· 1 1 rcutorture.torture_type=tasks 2 + rcutorture.stat_interval=60
+2
tools/testing/selftests/rcutorture/configs/rcu/TASKS03
··· 7 7 CONFIG_NO_HZ_IDLE=n 8 8 CONFIG_NO_HZ_FULL=y 9 9 #CHECK#CONFIG_RCU_EXPERT=n 10 + CONFIG_TASKS_RCU=y 11 + CONFIG_RCU_EXPERT=y
+3
tools/testing/selftests/rcutorture/configs/rcu/TRACE01
··· 4 4 CONFIG_PREEMPT_NONE=y 5 5 CONFIG_PREEMPT_VOLUNTARY=n 6 6 CONFIG_PREEMPT=n 7 + CONFIG_PREEMPT_DYNAMIC=n 7 8 CONFIG_DEBUG_LOCK_ALLOC=n 8 9 CONFIG_PROVE_LOCKING=n 9 10 #CHECK#CONFIG_PROVE_RCU=n 11 + CONFIG_FORCE_TASKS_TRACE_RCU=y 12 + #CHECK#CONFIG_TASKS_TRACE_RCU=y 10 13 CONFIG_TASKS_TRACE_RCU_READ_MB=y 11 14 CONFIG_RCU_EXPERT=y
+2
tools/testing/selftests/rcutorture/configs/rcu/TRACE02
··· 7 7 CONFIG_DEBUG_LOCK_ALLOC=y 8 8 CONFIG_PROVE_LOCKING=y 9 9 #CHECK#CONFIG_PROVE_RCU=y 10 + CONFIG_FORCE_TASKS_TRACE_RCU=y 11 + #CHECK#CONFIG_TASKS_TRACE_RCU=y 10 12 CONFIG_TASKS_TRACE_RCU_READ_MB=n 11 13 CONFIG_RCU_EXPERT=y
+3 -2
tools/testing/selftests/rcutorture/configs/rcu/TREE04
··· 1 1 CONFIG_SMP=y 2 2 CONFIG_NR_CPUS=8 3 - CONFIG_PREEMPT_NONE=y 4 - CONFIG_PREEMPT_VOLUNTARY=n 3 + CONFIG_PREEMPT_NONE=n 4 + CONFIG_PREEMPT_VOLUNTARY=y 5 5 CONFIG_PREEMPT=n 6 + CONFIG_PREEMPT_DYNAMIC=n 6 7 #CHECK#CONFIG_TREE_RCU=y 7 8 CONFIG_HZ_PERIODIC=n 8 9 CONFIG_NO_HZ_IDLE=n
+1
tools/testing/selftests/rcutorture/configs/rcu/TREE07
··· 3 3 CONFIG_PREEMPT_NONE=y 4 4 CONFIG_PREEMPT_VOLUNTARY=n 5 5 CONFIG_PREEMPT=n 6 + CONFIG_PREEMPT_DYNAMIC=n 6 7 #CHECK#CONFIG_TREE_RCU=y 7 8 CONFIG_HZ_PERIODIC=n 8 9 CONFIG_NO_HZ_IDLE=n
+2
tools/testing/selftests/rcutorture/configs/rcu/TREE09
··· 13 13 CONFIG_RCU_BOOST=n 14 14 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 15 15 #CHECK#CONFIG_RCU_EXPERT=n 16 + CONFIG_KPROBES=n 17 + CONFIG_FTRACE=n
+1
tools/testing/selftests/rcutorture/configs/rcu/TREE10
··· 3 3 CONFIG_PREEMPT_NONE=y 4 4 CONFIG_PREEMPT_VOLUNTARY=n 5 5 CONFIG_PREEMPT=n 6 + CONFIG_PREEMPT_DYNAMIC=n 6 7 #CHECK#CONFIG_TREE_RCU=y 7 8 CONFIG_HZ_PERIODIC=n 8 9 CONFIG_NO_HZ_IDLE=y
+14 -2
tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
···
9   9
10  10    # rcutorture_param_n_barrier_cbs bootparam-string
11  11    #
12      - # Adds n_barrier_cbs rcutorture module parameter to kernels having it.
    12  + # Adds n_barrier_cbs rcutorture module parameter if not already specified.
13  13    rcutorture_param_n_barrier_cbs () {
14  14    	if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
15  15    	then
···
30  30    	fi
31  31    }
32  32
    33  + # rcutorture_param_stat_interval bootparam-string
    34  + #
    35  + # Adds stat_interval rcutorture module parameter if not already specified.
    36  + rcutorture_param_stat_interval () {
    37  + 	if echo $1 | grep -q "rcutorture\.stat_interval"
    38  + 	then
    39  + 		:
    40  + 	else
    41  + 		echo rcutorture.stat_interval=15
    42  + 	fi
    43  + }
    44  +
33  45    # per_version_boot_params bootparam-string config-file seconds
34  46    #
35  47    # Adds per-version torture-module parameters to kernels supporting them.
36  48    per_version_boot_params () {
37  49    	echo $1 `rcutorture_param_onoff "$1" "$2"` \
38  50    		`rcutorture_param_n_barrier_cbs "$1"` \
39      - 		rcutorture.stat_interval=15 \
    51  + 		`rcutorture_param_stat_interval "$1"` \
40  52    		rcutorture.shutdown_secs=$3 \
41  53    		rcutorture.test_no_idle_hz=1 \
42  54    		rcutorture.verbose=1
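The new `rcutorture_param_stat_interval` helper follows the same default-if-absent pattern as the existing `rcutorture_param_n_barrier_cbs`: it emits a default module parameter only when the caller's boot-parameter string does not already set one, which is what lets the TASKS02.boot change above override the interval to 60. A standalone sketch of that pattern (function body taken from the diff; the two example boot strings are hypothetical):

```shell
#!/bin/sh
# Emit a default rcutorture.stat_interval unless the bootparam string
# already specifies one (pattern from rcu/ver_functions.sh).
rcutorture_param_stat_interval () {
	if echo $1 | grep -q "rcutorture\.stat_interval"
	then
		:
	else
		echo rcutorture.stat_interval=15
	fi
}

# A boot string that already sets the parameter produces no output...
rcutorture_param_stat_interval "rcutorture.stat_interval=60 nohz_full=1-7"
# ...while one that does not gets the default emitted for the caller to append.
rcutorture_param_stat_interval "rcutorture.torture_type=tasks"
```

Because the helper's output is spliced into `per_version_boot_params` via command substitution, an empty result simply leaves the user-supplied value in force.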
+4 -3
tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
···
1   1     CONFIG_RCU_SCALE_TEST=y
2   2     CONFIG_PRINTK_TIME=y
3       - CONFIG_TASKS_RCU_GENERIC=y
4       - CONFIG_TASKS_RCU=y
5       - CONFIG_TASKS_TRACE_RCU=y
    3   + CONFIG_FORCE_TASKS_RCU=y
    4   + #CHECK#CONFIG_TASKS_RCU=y
    5   + CONFIG_FORCE_TASKS_TRACE_RCU=y
    6   + #CHECK#CONFIG_TASKS_TRACE_RCU=y
+2
tools/testing/selftests/rcutorture/configs/rcuscale/TREE
···
16  16    CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
17  17    CONFIG_RCU_EXPERT=y
18  18    CONFIG_RCU_TRACE=y
    19  + CONFIG_KPROBES=n
    20  + CONFIG_FTRACE=n
+4
tools/testing/selftests/rcutorture/configs/refscale/CFcommon
···
1   1     CONFIG_RCU_REF_SCALE_TEST=y
2   2     CONFIG_PRINTK_TIME=y
    3   + CONFIG_FORCE_TASKS_RCU=y
    4   + #CHECK#CONFIG_TASKS_RCU=y
    5   + CONFIG_FORCE_TASKS_TRACE_RCU=y
    6   + #CHECK#CONFIG_TASKS_TRACE_RCU=y
+2
tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
···
15  15    CONFIG_RCU_BOOST=n
16  16    CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
17  17    CONFIG_RCU_EXPERT=y
    18  + CONFIG_KPROBES=n
    19  + CONFIG_FTRACE=n
+2
tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
···
7   7     CONFIG_NO_HZ_FULL=y
8   8     CONFIG_DEBUG_LOCK_ALLOC=n
9   9     CONFIG_PROVE_LOCKING=n
    10  + CONFIG_KPROBES=n
    11  + CONFIG_FTRACE=n
+1
tools/testing/selftests/rcutorture/configs/scf/PREEMPT
···
7   7     CONFIG_NO_HZ_FULL=n
8   8     CONFIG_DEBUG_LOCK_ALLOC=y
9   9     CONFIG_PROVE_LOCKING=y
    10  + CONFIG_RCU_EXPERT=y
+1 -2
tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
···
25  25    	echo $1 `scftorture_param_onoff "$1" "$2"` \
26  26    		scftorture.stat_interval=15 \
27  27    		scftorture.shutdown_secs=$3 \
28      - 		scftorture.verbose=1 \
29      - 		scf
    28  + 		scftorture.verbose=1
30  29    }
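The scf/ver_functions.sh change drops a stray trailing `scf` token (left over from a line continuation) that was being appended to the kernel command line as a bogus boot parameter. A self-contained sketch of the corrected function, with `scftorture_param_onoff` stubbed out to a no-op since only the emitted argument list matters here (the stub and the example arguments are assumptions, not from the source):

```shell
#!/bin/sh
# Stub: the real scftorture_param_onoff adds CPU-hotplug parameters
# when the config file supports them; here it emits nothing.
scftorture_param_onoff () { :; }

# Corrected per_version_boot_params: the trailing "scf" token is gone,
# so only genuine scftorture boot parameters reach the command line.
per_version_boot_params () {
	echo $1 `scftorture_param_onoff "$1" "$2"` \
		scftorture.stat_interval=15 \
		scftorture.shutdown_secs=$3 \
		scftorture.verbose=1
}

per_version_boot_params "console=ttyS0" cfgfile 300
```

Since `$1` and the command substitutions are unquoted arguments to `echo`, the output is a single space-separated boot-parameter string ending in `scftorture.verbose=1`, with no dangling `scf`.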