···
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().

+	rcutree.rcu_unlock_delay= [KNL]
+			In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
+			this specifies an rcu_read_unlock()-time delay
+			in microseconds.  This defaults to zero.
+			Larger delays increase the probability of
+			catching RCU pointer leaks, that is, buggy use
+			of RCU-protected pointers after the relevant
+			rcu_read_unlock() has completed.
+
 	rcutree.sysrq_rcu= [KNL]
 			Commandeer a sysrq key to dump out Tree RCU's
 			rcu_node tree with an eye towards determining
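The "RCU pointer leak" this parameter helps catch is easiest to see in a small sketch. In the hypothetical reader below (my_data, my_gp, and buggy_read_val are invented for illustration and do not appear in this patch), the pointer obtained under rcu_read_lock() is still dereferenced after rcu_read_unlock(), so the access can race with the updater's kfree() once a grace period has elapsed. With strict grace periods plus a nonzero rcutree.rcu_unlock_delay, the grace period is far more likely to complete inside that window, letting tools such as KASAN flag the use-after-free.

/* Hypothetical reader illustrating an RCU pointer leak; not from this patch. */
#include <linux/rcupdate.h>

struct my_data {
	int val;
};

static struct my_data __rcu *my_gp;	/* Published elsewhere via rcu_assign_pointer(). */

static int buggy_read_val(void)
{
	struct my_data *p;

	rcu_read_lock();
	p = rcu_dereference(my_gp);
	rcu_read_unlock();	/* The pointer "leaks" out of the critical section... */
	return p ? p->val : 0;	/* ...so this dereference may be a use-after-free. */
}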
···

 #else /* #ifdef CONFIG_PREEMPT_RCU */

+#ifdef CONFIG_TINY_RCU
+#define rcu_read_unlock_strict() do { } while (0)
+#else
+void rcu_read_unlock_strict(void);
+#endif
+
 static inline void __rcu_read_lock(void)
 {
 	preempt_disable();
···
 static inline void __rcu_read_unlock(void)
 {
 	preempt_enable();
+	rcu_read_unlock_strict();
 }

 static inline int rcu_preempt_depth(void)
kernel/rcu/Kconfig  (+5, -3)
···

 config RCU_FANOUT_LEAF
 	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
+	range 2 64 if 64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 32 if !64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 3 if RCU_STRICT_GRACE_PERIOD
 	depends on TREE_RCU && RCU_EXPERT
-	default 16
+	default 16 if !RCU_STRICT_GRACE_PERIOD
+	default 2 if RCU_STRICT_GRACE_PERIOD
 	help
 	  This option controls the leaf-level fanout of hierarchical
 	  implementations of RCU, and allows trading off cache misses
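The leaf fanout sets how many CPUs share each leaf rcu_node structure, so capping it at 2 or 3 (default 2) under RCU_STRICT_GRACE_PERIOD presumably keeps each leaf ->lock shared by only a few CPUs even though strict grace periods make quiescent-state reporting far more frequent. The userspace sketch below is only a back-of-the-envelope illustration of the resulting geometry; the 16-CPU figure and the rounding arithmetic are assumptions for the example, not values from this patch.

/* Rough leaf-count estimate for a given CPU count and leaf fanout; illustrative only. */
#include <stdio.h>

int main(void)
{
	int nr_cpus = 16;	/* Example CPU count. */
	int fanout_leaf = 2;	/* RCU_FANOUT_LEAF default under strict grace periods. */
	int leaves = (nr_cpus + fanout_leaf - 1) / fanout_leaf;

	printf("%d CPUs with leaf fanout %d -> about %d leaf rcu_node structures\n",
	       nr_cpus, fanout_leaf, leaves);
	return 0;
}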
kernel/rcu/Kconfig.debug  (+15)
···
 	  Say N here if you need ultimate kernel/user switch latencies
 	  Say Y if you are unsure

+config RCU_STRICT_GRACE_PERIOD
+	bool "Provide debug RCU implementation with short grace periods"
+	depends on DEBUG_KERNEL && RCU_EXPERT
+	default n
+	select PREEMPT_COUNT if PREEMPT=n
+	help
+	  Select this option to build an RCU variant that is strict about
+	  grace periods, making them as short as it can.  This limits
+	  scalability, destroys real-time response, degrades battery
+	  lifetime and kills performance.  Don't try this on large
+	  machines, as in systems with more than about 10 or 20 CPUs.
+	  But in conjunction with tools like KASAN, it can be helpful
+	  when looking for certain types of RCU usage bugs, for example,
+	  too-short RCU read-side critical sections.
+
 endmenu # "RCU Debugging"
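A "too-short RCU read-side critical section" is one that ends before the reader is finished with the protected data. The hypothetical list lookup below (struct endpoint and both lookup functions are invented for illustration) shows the bug and the usual fix: either extend the critical section over every use of the pointer, or copy out what is needed before calling rcu_read_unlock(). With this option enabled, the grace period can complete almost immediately after the premature unlock, so KASAN is much more likely to observe the resulting use-after-free.

#include <linux/rculist.h>

struct endpoint {
	struct list_head node;
	int id;
	int port;
};

static LIST_HEAD(endpoints);	/* Readers use RCU; updaters hold their own lock. */

/* Buggy: "found" is still used after the read-side critical section ends. */
static int lookup_port_buggy(int id)
{
	struct endpoint *ep, *found = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(ep, &endpoints, node) {
		if (ep->id == id) {
			found = ep;
			break;
		}
	}
	rcu_read_unlock();			/* Too short... */
	return found ? found->port : -1;	/* ...found may already be freed. */
}

/* Fixed: copy out the needed field before leaving the critical section. */
static int lookup_port_fixed(int id)
{
	struct endpoint *ep;
	int port = -1;

	rcu_read_lock();
	list_for_each_entry_rcu(ep, &endpoints, node) {
		if (ep->id == id) {
			port = ep->port;
			break;
		}
	}
	rcu_read_unlock();
	return port;
}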
kernel/rcu/tree.c  (+62, -11)
···
 static int gp_cleanup_delay;
 module_param(gp_cleanup_delay, int, 0444);

+// Add delay to rcu_read_unlock() for strict grace periods.
+static int rcu_unlock_delay;
+#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
+module_param(rcu_unlock_delay, int, 0444);
+#endif
+
 /*
  * This rcu parameter is runtime-read-only.  It reflects
  * a minimum allowed number of objects which can be cached
···
 	return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
 }

-#define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch ... */
-#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
+#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
+	// Maximum callbacks per rcu_do_batch ...
+#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
 static long blimit = DEFAULT_RCU_BLIMIT;
-#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
 static long qhimark = DEFAULT_RCU_QHIMARK;
-#define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_QLOMARK 100   // Once only this many pending, use blimit.
 static long qlowmark = DEFAULT_RCU_QLOMARK;
 #define DEFAULT_RCU_QOVLD_MULT 2
 #define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
-static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
-static long qovld_calc = -1;	  /* No pre-initialization lock acquisitions! */
+static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
+static long qovld_calc = -1; // No pre-initialization lock acquisitions!

 module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 module_param(qovld, long, 0444);

-static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
 static bool rcu_kick_kthreads;
 static int rcu_divisor = 7;
···
 }

 /*
+ * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
+ * quiescent state.  This is intended to be invoked when the CPU notices
+ * a new grace period.
+ */
+static void rcu_strict_gp_check_qs(void)
+{
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
+}
+
+/*
  * Update CPU-local rcu_data state to record the beginnings and ends of
  * grace periods.  The caller must hold the ->lock of the leaf rcu_node
  * structure corresponding to the current CPU, and must have irqs disabled.
···
 	}
 	needwake = __note_gp_changes(rnp, rdp);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	rcu_strict_gp_check_qs();
 	if (needwake)
 		rcu_gp_kthread_wake();
 }
···
 		schedule_timeout_idle(duration);
 		pr_alert("%s: Wait complete\n", __func__);
 	}
+}
+
+/*
+ * Handler for on_each_cpu() to invoke the target CPU's RCU core
+ * processing.
+ */
+static void rcu_strict_gp_boundary(void *unused)
+{
+	invoke_rcu_core();
 }

 /*
···
 		cond_resched_tasks_rcu_qs();
 		WRITE_ONCE(rcu_state.gp_activity, jiffies);
 	}
+
+	// If strict, make all CPUs aware of new grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);

 	return true;
 }
···
 			   rcu_state.gp_flags & RCU_GP_FLAG_INIT);
 	}
 	raw_spin_unlock_irq_rcu_node(rnp);
+
+	// If strict, make all CPUs aware of the end of the old grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
 }

 /*
···
  * structure.  This must be called from the specified CPU.
  */
 static void
-rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
+rcu_report_qs_rdp(struct rcu_data *rdp)
 {
 	unsigned long flags;
 	unsigned long mask;
···
 		rcu_segcblist_is_offloaded(&rdp->cblist);
 	struct rcu_node *rnp;

+	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
···
 		return;
 	}
 	mask = rdp->grpmask;
-	if (rdp->cpu == smp_processor_id())
-		rdp->core_needs_qs = false;
+	rdp->core_needs_qs = false;
 	if ((rnp->qsmask & mask) == 0) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	} else {
···
 	 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
 	 * judge of that).
 	 */
-	rcu_report_qs_rdp(rdp->cpu, rdp);
+	rcu_report_qs_rdp(rdp);
 }

 /*
···
 }
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);

+// Workqueue handler for an RCU reader for kernels enforcing strict RCU
+// grace periods.
+static void strict_work_handler(struct work_struct *work)
+{
+	rcu_read_lock();
+	rcu_read_unlock();
+}
+
 /* Perform RCU core processing work for the current CPU. */
 static __latent_entropy void rcu_core(void)
 {
···
 	/* Do any needed deferred wakeups of rcuo kthreads. */
 	do_nocb_deferred_wakeup(rdp);
 	trace_rcu_utilization(TPS("End RCU core"));
+
+	// If strict GPs, schedule an RCU reader in a clean environment.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
 }

 static void rcu_core_si(struct softirq_action *h)
···

 	/* Set up local state, ensuring consistent view of global state. */
 	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
+	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
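Taken together, the changes above make grace periods short by shrinking the callback and force-quiescent-state settings, generating a quiescent state as soon as a CPU notices a new grace period, and queueing a clean-environment RCU reader on every CPU at each grace-period boundary. A throwaway module like the hypothetical sketch below (not part of this patch) is one way to eyeball the effect, by timing synchronize_rcu() on strict and non-strict kernels:

/* Hypothetical grace-period latency probe; not part of this patch. */
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/rcupdate.h>

static int __init gp_latency_init(void)
{
	ktime_t start = ktime_get();

	synchronize_rcu();
	pr_info("gp_latency: synchronize_rcu() took %lld us\n",
		ktime_us_delta(ktime_get(), start));
	return 0;
}

static void __exit gp_latency_exit(void)
{
}

module_init(gp_latency_init);
module_exit(gp_latency_exit);
MODULE_LICENSE("GPL");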
kernel/rcu/tree.h  (+1)
···
 					/*  period it is aware of. */
 	struct irq_work defer_qs_iw;	/* Obtain later scheduler attention. */
 	bool defer_qs_iw_pending;	/* Scheduler attention pending? */
+	struct work_struct strict_work;	/* Schedule readers for strict GPs. */

 	/* 2) batch handling */
 	struct rcu_segcblist cblist;	/* Segmented callback list, with */
kernel/rcu/tree_plugin.h  (+30, -2)
···
 		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
 	if (IS_ENABLED(CONFIG_PROVE_RCU))
 		pr_info("\tRCU lockdep checking is enabled.\n");
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
 	if (RCU_NUM_LVLS >= 4)
 		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
 	if (RCU_FANOUT_LEAF != 16)
···
 	rcu_preempt_read_enter();
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
 		WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
+		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
 	barrier();  /* critical section after entry code. */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_lock);
···
 		return;
 	}
 	t->rcu_read_unlock_special.s = 0;
-	if (special.b.need_qs)
-		rcu_qs();
+	if (special.b.need_qs) {
+		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+			rcu_report_qs_rdp(rdp);
+			udelay(rcu_unlock_delay);
+		} else {
+			rcu_qs();
+		}
+	}

 	/*
 	 * Respond to a request by an expedited grace period for a
···
 }

 #else /* #ifdef CONFIG_PREEMPT_RCU */
+
+/*
+ * If strict grace periods are enabled, and if the calling
+ * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
+ * report that quiescent state and, if requested, spin for a bit.
+ */
+void rcu_read_unlock_strict(void)
+{
+	struct rcu_data *rdp;
+
+	if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
+	    irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
+		return;
+	rdp = this_cpu_ptr(&rcu_data);
+	rcu_report_qs_rdp(rdp);
+	udelay(rcu_unlock_delay);
+}
+EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);

 /*
  * Tell them what RCU they are running.
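One subtlety in rcu_read_unlock_strict() above is the early return when interrupts or preemption are disabled, or before the grace-period kthread exists. On CONFIG_PREEMPT=n kernels a preemption- or interrupt-disabled region is itself an RCU read-side critical section, so reporting a quiescent state from inside one would be premature; this is presumably also why the new Kconfig option selects PREEMPT_COUNT on PREEMPT=n kernels, so that preempt_count() reflects such regions. The hypothetical sketch below (not recommended style, purely illustrative) shows the case being guarded against:

#include <linux/preempt.h>
#include <linux/rcupdate.h>

static void nested_reader_sketch(void)
{
	preempt_disable();	/* On PREEMPT=n kernels this also blocks grace periods. */
	rcu_read_lock();
	/* ... access RCU-protected data ... */
	rcu_read_unlock();	/* preempt_count() != 0 here, so no quiescent state is reported. */
	/* ... the enclosing region may still be relying on RCU protection ... */
	preempt_enable();	/* Only after this may the CPU report a quiescent state. */
}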