···
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().
 
+	rcutree.rcu_unlock_delay= [KNL]
+			In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
+			this specifies an rcu_read_unlock()-time delay
+			in microseconds.  This defaults to zero.
+			Larger delays increase the probability of
+			catching RCU pointer leaks, that is, buggy use
+			of RCU-protected pointers after the relevant
+			rcu_read_unlock() has completed.
+
 	rcutree.sysrq_rcu= [KNL]
 			Commandeer a sysrq key to dump out Tree RCU's
 			rcu_node tree with an eye towards determining
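As a hypothetical usage illustration (not part of the patch itself): on a kernel built with CONFIG_RCU_STRICT_GRACE_PERIOD=y, adding

	rcutree.rcu_unlock_delay=100

to the boot command line would make each rcu_read_unlock() spin for 100 microseconds, widening the window in which a leaked RCU-protected pointer can be caught. Because the parameter is registered with mode 0444 (see the kernel/rcu/tree.c hunk below), it is read-only at runtime and must be supplied at boot.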
+7
include/linux/rcupdate.h
···
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
+#ifdef CONFIG_TINY_RCU
+#define rcu_read_unlock_strict() do { } while (0)
+#else
+void rcu_read_unlock_strict(void);
+#endif
+
 static inline void __rcu_read_lock(void)
 {
 	preempt_disable();
···
 static inline void __rcu_read_unlock(void)
 {
 	preempt_enable();
+	rcu_read_unlock_strict();
 }
 
 static inline int rcu_preempt_depth(void)
+5 -3
kernel/rcu/Kconfig
···
 
 config RCU_FANOUT_LEAF
 	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
+	range 2 64 if 64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 32 if !64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 3 if RCU_STRICT_GRACE_PERIOD
 	depends on TREE_RCU && RCU_EXPERT
-	default 16
+	default 16 if !RCU_STRICT_GRACE_PERIOD
+	default 2 if RCU_STRICT_GRACE_PERIOD
 	help
 	  This option controls the leaf-level fanout of hierarchical
 	  implementations of RCU, and allows trading off cache misses
+15
kernel/rcu/Kconfig.debug
···
 	  Say N here if you need ultimate kernel/user switch latencies
 	  Say Y if you are unsure
 
+config RCU_STRICT_GRACE_PERIOD
+	bool "Provide debug RCU implementation with short grace periods"
+	depends on DEBUG_KERNEL && RCU_EXPERT
+	default n
+	select PREEMPT_COUNT if PREEMPT=n
+	help
+	  Select this option to build an RCU variant that is strict about
+	  grace periods, making them as short as it can.  This limits
+	  scalability, destroys real-time response, degrades battery
+	  lifetime and kills performance.  Don't try this on large
+	  machines, as in systems with more than about 10 or 20 CPUs.
+	  But in conjunction with tools like KASAN, it can be helpful
+	  when looking for certain types of RCU usage bugs, for example,
+	  too-short RCU read-side critical sections.
+
 endmenu # "RCU Debugging"
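For reference, a minimal sketch of a .config fragment that exposes and enables the new option (assuming a TREE_RCU build; the KASAN line is optional and merely reflects the use case named in the help text above):

	CONFIG_DEBUG_KERNEL=y
	CONFIG_RCU_EXPERT=y
	CONFIG_RCU_STRICT_GRACE_PERIOD=y
	CONFIG_KASAN=y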
+62 -11
kernel/rcu/tree.c
···
 static int gp_cleanup_delay;
 module_param(gp_cleanup_delay, int, 0444);
 
+// Add delay to rcu_read_unlock() for strict grace periods.
+static int rcu_unlock_delay;
+#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
+module_param(rcu_unlock_delay, int, 0444);
+#endif
+
 /*
  * This rcu parameter is runtime-read-only.  It reflects
  * a minimum allowed number of objects which can be cached
···
 	return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
 }
 
-#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch ... */
-#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
+#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
+	// Maximum callbacks per rcu_do_batch ...
+#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
 static long blimit = DEFAULT_RCU_BLIMIT;
-#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
 static long qhimark = DEFAULT_RCU_QHIMARK;
-#define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_QLOMARK 100 // Once only this many pending, use blimit.
 static long qlowmark = DEFAULT_RCU_QLOMARK;
 #define DEFAULT_RCU_QOVLD_MULT 2
 #define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
-static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
-static long qovld_calc = -1; /* No pre-initialization lock acquisitions! */
+static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
+static long qovld_calc = -1; // No pre-initialization lock acquisitions!
 
 module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 module_param(qovld, long, 0444);
 
-static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
 static bool rcu_kick_kthreads;
 static int rcu_divisor = 7;
···
 }
 
 /*
+ * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
+ * quiescent state.  This is intended to be invoked when the CPU notices
+ * a new grace period.
+ */
+static void rcu_strict_gp_check_qs(void)
+{
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
+}
+
+/*
  * Update CPU-local rcu_data state to record the beginnings and ends of
  * grace periods.  The caller must hold the ->lock of the leaf rcu_node
  * structure corresponding to the current CPU, and must have irqs disabled.
···
 	}
 	needwake = __note_gp_changes(rnp, rdp);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	rcu_strict_gp_check_qs();
 	if (needwake)
 		rcu_gp_kthread_wake();
 }
···
 		schedule_timeout_idle(duration);
 		pr_alert("%s: Wait complete\n", __func__);
 	}
+}
+
+/*
+ * Handler for on_each_cpu() to invoke the target CPU's RCU core
+ * processing.
+ */
+static void rcu_strict_gp_boundary(void *unused)
+{
+	invoke_rcu_core();
 }
 
 /*
···
 		cond_resched_tasks_rcu_qs();
 		WRITE_ONCE(rcu_state.gp_activity, jiffies);
 	}
+
+	// If strict, make all CPUs aware of new grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
 
 	return true;
 }
···
 			   rcu_state.gp_flags & RCU_GP_FLAG_INIT);
 	}
 	raw_spin_unlock_irq_rcu_node(rnp);
+
+	// If strict, make all CPUs aware of the end of the old grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
 }
 
 /*
···
  * structure.  This must be called from the specified CPU.
  */
 static void
-rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
+rcu_report_qs_rdp(struct rcu_data *rdp)
 {
 	unsigned long flags;
 	unsigned long mask;
···
 			       rcu_segcblist_is_offloaded(&rdp->cblist);
 	struct rcu_node *rnp;
 
+	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
···
 		return;
 	}
 	mask = rdp->grpmask;
-	if (rdp->cpu == smp_processor_id())
-		rdp->core_needs_qs = false;
+	rdp->core_needs_qs = false;
 	if ((rnp->qsmask & mask) == 0) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	} else {
···
 	 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
 	 * judge of that).
 	 */
-	rcu_report_qs_rdp(rdp->cpu, rdp);
+	rcu_report_qs_rdp(rdp);
 }
 
 /*
···
 }
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
+// Workqueue handler for an RCU reader for kernels enforcing strict RCU
+// grace periods.
+static void strict_work_handler(struct work_struct *work)
+{
+	rcu_read_lock();
+	rcu_read_unlock();
+}
+
 /* Perform RCU core processing work for the current CPU. */
 static __latent_entropy void rcu_core(void)
 {
···
 	/* Do any needed deferred wakeups of rcuo kthreads. */
 	do_nocb_deferred_wakeup(rdp);
 	trace_rcu_utilization(TPS("End RCU core"));
+
+	// If strict GPs, schedule an RCU reader in a clean environment.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
 }
 
 static void rcu_core_si(struct softirq_action *h)
···
 
 	/* Set up local state, ensuring consistent view of global state. */
 	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
+	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
+1
kernel/rcu/tree.h
···
 					/*  period it is aware of. */
 	struct irq_work defer_qs_iw;	/* Obtain later scheduler attention. */
 	bool defer_qs_iw_pending;	/* Scheduler attention pending? */
+	struct work_struct strict_work;	/* Schedule readers for strict GPs. */
 
 	/* 2) batch handling */
 	struct rcu_segcblist cblist;	/* Segmented callback list, with */
+30 -2
kernel/rcu/tree_plugin.h
···
 		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
 	if (IS_ENABLED(CONFIG_PROVE_RCU))
 		pr_info("\tRCU lockdep checking is enabled.\n");
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
 	if (RCU_NUM_LVLS >= 4)
 		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
 	if (RCU_FANOUT_LEAF != 16)
···
 	rcu_preempt_read_enter();
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
 		WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
+		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
 	barrier();  /* critical section after entry code. */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_lock);
···
 		return;
 	}
 	t->rcu_read_unlock_special.s = 0;
-	if (special.b.need_qs)
-		rcu_qs();
+	if (special.b.need_qs) {
+		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+			rcu_report_qs_rdp(rdp);
+			udelay(rcu_unlock_delay);
+		} else {
+			rcu_qs();
+		}
+	}
 
 	/*
 	 * Respond to a request by an expedited grace period for a
···
 }
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
+
+/*
+ * If strict grace periods are enabled, and if the calling
+ * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
+ * report that quiescent state and, if requested, spin for a bit.
+ */
+void rcu_read_unlock_strict(void)
+{
+	struct rcu_data *rdp;
+
+	if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
+	    irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
+		return;
+	rdp = this_cpu_ptr(&rcu_data);
+	rcu_report_qs_rdp(rdp);
+	udelay(rcu_unlock_delay);
+}
+EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
 
 /*
  * Tell them what RCU they are running.
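To make the intended use case concrete, here is a hypothetical sketch (not part of the patch) of the kind of "RCU pointer leak" that strict grace periods plus KASAN are meant to catch; struct foo, gp, and buggy_reader() are invented names, and a concurrent updater is assumed to publish new versions and free old ones with kfree_rcu() or call_rcu():

#include <linux/rcupdate.h>

struct foo {
	int val;
	struct rcu_head rh;
};

static struct foo __rcu *gp;

static int buggy_reader(void)
{
	struct foo *p;

	rcu_read_lock();
	p = rcu_dereference(gp);
	rcu_read_unlock();	/* Critical section ends too early. */
	return p ? p->val : 0;	/* Bug: p may already have been freed. */
}

With strict grace periods, the outermost rcu_read_unlock() reports a quiescent state right away (optionally delayed by rcutree.rcu_unlock_delay), so the grace period can complete and the old object be freed before the stray dereference, making the use-after-free far more likely to be observed. A kernel built this way also announces itself at boot via the "RCU strict (and thus non-scalable) grace periods enabled." message added above.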