Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rcu: Improve diagnostics for spurious RCU CPU stall warnings

The current RCU CPU stall warning code will print "Stall ended before
state dump start" any time that the stall-warning code is triggered on
a CPU that has already reported a quiescent state for the current grace
period and if all quiescent states have been reported for the current
grace period. However, a true stall can result in these symptoms, for
example, by preventing RCU's grace-period kthreads from ever running.

This commit therefore checks for this condition, reporting the end of
the stall only if one of the grace-period counters has actually advanced.
Otherwise, it reports the last time that the grace-period kthread made
meaningful progress. (In normal situations, the grace-period kthread
should make meaningful progress at least every jiffies_till_next_fqs
jiffies.)

Reported-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Miroslav Benes <mbenes@suse.cz>

+34 -5
+5
Documentation/RCU/stallwarn.txt
··· 187 187 behavior, you might need to replace some of the cond_resched() 188 188 calls with calls to cond_resched_rcu_qs(). 189 189 190 + o Anything that prevents RCU's grace-period kthreads from running. 191 + This can result in the "All QSes seen" console-log message. 192 + This message will include information on when the kthread last 193 + ran and how often it should be expected to run. 194 + 190 195 o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might 191 196 happen to preempt a low-priority task in the middle of an RCU 192 197 read-side critical section. This is especially damaging if
+27 -5
kernel/rcu/tree.c
··· 1066 1066 } 1067 1067 } 1068 1068 1069 - static void print_other_cpu_stall(struct rcu_state *rsp) 1069 + static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) 1070 1070 { 1071 1071 int cpu; 1072 1072 long delta; 1073 1073 unsigned long flags; 1074 + unsigned long gpa; 1075 + unsigned long j; 1074 1076 int ndetected = 0; 1075 1077 struct rcu_node *rnp = rcu_get_root(rsp); 1076 1078 long totqlen = 0; ··· 1125 1123 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", 1126 1124 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1127 1125 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1128 - if (ndetected == 0) 1129 - pr_err("INFO: Stall ended before state dump start\n"); 1130 - else 1126 + if (ndetected) { 1131 1127 rcu_dump_cpu_stacks(rsp); 1128 + } else { 1129 + if (ACCESS_ONCE(rsp->gpnum) != gpnum || 1130 + ACCESS_ONCE(rsp->completed) == gpnum) { 1131 + pr_err("INFO: Stall ended before state dump start\n"); 1132 + } else { 1133 + j = jiffies; 1134 + gpa = ACCESS_ONCE(rsp->gp_activity); 1135 + pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n", 1136 + rsp->name, j - gpa, j, gpa, 1137 + jiffies_till_next_fqs); 1138 + /* In this case, the current CPU might be at fault. */ 1139 + sched_show_task(current); 1140 + } 1141 + } 1132 1142 1133 1143 /* Complain about tasks blocking the grace period. */ 1134 1144 ··· 1240 1226 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { 1241 1227 1242 1228 /* They had a few time units to dump stack, so complain. 
*/ 1243 - print_other_cpu_stall(rsp); 1229 + print_other_cpu_stall(rsp, gpnum); 1244 1230 } 1245 1231 } 1246 1232 ··· 1636 1622 struct rcu_data *rdp; 1637 1623 struct rcu_node *rnp = rcu_get_root(rsp); 1638 1624 1625 + ACCESS_ONCE(rsp->gp_activity) = jiffies; 1639 1626 rcu_bind_gp_kthread(); 1640 1627 raw_spin_lock_irq(&rnp->lock); 1641 1628 smp_mb__after_unlock_lock(); ··· 1697 1682 rnp->grphi, rnp->qsmask); 1698 1683 raw_spin_unlock_irq(&rnp->lock); 1699 1684 cond_resched_rcu_qs(); 1685 + ACCESS_ONCE(rsp->gp_activity) = jiffies; 1700 1686 } 1701 1687 1702 1688 mutex_unlock(&rsp->onoff_mutex); ··· 1714 1698 unsigned long maxj; 1715 1699 struct rcu_node *rnp = rcu_get_root(rsp); 1716 1700 1701 + ACCESS_ONCE(rsp->gp_activity) = jiffies; 1717 1702 rsp->n_force_qs++; 1718 1703 if (fqs_state == RCU_SAVE_DYNTICK) { 1719 1704 /* Collect dyntick-idle snapshots. */ ··· 1753 1736 struct rcu_data *rdp; 1754 1737 struct rcu_node *rnp = rcu_get_root(rsp); 1755 1738 1739 + ACCESS_ONCE(rsp->gp_activity) = jiffies; 1756 1740 raw_spin_lock_irq(&rnp->lock); 1757 1741 smp_mb__after_unlock_lock(); 1758 1742 gp_duration = jiffies - rsp->gp_start; ··· 1790 1772 nocb += rcu_future_gp_cleanup(rsp, rnp); 1791 1773 raw_spin_unlock_irq(&rnp->lock); 1792 1774 cond_resched_rcu_qs(); 1775 + ACCESS_ONCE(rsp->gp_activity) = jiffies; 1793 1776 } 1794 1777 rnp = rcu_get_root(rsp); 1795 1778 raw_spin_lock_irq(&rnp->lock); ··· 1840 1821 if (rcu_gp_init(rsp)) 1841 1822 break; 1842 1823 cond_resched_rcu_qs(); 1824 + ACCESS_ONCE(rsp->gp_activity) = jiffies; 1843 1825 WARN_ON(signal_pending(current)); 1844 1826 trace_rcu_grace_period(rsp->name, 1845 1827 ACCESS_ONCE(rsp->gpnum), ··· 1884 1864 ACCESS_ONCE(rsp->gpnum), 1885 1865 TPS("fqsend")); 1886 1866 cond_resched_rcu_qs(); 1867 + ACCESS_ONCE(rsp->gp_activity) = jiffies; 1887 1868 } else { 1888 1869 /* Deal with stray signal. 
*/ 1889 1870 cond_resched_rcu_qs(); 1871 + ACCESS_ONCE(rsp->gp_activity) = jiffies; 1890 1872 WARN_ON(signal_pending(current)); 1891 1873 trace_rcu_grace_period(rsp->name, 1892 1874 ACCESS_ONCE(rsp->gpnum),
+2
kernel/rcu/tree.h
··· 488 488 /* due to no GP active. */ 489 489 unsigned long gp_start; /* Time at which GP started, */ 490 490 /* but in jiffies. */ 491 + unsigned long gp_activity; /* Time of last GP kthread */ 492 + /* activity in jiffies. */ 491 493 unsigned long jiffies_stall; /* Time at which to check */ 492 494 /* for CPU stalls. */ 493 495 unsigned long jiffies_resched; /* Time at which to resched */