Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rcu-tasks: Fix computation of CPU-to-list shift counts

The ->percpu_enqueue_shift field is used to map from the running CPU
number to the index of the corresponding callback list. This mapping
can change at runtime in response to varying callback load, resulting
in varying levels of contention on the callback-list locks.

Unfortunately, the initial value of this field is correct only if the
system happens to have a power-of-two number of CPUs, otherwise the
callbacks from the high-numbered CPUs can be placed into the callback list
indexed by 1 (rather than 0), and those index-1 callbacks will be ignored.
This can result in soft lockups and hangs.

This commit therefore corrects this mapping, adding one to this shift
count as needed for systems having non-power-of-two numbers of CPUs.

Fixes: 7a30871b6a27 ("rcu-tasks: Introduce ->percpu_enqueue_shift for dynamic queue selection")
Reported-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Reported-by: Martin Lau <kafai@fb.com>
Cc: Neeraj Upadhyay <neeraj.iitr10@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>

+8 -4
kernel/rcu/tasks.h
···
123 123 	.call_func = call,						\
124 124 	.rtpcpu = &rt_name ## __percpu,					\
125 125 	.name = n,							\
126     -	.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS),			\
    126 +	.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1,		\
127 127 	.percpu_enqueue_lim = 1,					\
128 128 	.percpu_dequeue_lim = 1,					\
129 129 	.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex),	\
···
216 216 	int cpu;
217 217 	unsigned long flags;
218 218 	int lim;
    219 +	int shift;
219 220
220 221 	raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
221 222 	if (rcu_task_enqueue_lim < 0) {
···
230 229
231 230 	if (lim > nr_cpu_ids)
232 231 		lim = nr_cpu_ids;
233     -	WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
    232 +	shift = ilog2(nr_cpu_ids / lim);
    233 +	if (((nr_cpu_ids - 1) >> shift) >= lim)
    234 +		shift++;
    235 +	WRITE_ONCE(rtp->percpu_enqueue_shift, shift);
234 236 	WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
235 237 	smp_store_release(&rtp->percpu_enqueue_lim, lim);
236 238 	for_each_possible_cpu(cpu) {
···
302 298 	if (unlikely(needadjust)) {
303 299 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
304 300 		if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
305     -			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
    301 +			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
306 302 			WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
307 303 			smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
308 304 			pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
···
417 413 	if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
418 414 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
419 415 		if (rtp->percpu_enqueue_lim > 1) {
420     -			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
    416 +			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
421 417 			smp_store_release(&rtp->percpu_enqueue_lim, 1);
422 418 			rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
423 419 			pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);