genirq: Synchronize interrupt thread startup

A kernel hang can be observed when running setserial in a loop on a kernel
with force threaded interrupts. The sequence of events is:

setserial
open("/dev/ttyXXX")
request_irq()
do_stuff()
-> serial interrupt
-> wake(irq_thread)
desc->threads_active++;
close()
free_irq()
kthread_stop(irq_thread)
synchronize_irq() <- hangs because desc->threads_active != 0

The thread is created in request_irq() and woken up, but does not get on a
CPU to reach the actual thread function, which would handle the pending
wake-up. kthread_stop() sets the should-stop condition, which makes the
thread immediately exit, which in turn leaves the stale threads_active
count around.

This problem was introduced with commit 519cc8652b3a, which addressed an
interrupt sharing issue in the PCIe code.

Before that commit free_irq() invoked synchronize_irq(), which waits for
the hard interrupt handler and also for associated threads to complete.

To address the PCIe issue, synchronize_irq() was replaced with
__synchronize_hardirq(), which only waits for the hard interrupt handler to
complete, but not for threaded handlers.

This was done under the assumption that the interrupt thread has already
reached the thread function and is waiting for a wake-up, which is
guaranteed to be handled before acting on the stop condition. The problematic
case, in which the thread has not yet reached the thread function, was
obviously overlooked.

Make sure that the interrupt thread is really started and reaches
thread_fn() before returning from __setup_irq().

This utilizes the existing wait queue in the interrupt descriptor. The
wait queue is unused for non-shared interrupts. For shared interrupts the
usage might cause a spurious wake-up of a waiter in synchronize_irq() or the
completion of a threaded handler might cause a spurious wake-up of the
waiter for the ready flag. Both are harmless and have no functional impact.

[ tglx: Amended changelog ]

Fixes: 519cc8652b3a ("genirq: Synchronize only with single thread on free_irq()")
Signed-off-by: Thomas Pfaff <tpfaff@pcs.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/552fe7b4-9224-b183-bb87-a8f36d335690@pcs.com

authored by Thomas Pfaff and committed by Thomas Gleixner 8707898e 672c0c51

+33 -10
+2
kernel/irq/internals.h
··· 29 * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed 30 * IRQTF_AFFINITY - irq thread is requested to adjust affinity 31 * IRQTF_FORCED_THREAD - irq action is force threaded 32 */ 33 enum { 34 IRQTF_RUNTHREAD, 35 IRQTF_WARNED, 36 IRQTF_AFFINITY, 37 IRQTF_FORCED_THREAD, 38 }; 39 40 /*
··· 29 * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed 30 * IRQTF_AFFINITY - irq thread is requested to adjust affinity 31 * IRQTF_FORCED_THREAD - irq action is force threaded 32 + * IRQTF_READY - signals that irq thread is ready 33 */ 34 enum { 35 IRQTF_RUNTHREAD, 36 IRQTF_WARNED, 37 IRQTF_AFFINITY, 38 IRQTF_FORCED_THREAD, 39 + IRQTF_READY, 40 }; 41 42 /*
+2
kernel/irq/irqdesc.c
··· 407 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 408 mutex_init(&desc->request_mutex); 409 init_rcu_head(&desc->rcu); 410 411 desc_set_defaults(irq, desc, node, affinity, owner); 412 irqd_set(&desc->irq_data, flags); ··· 576 raw_spin_lock_init(&desc[i].lock); 577 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 578 mutex_init(&desc[i].request_mutex); 579 desc_set_defaults(i, &desc[i], node, NULL, NULL); 580 } 581 return arch_early_irq_init();
··· 407 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 408 mutex_init(&desc->request_mutex); 409 init_rcu_head(&desc->rcu); 410 + init_waitqueue_head(&desc->wait_for_threads); 411 412 desc_set_defaults(irq, desc, node, affinity, owner); 413 irqd_set(&desc->irq_data, flags); ··· 575 raw_spin_lock_init(&desc[i].lock); 576 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 577 mutex_init(&desc[i].request_mutex); 578 + init_waitqueue_head(&desc[i].wait_for_threads); 579 desc_set_defaults(i, &desc[i], node, NULL, NULL); 580 } 581 return arch_early_irq_init();
+29 -10
kernel/irq/manage.c
··· 1249 } 1250 1251 /* 1252 * Interrupt handler thread 1253 */ 1254 static int irq_thread(void *data) ··· 1283 struct irq_desc *desc = irq_to_desc(action->irq); 1284 irqreturn_t (*handler_fn)(struct irq_desc *desc, 1285 struct irqaction *action); 1286 1287 sched_set_fifo(current); 1288 ··· 1710 } 1711 1712 if (!shared) { 1713 - init_waitqueue_head(&desc->wait_for_threads); 1714 - 1715 /* Setup the type (level, edge polarity) if configured: */ 1716 if (new->flags & IRQF_TRIGGER_MASK) { 1717 ret = __irq_set_trigger(desc, ··· 1805 1806 irq_setup_timings(desc, new); 1807 1808 - /* 1809 - * Strictly no need to wake it up, but hung_task complains 1810 - * when no hard interrupt wakes the thread up. 1811 - */ 1812 - if (new->thread) 1813 - wake_up_process(new->thread); 1814 - if (new->secondary) 1815 - wake_up_process(new->secondary->thread); 1816 1817 register_irq_proc(irq, desc); 1818 new->dir = NULL;
··· 1249 } 1250 1251 /* 1252 + * Internal function to notify that a interrupt thread is ready. 1253 + */ 1254 + static void irq_thread_set_ready(struct irq_desc *desc, 1255 + struct irqaction *action) 1256 + { 1257 + set_bit(IRQTF_READY, &action->thread_flags); 1258 + wake_up(&desc->wait_for_threads); 1259 + } 1260 + 1261 + /* 1262 + * Internal function to wake up a interrupt thread and wait until it is 1263 + * ready. 1264 + */ 1265 + static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, 1266 + struct irqaction *action) 1267 + { 1268 + if (!action || !action->thread) 1269 + return; 1270 + 1271 + wake_up_process(action->thread); 1272 + wait_event(desc->wait_for_threads, 1273 + test_bit(IRQTF_READY, &action->thread_flags)); 1274 + } 1275 + 1276 + /* 1277 * Interrupt handler thread 1278 */ 1279 static int irq_thread(void *data) ··· 1258 struct irq_desc *desc = irq_to_desc(action->irq); 1259 irqreturn_t (*handler_fn)(struct irq_desc *desc, 1260 struct irqaction *action); 1261 + 1262 + irq_thread_set_ready(desc, action); 1263 1264 sched_set_fifo(current); 1265 ··· 1683 } 1684 1685 if (!shared) { 1686 /* Setup the type (level, edge polarity) if configured: */ 1687 if (new->flags & IRQF_TRIGGER_MASK) { 1688 ret = __irq_set_trigger(desc, ··· 1780 1781 irq_setup_timings(desc, new); 1782 1783 + wake_up_and_wait_for_irq_thread_ready(desc, new); 1784 + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); 1785 1786 register_irq_proc(irq, desc); 1787 new->dir = NULL;