Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc: Add NMI IPI infrastructure

Add a simple NMI IPI system that handles concurrency and reentrancy.

The platform does not have to implement a true non-maskable interrupt,
the default is to simply use the debugger break IPI message. This has
now been co-opted for a general IPI message, and users (debugger and
crash) have been reimplemented on top of the NMI system.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Incorporate incremental fixes from Nick]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Authored by Nicholas Piggin; committed by Michael Ellerman.
ddd703ca 2b4f3ac5

+226 -42
+5
arch/powerpc/Kconfig
··· 80 80 /proc/interrupts. If you configure your system to have too few, 81 81 drivers will fail to load or worse - handle with care. 82 82 83 + config NMI_IPI 84 + bool 85 + depends on SMP && (DEBUGGER || KEXEC_CORE) 86 + default y 87 + 83 88 config STACKTRACE_SUPPORT 84 89 bool 85 90 default y
+11 -3
arch/powerpc/include/asm/smp.h
··· 112 112 * 113 113 * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up 114 114 * in /proc/interrupts will be wrong!!! --Troy */ 115 - #define PPC_MSG_CALL_FUNCTION 0 116 - #define PPC_MSG_RESCHEDULE 1 115 + #define PPC_MSG_CALL_FUNCTION 0 116 + #define PPC_MSG_RESCHEDULE 1 117 117 #define PPC_MSG_TICK_BROADCAST 2 118 - #define PPC_MSG_DEBUGGER_BREAK 3 118 + #define PPC_MSG_NMI_IPI 3 119 119 120 120 /* This is only used by the powernv kernel */ 121 121 #define PPC_MSG_RM_HOST_ACTION 4 122 + 123 + #define NMI_IPI_ALL_OTHERS -2 124 + 125 + #ifdef CONFIG_NMI_IPI 126 + extern int smp_handle_nmi_ipi(struct pt_regs *regs); 127 + #else 128 + static inline int smp_handle_nmi_ipi(struct pt_regs *regs) { return 0; } 129 + #endif 122 130 123 131 /* for irq controllers that have dedicated ipis per message (4) */ 124 132 extern int smp_request_message_ipi(int virq, int message);
+207 -36
arch/powerpc/kernel/smp.c
··· 87 87 88 88 int smt_enabled_at_boot = 1; 89 89 90 - static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; 91 - 92 90 /* 93 91 * Returns 1 if the specified cpu should be brought up during boot. 94 92 * Used to inhibit booting threads if they've been disabled or ··· 157 159 return IRQ_HANDLED; 158 160 } 159 161 160 - static irqreturn_t debug_ipi_action(int irq, void *data) 162 + #ifdef CONFIG_NMI_IPI 163 + static irqreturn_t nmi_ipi_action(int irq, void *data) 161 164 { 162 - if (crash_ipi_function_ptr) { 163 - crash_ipi_function_ptr(get_irq_regs()); 164 - return IRQ_HANDLED; 165 - } 166 - 167 - #ifdef CONFIG_DEBUGGER 168 - debugger_ipi(get_irq_regs()); 169 - #endif /* CONFIG_DEBUGGER */ 170 - 165 + smp_handle_nmi_ipi(get_irq_regs()); 171 166 return IRQ_HANDLED; 172 167 } 168 + #endif 173 169 174 170 static irq_handler_t smp_ipi_action[] = { 175 171 [PPC_MSG_CALL_FUNCTION] = call_function_action, 176 172 [PPC_MSG_RESCHEDULE] = reschedule_action, 177 173 [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, 178 - [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, 174 + #ifdef CONFIG_NMI_IPI 175 + [PPC_MSG_NMI_IPI] = nmi_ipi_action, 176 + #endif 179 177 }; 180 178 179 + /* 180 + * The NMI IPI is a fallback and not truly non-maskable. It is simpler 181 + * than going through the call function infrastructure, and strongly 182 + * serialized, so it is more appropriate for debugging. 
183 + */ 181 184 const char *smp_ipi_name[] = { 182 185 [PPC_MSG_CALL_FUNCTION] = "ipi call function", 183 186 [PPC_MSG_RESCHEDULE] = "ipi reschedule", 184 187 [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", 185 - [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", 188 + [PPC_MSG_NMI_IPI] = "nmi ipi", 186 189 }; 187 190 188 191 /* optional function to request ipi, for controllers with >= 4 ipis */ ··· 191 192 { 192 193 int err; 193 194 194 - if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { 195 + if (msg < 0 || msg > PPC_MSG_NMI_IPI) 195 196 return -EINVAL; 196 - } 197 - #if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) 198 - if (msg == PPC_MSG_DEBUGGER_BREAK) { 197 + #ifndef CONFIG_NMI_IPI 198 + if (msg == PPC_MSG_NMI_IPI) 199 199 return 1; 200 - } 201 200 #endif 201 + 202 202 err = request_irq(virq, smp_ipi_action[msg], 203 203 IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND, 204 204 smp_ipi_name[msg], NULL); ··· 275 277 scheduler_ipi(); 276 278 if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) 277 279 tick_broadcast_ipi_handler(); 278 - if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) 279 - debug_ipi_action(0, NULL); 280 + #ifdef CONFIG_NMI_IPI 281 + if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) 282 + nmi_ipi_action(0, NULL); 283 + #endif 280 284 } while (info->messages); 281 285 282 286 return IRQ_HANDLED; ··· 315 315 do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); 316 316 } 317 317 318 + #ifdef CONFIG_NMI_IPI 319 + 320 + /* 321 + * "NMI IPI" system. 322 + * 323 + * NMI IPIs may not be recoverable, so should not be used as ongoing part of 324 + * a running system. They can be used for crash, debug, halt/reboot, etc. 325 + * 326 + * NMI IPIs are globally single threaded. No more than one in progress at 327 + * any time. 328 + * 329 + * The IPI call waits with interrupts disabled until all targets enter the 330 + * NMI handler, then the call returns. 331 + * 332 + * No new NMI can be initiated until targets exit the handler. 
333 + * 334 + * The IPI call may time out without all targets entering the NMI handler. 335 + * In that case, there is some logic to recover (and ignore subsequent 336 + * NMI interrupts that may eventually be raised), but the platform interrupt 337 + * handler may not be able to distinguish this from other exception causes, 338 + * which may cause a crash. 339 + */ 340 + 341 + static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0); 342 + static struct cpumask nmi_ipi_pending_mask; 343 + static int nmi_ipi_busy_count = 0; 344 + static void (*nmi_ipi_function)(struct pt_regs *) = NULL; 345 + 346 + static void nmi_ipi_lock_start(unsigned long *flags) 347 + { 348 + raw_local_irq_save(*flags); 349 + hard_irq_disable(); 350 + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { 351 + raw_local_irq_restore(*flags); 352 + cpu_relax(); 353 + raw_local_irq_save(*flags); 354 + hard_irq_disable(); 355 + } 356 + } 357 + 358 + static void nmi_ipi_lock(void) 359 + { 360 + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) 361 + cpu_relax(); 362 + } 363 + 364 + static void nmi_ipi_unlock(void) 365 + { 366 + smp_mb(); 367 + WARN_ON(atomic_read(&__nmi_ipi_lock) != 1); 368 + atomic_set(&__nmi_ipi_lock, 0); 369 + } 370 + 371 + static void nmi_ipi_unlock_end(unsigned long *flags) 372 + { 373 + nmi_ipi_unlock(); 374 + raw_local_irq_restore(*flags); 375 + } 376 + 377 + /* 378 + * Platform NMI handler calls this to ack 379 + */ 380 + int smp_handle_nmi_ipi(struct pt_regs *regs) 381 + { 382 + void (*fn)(struct pt_regs *); 383 + unsigned long flags; 384 + int me = raw_smp_processor_id(); 385 + int ret = 0; 386 + 387 + /* 388 + * Unexpected NMIs are possible here because the interrupt may not 389 + * be able to distinguish NMI IPIs from other types of NMIs, or 390 + * because the caller may have timed out. 
391 + */ 392 + nmi_ipi_lock_start(&flags); 393 + if (!nmi_ipi_busy_count) 394 + goto out; 395 + if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask)) 396 + goto out; 397 + 398 + fn = nmi_ipi_function; 399 + if (!fn) 400 + goto out; 401 + 402 + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); 403 + nmi_ipi_busy_count++; 404 + nmi_ipi_unlock(); 405 + 406 + ret = 1; 407 + 408 + fn(regs); 409 + 410 + nmi_ipi_lock(); 411 + nmi_ipi_busy_count--; 412 + out: 413 + nmi_ipi_unlock_end(&flags); 414 + 415 + return ret; 416 + } 417 + 418 + static void do_smp_send_nmi_ipi(int cpu) 419 + { 420 + if (cpu >= 0) { 421 + do_message_pass(cpu, PPC_MSG_NMI_IPI); 422 + } else { 423 + int c; 424 + 425 + for_each_online_cpu(c) { 426 + if (c == raw_smp_processor_id()) 427 + continue; 428 + do_message_pass(c, PPC_MSG_NMI_IPI); 429 + } 430 + } 431 + } 432 + 433 + /* 434 + * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. 435 + * - fn is the target callback function. 436 + * - delay_us > 0 is the delay before giving up waiting for targets to 437 + * enter the handler, == 0 specifies indefinite delay. 
438 + */ 439 + static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) 440 + { 441 + unsigned long flags; 442 + int me = raw_smp_processor_id(); 443 + int ret = 1; 444 + 445 + BUG_ON(cpu == me); 446 + BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS); 447 + 448 + if (unlikely(!smp_ops)) 449 + return 0; 450 + 451 + /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */ 452 + nmi_ipi_lock_start(&flags); 453 + while (nmi_ipi_busy_count) { 454 + nmi_ipi_unlock_end(&flags); 455 + cpu_relax(); 456 + nmi_ipi_lock_start(&flags); 457 + } 458 + 459 + nmi_ipi_function = fn; 460 + 461 + if (cpu < 0) { 462 + /* ALL_OTHERS */ 463 + cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask); 464 + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); 465 + } else { 466 + /* cpumask starts clear */ 467 + cpumask_set_cpu(cpu, &nmi_ipi_pending_mask); 468 + } 469 + nmi_ipi_busy_count++; 470 + nmi_ipi_unlock(); 471 + 472 + do_smp_send_nmi_ipi(cpu); 473 + 474 + while (!cpumask_empty(&nmi_ipi_pending_mask)) { 475 + udelay(1); 476 + if (delay_us) { 477 + delay_us--; 478 + if (!delay_us) 479 + break; 480 + } 481 + } 482 + 483 + nmi_ipi_lock(); 484 + if (!cpumask_empty(&nmi_ipi_pending_mask)) { 485 + /* Could not gather all CPUs */ 486 + ret = 0; 487 + cpumask_clear(&nmi_ipi_pending_mask); 488 + } 489 + nmi_ipi_busy_count--; 490 + nmi_ipi_unlock_end(&flags); 491 + 492 + return ret; 493 + } 494 + #endif /* CONFIG_NMI_IPI */ 495 + 318 496 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 319 497 void tick_broadcast(const struct cpumask *mask) 320 498 { ··· 503 325 } 504 326 #endif 505 327 506 - #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) 328 + #ifdef CONFIG_DEBUGGER 329 + void debugger_ipi_callback(struct pt_regs *regs) 330 + { 331 + debugger_ipi(regs); 332 + } 333 + 507 334 void smp_send_debugger_break(void) 508 335 { 509 - int cpu; 510 - int me = raw_smp_processor_id(); 511 - 512 - if (unlikely(!smp_ops)) 513 - return; 514 - 515 - for_each_online_cpu(cpu) 516 - 
if (cpu != me) 517 - do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); 336 + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000); 518 337 } 519 338 #endif 520 339 521 340 #ifdef CONFIG_KEXEC_CORE 522 341 void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) 523 342 { 524 - crash_ipi_function_ptr = crash_ipi_callback; 525 - if (crash_ipi_callback) { 526 - mb(); 527 - smp_send_debugger_break(); 528 - } 343 + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000); 529 344 } 530 345 #endif 531 346
+1 -1
arch/powerpc/platforms/cell/interrupt.c
··· 211 211 iic_request_ipi(PPC_MSG_CALL_FUNCTION); 212 212 iic_request_ipi(PPC_MSG_RESCHEDULE); 213 213 iic_request_ipi(PPC_MSG_TICK_BROADCAST); 214 - iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); 214 + iic_request_ipi(PPC_MSG_NMI_IPI); 215 215 } 216 216 217 217 #endif /* CONFIG_SMP */
+2 -2
arch/powerpc/platforms/ps3/smp.c
··· 77 77 BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0); 78 78 BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1); 79 79 BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST != 2); 80 - BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK != 3); 80 + BUILD_BUG_ON(PPC_MSG_NMI_IPI != 3); 81 81 82 82 for (i = 0; i < MSG_COUNT; i++) { 83 83 result = ps3_event_receive_port_setup(cpu, &virqs[i]); ··· 96 96 ps3_register_ipi_irq(cpu, virqs[i]); 97 97 } 98 98 99 - ps3_register_ipi_debug_brk(cpu, virqs[PPC_MSG_DEBUGGER_BREAK]); 99 + ps3_register_ipi_debug_brk(cpu, virqs[PPC_MSG_NMI_IPI]); 100 100 101 101 DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu); 102 102 }