Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/book3s: Print task info if we take a machine check in user mode

When an MCE (Machine Check Exception) hits while the CPU is in user mode
(MSR[PR]=1), print the task info (PID and command name) in the MCE error
log on the console. This may help to identify the application that
triggered the MCE.

After this patch the MCE console output looks like:

Severe Machine check interrupt [Recovered]
NIP: [0000000010039778] PID: 762 Comm: ebizzy
Initiator: CPU
Error type: SLB [Multihit]
Effective address: 0000000010039778

Severe Machine check interrupt [Not recovered]
NIP: [0000000010039778] PID: 763 Comm: ebizzy
Initiator: CPU
Error type: UE [Page table walk ifetch]
Effective address: 0000000010039778
ebizzy[763]: unhandled signal 7 at 0000000010039778 nip 0000000010039778 lr 0000000010001b44 code 30004

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

+15 -5
+2 -1
arch/powerpc/include/asm/mce.h
··· 207 207 extern int get_mce_event(struct machine_check_event *mce, bool release); 208 208 extern void release_mce_event(void); 209 209 extern void machine_check_queue_event(void); 210 - extern void machine_check_print_event_info(struct machine_check_event *evt); 210 + extern void machine_check_print_event_info(struct machine_check_event *evt, 211 + bool user_mode); 211 212 extern uint64_t get_mce_fault_addr(struct machine_check_event *evt); 212 213 213 214 #endif /* __ASM_PPC64_MCE_H__ */
+12 -3
arch/powerpc/kernel/mce.c
··· 228 228 while (__this_cpu_read(mce_queue_count) > 0) { 229 229 index = __this_cpu_read(mce_queue_count) - 1; 230 230 machine_check_print_event_info( 231 - this_cpu_ptr(&mce_event_queue[index])); 231 + this_cpu_ptr(&mce_event_queue[index]), false); 232 232 __this_cpu_dec(mce_queue_count); 233 233 } 234 234 } 235 235 236 - void machine_check_print_event_info(struct machine_check_event *evt) 236 + void machine_check_print_event_info(struct machine_check_event *evt, 237 + bool user_mode) 237 238 { 238 239 const char *level, *sevstr, *subtype; 239 240 static const char *mc_ue_types[] = { ··· 312 311 printk("%s%s Machine check interrupt [%s]\n", level, sevstr, 313 312 evt->disposition == MCE_DISPOSITION_RECOVERED ? 314 313 "Recovered" : "Not recovered"); 315 - printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, (void *)evt->srr0); 314 + 315 + if (user_mode) { 316 + printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, 317 + evt->srr0, current->pid, current->comm); 318 + } else { 319 + printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, 320 + (void *)evt->srr0); 321 + } 322 + 316 323 printk("%s Initiator: %s\n", level, 317 324 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); 318 325 switch (evt->error_type) {
+1 -1
arch/powerpc/platforms/powernv/opal.c
··· 435 435 evt.version); 436 436 return 0; 437 437 } 438 - machine_check_print_event_info(&evt); 438 + machine_check_print_event_info(&evt, user_mode(regs)); 439 439 440 440 if (opal_recover_mce(regs, &evt)) 441 441 return 1;