Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/nmi: Print additional information

In case of an unrecoverable machine check, only the machine check interrupt
code is printed to the console before the machine is stopped. This sometimes
makes root cause analysis hard.

Print additional machine check information to make analysis easier.
The output now looks like this:

Unrecoverable machine check, code: 00400F5F4C3B0000
6.16.0-rc2-11605-g987a9431e53a-dirty
HW: IBM 3931 A01 704 (z/VM 7.4.0)
PSW: 0706C00180000000 000003FFE0F0462E PFX: 0000000000070000
LBA: 000003FFE0F0462A EDC: 0000000000000000 FSA: 0000000000000000
CRS:
0080000014966A12 0000000087CB41C7 0000000000BFF140 0000000000000000
000000000000FFFF 0000000000BFF140 0000000071000000 0000000087CB41C7
0000000000008000 0000000000000000 0000000000000000 0000000000000000
0000000000000000 00000000024C0007 00000000DB000000 0000000000BFF000
GPRS:
FFFFFFFF00000000 000003FFE0F0462E E10EA4F489F897A6 0000000000000000
7FFFFFF2C0413C4C 000003FFE19B7010 0000000000000000 0000000000000000
0000000000000000 00000001F76B3380 000003FFE15D4050 0000000000000005
0000000000000000 0000000000070000 000003FFE0F0586C 0000037FE00B7DA0
System stopped

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>

authored by

Heiko Carstens and committed by
Alexander Gordeev
fbb3bdf5 819275e1

+75 -5
+2
arch/s390/include/asm/setup.h
··· 41 41 char command_line[COMMAND_LINE_SIZE]; /* 0x10480 */ 42 42 }; 43 43 44 + extern char arch_hw_string[128]; 45 + 44 46 extern struct parmarea parmarea; 45 47 46 48 extern unsigned int zlib_dfltcc_support;
+3
arch/s390/kernel/early.c
··· 105 105 } 106 106 } 107 107 108 + char arch_hw_string[128]; 109 + 108 110 static noinline __init void setup_arch_string(void) 109 111 { 110 112 struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; ··· 133 131 machine_is_vm() ? "z/VM" : 134 132 machine_is_kvm() ? "KVM" : "unknown"); 135 133 } 134 + sprintf(arch_hw_string, "HW: %s (%s)", mstr, hvstr); 136 135 dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); 137 136 } 138 137
+70 -5
arch/s390/kernel/nmi.c
··· 9 9 */ 10 10 11 11 #include <linux/kernel_stat.h> 12 + #include <linux/utsname.h> 12 13 #include <linux/cpufeature.h> 13 14 #include <linux/init.h> 14 15 #include <linux/errno.h> ··· 116 115 return dest; 117 116 } 118 117 118 + static notrace void nmi_print_info(void) 119 + { 120 + struct lowcore *lc = get_lowcore(); 121 + char message[100]; 122 + char *ptr; 123 + int i; 124 + 125 + ptr = nmi_puts(message, "Unrecoverable machine check, code: "); 126 + ptr = u64_to_hex(ptr, lc->mcck_interruption_code); 127 + ptr = nmi_puts(ptr, "\n"); 128 + sclp_emergency_printk(message); 129 + 130 + ptr = nmi_puts(message, init_utsname()->release); 131 + ptr = nmi_puts(ptr, "\n"); 132 + sclp_emergency_printk(message); 133 + 134 + ptr = nmi_puts(message, arch_hw_string); 135 + ptr = nmi_puts(ptr, "\n"); 136 + sclp_emergency_printk(message); 137 + 138 + ptr = nmi_puts(message, "PSW: "); 139 + ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask); 140 + ptr = nmi_puts(ptr, " "); 141 + ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr); 142 + ptr = nmi_puts(ptr, " PFX: "); 143 + ptr = u64_to_hex(ptr, (u64)get_lowcore()); 144 + ptr = nmi_puts(ptr, "\n"); 145 + sclp_emergency_printk(message); 146 + 147 + ptr = nmi_puts(message, "LBA: "); 148 + ptr = u64_to_hex(ptr, lc->last_break_save_area); 149 + ptr = nmi_puts(ptr, " EDC: "); 150 + ptr = u64_to_hex(ptr, lc->external_damage_code); 151 + ptr = nmi_puts(ptr, " FSA: "); 152 + ptr = u64_to_hex(ptr, lc->failing_storage_address); 153 + ptr = nmi_puts(ptr, "\n"); 154 + sclp_emergency_printk(message); 155 + 156 + ptr = nmi_puts(message, "CRS:\n"); 157 + sclp_emergency_printk(message); 158 + ptr = message; 159 + for (i = 0; i < 16; i++) { 160 + ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val); 161 + ptr = nmi_puts(ptr, " "); 162 + if ((i + 1) % 4 == 0) { 163 + ptr = nmi_puts(ptr, "\n"); 164 + sclp_emergency_printk(message); 165 + ptr = message; 166 + } 167 + } 168 + 169 + ptr = nmi_puts(message, "GPRS:\n"); 170 + sclp_emergency_printk(message); 171 + ptr 
= message; 172 + for (i = 0; i < 16; i++) { 173 + ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]); 174 + ptr = nmi_puts(ptr, " "); 175 + if ((i + 1) % 4 == 0) { 176 + ptr = nmi_puts(ptr, "\n"); 177 + sclp_emergency_printk(message); 178 + ptr = message; 179 + } 180 + } 181 + 182 + ptr = nmi_puts(message, "System stopped\n"); 183 + sclp_emergency_printk(message); 184 + } 185 + 119 186 static notrace void s390_handle_damage(void) 120 187 { 121 188 struct lowcore *lc = get_lowcore(); 122 189 union ctlreg0 cr0, cr0_new; 123 - char message[100]; 124 190 psw_t psw_save; 125 - char *ptr; 126 191 127 192 smp_emergency_stop(); 128 193 diag_amode31_ops.diag308_reset(); 129 - ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x"); 130 - u64_to_hex(ptr, lc->mcck_interruption_code); 131 194 132 195 /* 133 196 * Disable low address protection and make machine check new PSW a ··· 205 140 psw_bits(lc->mcck_new_psw).io = 0; 206 141 psw_bits(lc->mcck_new_psw).ext = 0; 207 142 psw_bits(lc->mcck_new_psw).wait = 1; 208 - sclp_emergency_printk(message); 143 + nmi_print_info(); 209 144 210 145 /* 211 146 * Restore machine check new PSW and control register 0 to original