Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[IA64] printing support for MCA/INIT

Printing messages to the console from the MCA/INIT handlers is useful;
however, setting oops_in_progress = 1 in those handlers actually
breaks other parts of the kernel. It is especially ugly if the
system misbehaves after returning from a recoverable MCA.

This patch adds an ia64_mca_printk() function that collects
messages into a temporary, not-so-large message buffer while in
the MCA/INIT environment and prints them out later, after
returning to the normal context or when the handlers decide to
bring the system down.

This print function is also exported for use by external (add-on)
MCA handlers, where it is useful for describing the details of a
recovery.

NOTE:
I don't think it would be sane to enlarge the temporary message
buffer enough to hold entire stack dumps from INIT, so buffering
is disabled during the stack dump from the INIT monarch
(= default_monarch_init_process). Please fix this in the future.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

authored by

Hidetoshi Seto and committed by
Tony Luck
43ed3baf 816add4e

+242 -36
+198 -18
arch/ia64/kernel/mca.c
··· 54 54 * 55 55 * 2005-10-07 Keith Owens <kaos@sgi.com> 56 56 * Add notify_die() hooks. 57 + * 58 + * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> 59 + * Add printing support for MCA/INIT. 57 60 */ 58 61 #include <linux/types.h> 59 62 #include <linux/init.h> ··· 139 136 140 137 static int mca_init __initdata; 141 138 139 + /* 140 + * limited & delayed printing support for MCA/INIT handler 141 + */ 142 + 143 + #define mprintk(fmt...) ia64_mca_printk(fmt) 144 + 145 + #define MLOGBUF_SIZE (512+256*NR_CPUS) 146 + #define MLOGBUF_MSGMAX 256 147 + static char mlogbuf[MLOGBUF_SIZE]; 148 + static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ 149 + static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ 150 + static unsigned long mlogbuf_start; 151 + static unsigned long mlogbuf_end; 152 + static unsigned int mlogbuf_finished = 0; 153 + static unsigned long mlogbuf_timestamp = 0; 154 + 155 + static int loglevel_save = -1; 156 + #define BREAK_LOGLEVEL(__console_loglevel) \ 157 + oops_in_progress = 1; \ 158 + if (loglevel_save < 0) \ 159 + loglevel_save = __console_loglevel; \ 160 + __console_loglevel = 15; 161 + 162 + #define RESTORE_LOGLEVEL(__console_loglevel) \ 163 + if (loglevel_save >= 0) { \ 164 + __console_loglevel = loglevel_save; \ 165 + loglevel_save = -1; \ 166 + } \ 167 + mlogbuf_finished = 0; \ 168 + oops_in_progress = 0; 169 + 170 + /* 171 + * Push messages into buffer, print them later if not urgent. 172 + */ 173 + void ia64_mca_printk(const char *fmt, ...) 174 + { 175 + va_list args; 176 + int printed_len; 177 + char temp_buf[MLOGBUF_MSGMAX]; 178 + char *p; 179 + 180 + va_start(args, fmt); 181 + printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); 182 + va_end(args); 183 + 184 + /* Copy the output into mlogbuf */ 185 + if (oops_in_progress) { 186 + /* mlogbuf was abandoned, use printk directly instead. 
*/ 187 + printk(temp_buf); 188 + } else { 189 + spin_lock(&mlogbuf_wlock); 190 + for (p = temp_buf; *p; p++) { 191 + unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; 192 + if (next != mlogbuf_start) { 193 + mlogbuf[mlogbuf_end] = *p; 194 + mlogbuf_end = next; 195 + } else { 196 + /* buffer full */ 197 + break; 198 + } 199 + } 200 + mlogbuf[mlogbuf_end] = '\0'; 201 + spin_unlock(&mlogbuf_wlock); 202 + } 203 + } 204 + EXPORT_SYMBOL(ia64_mca_printk); 205 + 206 + /* 207 + * Print buffered messages. 208 + * NOTE: call this after returning normal context. (ex. from salinfod) 209 + */ 210 + void ia64_mlogbuf_dump(void) 211 + { 212 + char temp_buf[MLOGBUF_MSGMAX]; 213 + char *p; 214 + unsigned long index; 215 + unsigned long flags; 216 + unsigned int printed_len; 217 + 218 + /* Get output from mlogbuf */ 219 + while (mlogbuf_start != mlogbuf_end) { 220 + temp_buf[0] = '\0'; 221 + p = temp_buf; 222 + printed_len = 0; 223 + 224 + spin_lock_irqsave(&mlogbuf_rlock, flags); 225 + 226 + index = mlogbuf_start; 227 + while (index != mlogbuf_end) { 228 + *p = mlogbuf[index]; 229 + index = (index + 1) % MLOGBUF_SIZE; 230 + if (!*p) 231 + break; 232 + p++; 233 + if (++printed_len >= MLOGBUF_MSGMAX - 1) 234 + break; 235 + } 236 + *p = '\0'; 237 + if (temp_buf[0]) 238 + printk(temp_buf); 239 + mlogbuf_start = index; 240 + 241 + mlogbuf_timestamp = 0; 242 + spin_unlock_irqrestore(&mlogbuf_rlock, flags); 243 + } 244 + } 245 + EXPORT_SYMBOL(ia64_mlogbuf_dump); 246 + 247 + /* 248 + * Call this if system is going to down or if immediate flushing messages to 249 + * console is required. (ex. recovery was failed, crash dump is going to be 250 + * invoked, long-wait rendezvous etc.) 251 + * NOTE: this should be called from monarch. 
252 + */ 253 + static void ia64_mlogbuf_finish(int wait) 254 + { 255 + BREAK_LOGLEVEL(console_loglevel); 256 + 257 + spin_lock_init(&mlogbuf_rlock); 258 + ia64_mlogbuf_dump(); 259 + printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " 260 + "MCA/INIT might be dodgy or fail.\n"); 261 + 262 + if (!wait) 263 + return; 264 + 265 + /* wait for console */ 266 + printk("Delaying for 5 seconds...\n"); 267 + udelay(5*1000000); 268 + 269 + mlogbuf_finished = 1; 270 + } 271 + EXPORT_SYMBOL(ia64_mlogbuf_finish); 272 + 273 + /* 274 + * Print buffered messages from INIT context. 275 + */ 276 + static void ia64_mlogbuf_dump_from_init(void) 277 + { 278 + if (mlogbuf_finished) 279 + return; 280 + 281 + if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { 282 + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " 283 + " and the system seems to be messed up.\n"); 284 + ia64_mlogbuf_finish(0); 285 + return; 286 + } 287 + 288 + if (!spin_trylock(&mlogbuf_rlock)) { 289 + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " 290 + "Generated messages other than stack dump will be " 291 + "buffered to mlogbuf and will be printed later.\n"); 292 + printk(KERN_ERR "INIT: If messages would not printed after " 293 + "this INIT, wait 30sec and assert INIT again.\n"); 294 + if (!mlogbuf_timestamp) 295 + mlogbuf_timestamp = jiffies; 296 + return; 297 + } 298 + spin_unlock(&mlogbuf_rlock); 299 + ia64_mlogbuf_dump(); 300 + } 142 301 143 302 static void inline 144 303 ia64_mca_spin(const char *func) 145 304 { 146 - printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); 305 + if (monarch_cpu == smp_processor_id()) 306 + ia64_mlogbuf_finish(0); 307 + mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); 147 308 while (1) 148 309 cpu_relax(); 149 310 } ··· 1155 988 } 1156 989 if (!missing) 1157 990 goto all_in; 1158 - printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); 991 + /* 992 + * Maybe slave(s) dead. 
Print buffered messages immediately. 993 + */ 994 + ia64_mlogbuf_finish(0); 995 + mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); 1159 996 for_each_online_cpu(c) { 1160 997 if (c == monarch) 1161 998 continue; 1162 999 if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) 1163 - printk(" %d", c); 1000 + mprintk(" %d", c); 1164 1001 } 1165 - printk("\n"); 1002 + mprintk("\n"); 1166 1003 return; 1167 1004 1168 1005 all_in: 1169 - printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); 1006 + mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); 1170 1007 return; 1171 1008 } 1172 1009 ··· 1198 1027 struct ia64_mca_notify_die nd = 1199 1028 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1200 1029 1201 - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ 1202 - console_loglevel = 15; /* make sure printks make it to console */ 1203 - printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", 1204 - sos->proc_state_param, cpu, sos->monarch); 1030 + mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " 1031 + "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); 1205 1032 1206 1033 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); 1207 1034 monarch_cpu = cpu; ··· 1235 1066 rh->severity = sal_log_severity_corrected; 1236 1067 ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); 1237 1068 sos->os_status = IA64_MCA_CORRECTED; 1069 + } else { 1070 + /* Dump buffered message to console */ 1071 + ia64_mlogbuf_finish(1); 1238 1072 } 1239 1073 if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) 1240 1074 == NOTIFY_STOP) ··· 1477 1305 struct task_struct *g, *t; 1478 1306 if (val != DIE_INIT_MONARCH_PROCESS) 1479 1307 return NOTIFY_DONE; 1308 + 1309 + /* 1310 + * FIXME: mlogbuf will brim over with INIT stack dumps. 1311 + * To enable show_stack from INIT, we use oops_in_progress which should 1312 + * be used in real oops. 
This would cause something wrong after INIT. 1313 + */ 1314 + BREAK_LOGLEVEL(console_loglevel); 1315 + ia64_mlogbuf_dump_from_init(); 1316 + 1480 1317 printk(KERN_ERR "Processes interrupted by INIT -"); 1481 1318 for_each_online_cpu(c) { 1482 1319 struct ia64_sal_os_state *s; ··· 1507 1326 } while_each_thread (g, t); 1508 1327 read_unlock(&tasklist_lock); 1509 1328 } 1329 + /* FIXME: This will not restore zapped printk locks. */ 1330 + RESTORE_LOGLEVEL(console_loglevel); 1510 1331 return NOTIFY_DONE; 1511 1332 } 1512 1333 ··· 1540 1357 struct ia64_mca_notify_die nd = 1541 1358 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1542 1359 1543 - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ 1544 - console_loglevel = 15; /* make sure printks make it to console */ 1545 - 1546 1360 (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); 1547 1361 1548 - printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", 1362 + mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", 1549 1363 sos->proc_state_param, cpu, sos->monarch); 1550 1364 salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); 1551 1365 ··· 1555 1375 * fix their proms and get their customers updated. 1556 1376 */ 1557 1377 if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { 1558 - printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", 1378 + mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", 1559 1379 __FUNCTION__, cpu); 1560 1380 atomic_dec(&slaves); 1561 1381 sos->monarch = 1; ··· 1567 1387 * fix their proms and get their customers updated. 
1568 1388 */ 1569 1389 if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { 1570 - printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", 1390 + mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", 1571 1391 __FUNCTION__, cpu); 1572 1392 atomic_dec(&monarchs); 1573 1393 sos->monarch = 0; ··· 1588 1408 if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) 1589 1409 == NOTIFY_STOP) 1590 1410 ia64_mca_spin(__FUNCTION__); 1591 - printk("Slave on cpu %d returning to normal service.\n", cpu); 1411 + mprintk("Slave on cpu %d returning to normal service.\n", cpu); 1592 1412 set_curr_task(cpu, previous_current); 1593 1413 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; 1594 1414 atomic_dec(&slaves); ··· 1606 1426 * same serial line, the user will need some time to switch out of the BMC before 1607 1427 * the dump begins. 1608 1428 */ 1609 - printk("Delaying for 5 seconds...\n"); 1429 + mprintk("Delaying for 5 seconds...\n"); 1610 1430 udelay(5*1000000); 1611 1431 ia64_wait_for_slaves(cpu, "INIT"); 1612 1432 /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through ··· 1619 1439 if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) 1620 1440 == NOTIFY_STOP) 1621 1441 ia64_mca_spin(__FUNCTION__); 1622 - printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); 1442 + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); 1623 1443 atomic_dec(&monarchs); 1624 1444 set_curr_task(cpu, previous_current); 1625 1445 monarch_cpu = -1;
+36 -18
arch/ia64/kernel/mca_drv.c
··· 79 79 fatal_mca(const char *fmt, ...) 80 80 { 81 81 va_list args; 82 + char buf[256]; 82 83 83 84 va_start(args, fmt); 84 - vprintk(fmt, args); 85 + vsnprintf(buf, sizeof(buf), fmt, args); 85 86 va_end(args); 87 + ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); 86 88 87 89 return MCA_NOT_RECOVERED; 90 + } 91 + 92 + static int 93 + mca_recovered(const char *fmt, ...) 94 + { 95 + va_list args; 96 + char buf[256]; 97 + 98 + va_start(args, fmt); 99 + vsnprintf(buf, sizeof(buf), fmt, args); 100 + va_end(args); 101 + ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); 102 + 103 + return MCA_RECOVERED; 88 104 } 89 105 90 106 /** ··· 156 140 void 157 141 mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) 158 142 { 143 + ia64_mlogbuf_dump(); 159 144 printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " 160 145 "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", 161 146 raw_smp_processor_id(), current->pid, current->uid, ··· 457 440 458 441 /* Is target address valid? */ 459 442 if (!pbci->tv) 460 - return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); 443 + return fatal_mca("target address not valid"); 461 444 462 445 /* 463 446 * cpu read or memory-mapped io read ··· 475 458 476 459 /* Is minstate valid? */ 477 460 if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) 478 - return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); 461 + return fatal_mca("minstate not valid"); 479 462 psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); 480 463 psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); 481 464 ··· 509 492 psr2->bn = 1; 510 493 psr2->i = 0; 511 494 512 - return MCA_RECOVERED; 495 + return mca_recovered("user memory corruption. 
" 496 + "kill affected process - recovered."); 513 497 } 514 498 515 499 } 516 500 517 - return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," 518 - " iip 0x%lx\n", pmsa->pmsa_iip); 501 + return fatal_mca("kernel context not recovered, iip 0x%lx\n", 502 + pmsa->pmsa_iip); 519 503 } 520 504 521 505 /** ··· 602 584 * The machine check is corrected. 603 585 */ 604 586 if (psp->cm == 1) 605 - return MCA_RECOVERED; 587 + return mca_recovered("machine check is already corrected."); 606 588 607 589 /* 608 590 * The error was not contained. Software must be reset. 609 591 */ 610 592 if (psp->us || psp->ci == 0) 611 - return fatal_mca(KERN_ALERT "MCA: error not contained\n"); 593 + return fatal_mca("error not contained"); 612 594 613 595 /* 614 596 * The cache check and bus check bits have four possible states ··· 619 601 * 1 1 Memory error, attempt recovery 620 602 */ 621 603 if (psp->bc == 0 || pbci == NULL) 622 - return fatal_mca(KERN_ALERT "MCA: No bus check\n"); 604 + return fatal_mca("No bus check"); 623 605 624 606 /* 625 607 * Sorry, we cannot handle so many. 626 608 */ 627 609 if (peidx_bus_check_num(peidx) > 1) 628 - return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); 610 + return fatal_mca("Too many bus checks"); 629 611 /* 630 612 * Well, here is only one bus error. 631 613 */ 632 614 if (pbci->ib) 633 - return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); 615 + return fatal_mca("Internal Bus error"); 634 616 if (pbci->cc) 635 - return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); 617 + return fatal_mca("Cache-cache error"); 636 618 if (pbci->eb && pbci->bsi > 0) 637 - return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); 619 + return fatal_mca("External bus check fatal status"); 638 620 639 621 /* 640 622 * This is a local MCA and estimated as recoverble external bus error. ··· 646 628 /* 647 629 * On account of strange SAL error record, we cannot recover. 
648 630 */ 649 - return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); 631 + return fatal_mca("Strange SAL record"); 650 632 } 651 633 652 634 /** ··· 675 657 676 658 /* Now, OS can recover when there is one processor error section */ 677 659 if (n_proc_err > 1) 678 - return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); 660 + return fatal_mca("Too Many Errors"); 679 661 else if (n_proc_err == 0) 680 - /* Weird SAL record ... We need not to recover */ 681 - return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); 662 + /* Weird SAL record ... We can't do anything */ 663 + return fatal_mca("Weird SAL record"); 682 664 683 665 /* Make index of processor error section */ 684 666 mca_make_peidx((sal_log_processor_info_t*) ··· 689 671 690 672 /* Check whether MCA is global or not */ 691 673 if (is_mca_global(&peidx, &pbci, sos)) 692 - return fatal_mca(KERN_ALERT "MCA: global MCA\n"); 674 + return fatal_mca("global MCA"); 693 675 694 676 /* Try to recover a processor error */ 695 677 return recover_from_processor_error(platform_err, &slidx, &peidx,
+4
arch/ia64/kernel/mca_drv.h
··· 118 118 119 119 extern const struct mca_table_entry *search_mca_tables (unsigned long addr); 120 120 extern int mca_recover_range(unsigned long); 121 + extern void ia64_mca_printk(const char * fmt, ...) 122 + __attribute__ ((format (printf, 1, 2))); 123 + extern void ia64_mlogbuf_dump(void); 124 +
+4
arch/ia64/kernel/salinfo.c
··· 266 266 /* Check for outstanding MCA/INIT records every minute (arbitrary) */ 267 267 #define SALINFO_TIMER_DELAY (60*HZ) 268 268 static struct timer_list salinfo_timer; 269 + extern void ia64_mlogbuf_dump(void); 269 270 270 271 static void 271 272 salinfo_timeout_check(struct salinfo_data *data) ··· 284 283 static void 285 284 salinfo_timeout (unsigned long arg) 286 285 { 286 + ia64_mlogbuf_dump(); 287 287 salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); 288 288 salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); 289 289 salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; ··· 333 331 334 332 if (cpu == -1) 335 333 goto retry; 334 + 335 + ia64_mlogbuf_dump(); 336 336 337 337 /* for next read, start checking at next CPU */ 338 338 data->cpu_check = cpu;