Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IPMI: new NMI handling

Convert over to the new NMI handling for getting IPMI watchdog timeouts via an
NMI. This adds config options to know if there is the ability to receive NMIs
and if it has an NMI post-processing call. Then it modifies the IPMI watchdog
to take advantage of this so that it can know if an NMI comes in.

It also adds a test to verify that the IPMI NMI watchdog works.

Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Corey Minyard and committed by
Linus Torvalds
612b5a8d fcfa4724

+110 -44
+5 -3
Documentation/IPMI.txt
··· 584 584 gets a pre-action. During a panic or a reboot, the watchdog will 585 585 start a 120 timer if it is running to make sure the reboot occurs. 586 586 587 - Note that if you use the NMI preaction for the watchdog, you MUST 588 - NOT use nmi watchdog mode 1. If you use the NMI watchdog, you 589 - must use mode 2. 587 + Note that if you use the NMI preaction for the watchdog, you MUST NOT 588 + use the nmi watchdog. There is no reasonable way to tell if an NMI 589 + comes from the IPMI controller, so it must assume that if it gets an 590 + otherwise unhandled NMI, it must be from IPMI and it will panic 591 + immediately. 590 592 591 593 Once you open the watchdog timer, you must write a 'V' character to the 592 594 device to close it, or the timer will not stop. This is a new semantic
+105 -41
drivers/char/ipmi/ipmi_watchdog.c
··· 50 50 #include <linux/poll.h> 51 51 #include <linux/string.h> 52 52 #include <linux/ctype.h> 53 + #include <linux/delay.h> 53 54 #include <asm/atomic.h> 54 55 55 - #ifdef CONFIG_X86_LOCAL_APIC 56 - #include <asm/apic.h> 56 + #ifdef CONFIG_X86 57 + /* This is ugly, but I've determined that x86 is the only architecture 58 + that can reasonably support the IPMI NMI watchdog timeout at this 59 + time. If another architecture adds this capability somehow, it 60 + will have to be a somewhat different mechanism and I have no idea 61 + how it will work. So in the unlikely event that another 62 + architecture supports this, we can figure out a good generic 63 + mechanism for it at that time. */ 64 + #include <asm/kdebug.h> 65 + #define HAVE_DIE_NMI 57 66 #endif 58 67 59 68 #define PFX "IPMI Watchdog: " ··· 322 313 /* If a pretimeout occurs, this is used to allow only one panic to happen. */ 323 314 static atomic_t preop_panic_excl = ATOMIC_INIT(-1); 324 315 316 + #ifdef HAVE_DIE_NMI 317 + static int testing_nmi; 318 + static int nmi_handler_registered; 319 + #endif 320 + 325 321 static int ipmi_heartbeat(void); 326 322 327 323 /* We use a mutex to make sure that only one thing can send a set ··· 365 351 struct ipmi_system_interface_addr addr; 366 352 int hbnow = 0; 367 353 354 + 355 + /* These can be cleared as we are setting the timeout. 
*/ 356 + pretimeout_since_last_heartbeat = 0; 368 357 369 358 data[0] = 0; 370 359 WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS); ··· 443 426 444 427 wait_for_completion(&set_timeout_wait); 445 428 429 + mutex_unlock(&set_timeout_lock); 430 + 446 431 if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB) 447 432 || ((send_heartbeat_now) 448 433 && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY))) 449 - { 450 434 rv = ipmi_heartbeat(); 451 - } 452 - mutex_unlock(&set_timeout_lock); 453 435 454 436 out: 455 437 return rv; ··· 572 556 int rv; 573 557 struct ipmi_system_interface_addr addr; 574 558 575 - if (ipmi_ignore_heartbeat) { 559 + if (ipmi_ignore_heartbeat) 576 560 return 0; 577 - } 578 561 579 562 if (ipmi_start_timer_on_heartbeat) { 580 563 ipmi_start_timer_on_heartbeat = 0; ··· 584 569 We don't want to set the action, though, we want to 585 570 leave that alone (thus it can't be combined with the 586 571 above operation. */ 587 - pretimeout_since_last_heartbeat = 0; 588 572 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); 589 573 } 590 574 ··· 941 927 printk(KERN_CRIT PFX "Unable to register misc device\n"); 942 928 } 943 929 930 + #ifdef HAVE_DIE_NMI 931 + if (nmi_handler_registered) { 932 + int old_pretimeout = pretimeout; 933 + int old_timeout = timeout; 934 + int old_preop_val = preop_val; 935 + 936 + /* Set the pretimeout to go off in a second and give 937 + ourselves plenty of time to stop the timer. */ 938 + ipmi_watchdog_state = WDOG_TIMEOUT_RESET; 939 + preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */ 940 + pretimeout = 99; 941 + timeout = 100; 942 + 943 + testing_nmi = 1; 944 + 945 + rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); 946 + if (rv) { 947 + printk(KERN_WARNING PFX "Error starting timer to" 948 + " test NMI: 0x%x. 
The NMI pretimeout will" 949 + " likely not work\n", rv); 950 + rv = 0; 951 + goto out_restore; 952 + } 953 + 954 + msleep(1500); 955 + 956 + if (testing_nmi != 2) { 957 + printk(KERN_WARNING PFX "IPMI NMI didn't seem to" 958 + " occur. The NMI pretimeout will" 959 + " likely not work\n"); 960 + } 961 + out_restore: 962 + testing_nmi = 0; 963 + preop_val = old_preop_val; 964 + pretimeout = old_pretimeout; 965 + timeout = old_timeout; 966 + } 967 + #endif 968 + 944 969 out: 945 970 if ((start_now) && (rv == 0)) { 946 971 /* Run from startup, so start the timer now. */ ··· 987 934 ipmi_watchdog_state = action_val; 988 935 ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); 989 936 printk(KERN_INFO PFX "Starting now!\n"); 937 + } else { 938 + /* Stop the timer now. */ 939 + ipmi_watchdog_state = WDOG_TIMEOUT_NONE; 940 + ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); 990 941 } 991 942 } 992 943 ··· 1025 968 return; 1026 969 } 1027 970 1028 - #ifdef HAVE_NMI_HANDLER 971 + #ifdef HAVE_DIE_NMI 1029 972 static int 1030 - ipmi_nmi(void *dev_id, int cpu, int handled) 973 + ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) 1031 974 { 975 + struct die_args *args = data; 976 + 977 + if (val != DIE_NMI) 978 + return NOTIFY_OK; 979 + 980 + /* Hack, if it's a memory or I/O error, ignore it. */ 981 + if (args->err & 0xc0) 982 + return NOTIFY_OK; 983 + 984 + /* 985 + * If we get here, it's an NMI that's not a memory or I/O 986 + * error. We can't truly tell if it's from IPMI or not 987 + * without sending a message, and sending a message is almost 988 + * impossible because of locking. 989 + */ 990 + 991 + if (testing_nmi) { 992 + testing_nmi = 2; 993 + return NOTIFY_STOP; 994 + } 995 + 1032 996 /* If we are not expecting a timeout, ignore it. 
*/ 1033 997 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) 1034 - return NOTIFY_DONE; 998 + return NOTIFY_OK; 999 + 1000 + if (preaction_val != WDOG_PRETIMEOUT_NMI) 1001 + return NOTIFY_OK; 1035 1002 1036 1003 /* If no one else handled the NMI, we assume it was the IPMI 1037 1004 watchdog. */ 1038 - if ((!handled) && (preop_val == WDOG_PREOP_PANIC)) { 1005 + if (preop_val == WDOG_PREOP_PANIC) { 1039 1006 /* On some machines, the heartbeat will give 1040 1007 an error and not work unless we re-enable 1041 1008 the timer. So do so. */ ··· 1068 987 panic(PFX "pre-timeout"); 1069 988 } 1070 989 1071 - return NOTIFY_DONE; 990 + return NOTIFY_STOP; 1072 991 } 1073 992 1074 - static struct nmi_handler ipmi_nmi_handler = 1075 - { 1076 - .link = LIST_HEAD_INIT(ipmi_nmi_handler.link), 1077 - .dev_name = "ipmi_watchdog", 1078 - .dev_id = NULL, 1079 - .handler = ipmi_nmi, 1080 - .priority = 0, /* Call us last. */ 993 + static struct notifier_block ipmi_nmi_handler = { 994 + .notifier_call = ipmi_nmi 1081 995 }; 1082 - int nmi_handler_registered; 1083 996 #endif 1084 997 1085 998 static int wdog_reboot_handler(struct notifier_block *this, ··· 1190 1115 preaction_val = WDOG_PRETIMEOUT_NONE; 1191 1116 else if (strcmp(inval, "pre_smi") == 0) 1192 1117 preaction_val = WDOG_PRETIMEOUT_SMI; 1193 - #ifdef HAVE_NMI_HANDLER 1118 + #ifdef HAVE_DIE_NMI 1194 1119 else if (strcmp(inval, "pre_nmi") == 0) 1195 1120 preaction_val = WDOG_PRETIMEOUT_NMI; 1196 1121 #endif ··· 1224 1149 1225 1150 static void check_parms(void) 1226 1151 { 1227 - #ifdef HAVE_NMI_HANDLER 1152 + #ifdef HAVE_DIE_NMI 1228 1153 int do_nmi = 0; 1229 1154 int rv; 1230 1155 ··· 1237 1162 preop_op("preop_none", NULL); 1238 1163 do_nmi = 0; 1239 1164 } 1240 - #ifdef CONFIG_X86_LOCAL_APIC 1241 - if (nmi_watchdog == NMI_IO_APIC) { 1242 - printk(KERN_WARNING PFX "nmi_watchdog is set to IO APIC" 1243 - " mode (value is %d), that is incompatible" 1244 - " with using NMI in the IPMI watchdog." 
1245 - " Disabling IPMI nmi pretimeout.\n", 1246 - nmi_watchdog); 1247 - preaction_val = WDOG_PRETIMEOUT_NONE; 1248 - do_nmi = 0; 1249 - } 1250 - #endif 1251 1165 } 1252 1166 if (do_nmi && !nmi_handler_registered) { 1253 - rv = request_nmi(&ipmi_nmi_handler); 1167 + rv = register_die_notifier(&ipmi_nmi_handler); 1254 1168 if (rv) { 1255 1169 printk(KERN_WARNING PFX 1256 1170 "Can't register nmi handler\n"); ··· 1247 1183 } else 1248 1184 nmi_handler_registered = 1; 1249 1185 } else if (!do_nmi && nmi_handler_registered) { 1250 - release_nmi(&ipmi_nmi_handler); 1186 + unregister_die_notifier(&ipmi_nmi_handler); 1251 1187 nmi_handler_registered = 0; 1252 1188 } 1253 1189 #endif ··· 1283 1219 1284 1220 rv = ipmi_smi_watcher_register(&smi_watcher); 1285 1221 if (rv) { 1286 - #ifdef HAVE_NMI_HANDLER 1287 - if (preaction_val == WDOG_PRETIMEOUT_NMI) 1288 - release_nmi(&ipmi_nmi_handler); 1222 + #ifdef HAVE_DIE_NMI 1223 + if (nmi_handler_registered) 1224 + unregister_die_notifier(&ipmi_nmi_handler); 1289 1225 #endif 1290 1226 atomic_notifier_chain_unregister(&panic_notifier_list, 1291 1227 &wdog_panic_notifier); ··· 1304 1240 ipmi_smi_watcher_unregister(&smi_watcher); 1305 1241 ipmi_unregister_watchdog(watchdog_ifnum); 1306 1242 1307 - #ifdef HAVE_NMI_HANDLER 1243 + #ifdef HAVE_DIE_NMI 1308 1244 if (nmi_handler_registered) 1309 - release_nmi(&ipmi_nmi_handler); 1245 + unregister_die_notifier(&ipmi_nmi_handler); 1310 1246 #endif 1311 1247 1312 1248 atomic_notifier_chain_unregister(&panic_notifier_list,