Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] IPMI: Fix BT long busy

The IPMI BT subdriver has been patched to survive "long busy" timeouts seen
during firmware upgrades and resets. The patch never returns the HOSED state,
synthesizes response messages with meaningful completion codes, and recovers
gracefully when the hardware finishes the long busy. The subdriver now issues
a "Get BT Capabilities" command and properly uses those results. More
informative completion codes are returned on error from transaction starts;
this logic was propagated to the KCS and SMIC subdrivers. Finally, indent and
other style quirks were normalized.

Signed-off-by: Rocky Craig <rocky.craig@hp.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Corey Minyard and committed by
Linus Torvalds
4d7cbac7 168b35a7

+419 -262
+389 -244
drivers/char/ipmi/ipmi_bt_sm.c
··· 33 33 #include <linux/ipmi_msgdefs.h> /* for completion codes */ 34 34 #include "ipmi_si_sm.h" 35 35 36 - static int bt_debug = 0x00; /* Production value 0, see following flags */ 36 + #define BT_DEBUG_OFF 0 /* Used in production */ 37 + #define BT_DEBUG_ENABLE 1 /* Generic messages */ 38 + #define BT_DEBUG_MSG 2 /* Prints all request/response buffers */ 39 + #define BT_DEBUG_STATES 4 /* Verbose look at state changes */ 37 40 38 - #define BT_DEBUG_ENABLE 1 39 - #define BT_DEBUG_MSG 2 40 - #define BT_DEBUG_STATES 4 41 + static int bt_debug = BT_DEBUG_OFF; 42 + 41 43 module_param(bt_debug, int, 0644); 42 44 MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); 43 45 ··· 49 47 Since the Open IPMI architecture is single-message oriented at this 50 48 stage, the queue depth of BT is of no concern. */ 51 49 52 - #define BT_NORMAL_TIMEOUT 5000000 /* seconds in microseconds */ 53 - #define BT_RETRY_LIMIT 2 54 - #define BT_RESET_DELAY 6000000 /* 6 seconds after warm reset */ 50 + #define BT_NORMAL_TIMEOUT 5 /* seconds */ 51 + #define BT_NORMAL_RETRY_LIMIT 2 52 + #define BT_RESET_DELAY 6 /* seconds after warm reset */ 53 + 54 + /* States are written in chronological order and usually cover 55 + multiple rows of the state table discussion in the IPMI spec. 
*/ 55 56 56 57 enum bt_states { 57 - BT_STATE_IDLE, 58 + BT_STATE_IDLE = 0, /* Order is critical in this list */ 58 59 BT_STATE_XACTION_START, 59 60 BT_STATE_WRITE_BYTES, 60 - BT_STATE_WRITE_END, 61 61 BT_STATE_WRITE_CONSUME, 62 - BT_STATE_B2H_WAIT, 63 - BT_STATE_READ_END, 64 - BT_STATE_RESET1, /* These must come last */ 62 + BT_STATE_READ_WAIT, 63 + BT_STATE_CLEAR_B2H, 64 + BT_STATE_READ_BYTES, 65 + BT_STATE_RESET1, /* These must come last */ 65 66 BT_STATE_RESET2, 66 67 BT_STATE_RESET3, 67 68 BT_STATE_RESTART, 68 - BT_STATE_HOSED 69 + BT_STATE_PRINTME, 70 + BT_STATE_CAPABILITIES_BEGIN, 71 + BT_STATE_CAPABILITIES_END, 72 + BT_STATE_LONG_BUSY /* BT doesn't get hosed :-) */ 69 73 }; 74 + 75 + /* Macros seen at the end of state "case" blocks. They help with legibility 76 + and debugging. */ 77 + 78 + #define BT_STATE_CHANGE(X,Y) { bt->state = X; return Y; } 79 + 80 + #define BT_SI_SM_RETURN(Y) { last_printed = BT_STATE_PRINTME; return Y; } 70 81 71 82 struct si_sm_data { 72 83 enum bt_states state; 73 - enum bt_states last_state; /* assist printing and resets */ 74 84 unsigned char seq; /* BT sequence number */ 75 85 struct si_sm_io *io; 76 - unsigned char write_data[IPMI_MAX_MSG_LENGTH]; 77 - int write_count; 78 - unsigned char read_data[IPMI_MAX_MSG_LENGTH]; 79 - int read_count; 80 - int truncated; 81 - long timeout; 82 - unsigned int error_retries; /* end of "common" fields */ 86 + unsigned char write_data[IPMI_MAX_MSG_LENGTH]; 87 + int write_count; 88 + unsigned char read_data[IPMI_MAX_MSG_LENGTH]; 89 + int read_count; 90 + int truncated; 91 + long timeout; /* microseconds countdown */ 92 + int error_retries; /* end of "common" fields */ 83 93 int nonzero_status; /* hung BMCs stay all 0 */ 94 + enum bt_states complete; /* to divert the state machine */ 95 + int BT_CAP_outreqs; 96 + long BT_CAP_req2rsp; 97 + int BT_CAP_retries; /* Recommended retries */ 84 98 }; 85 99 86 100 #define BT_CLR_WR_PTR 0x01 /* See IPMI 1.5 table 11.6.4 */ ··· 129 111 static char 
*state2txt(unsigned char state) 130 112 { 131 113 switch (state) { 132 - case BT_STATE_IDLE: return("IDLE"); 133 - case BT_STATE_XACTION_START: return("XACTION"); 134 - case BT_STATE_WRITE_BYTES: return("WR_BYTES"); 135 - case BT_STATE_WRITE_END: return("WR_END"); 136 - case BT_STATE_WRITE_CONSUME: return("WR_CONSUME"); 137 - case BT_STATE_B2H_WAIT: return("B2H_WAIT"); 138 - case BT_STATE_READ_END: return("RD_END"); 139 - case BT_STATE_RESET1: return("RESET1"); 140 - case BT_STATE_RESET2: return("RESET2"); 141 - case BT_STATE_RESET3: return("RESET3"); 142 - case BT_STATE_RESTART: return("RESTART"); 143 - case BT_STATE_HOSED: return("HOSED"); 114 + case BT_STATE_IDLE: return("IDLE"); 115 + case BT_STATE_XACTION_START: return("XACTION"); 116 + case BT_STATE_WRITE_BYTES: return("WR_BYTES"); 117 + case BT_STATE_WRITE_CONSUME: return("WR_CONSUME"); 118 + case BT_STATE_READ_WAIT: return("RD_WAIT"); 119 + case BT_STATE_CLEAR_B2H: return("CLEAR_B2H"); 120 + case BT_STATE_READ_BYTES: return("RD_BYTES"); 121 + case BT_STATE_RESET1: return("RESET1"); 122 + case BT_STATE_RESET2: return("RESET2"); 123 + case BT_STATE_RESET3: return("RESET3"); 124 + case BT_STATE_RESTART: return("RESTART"); 125 + case BT_STATE_LONG_BUSY: return("LONG_BUSY"); 126 + case BT_STATE_CAPABILITIES_BEGIN: return("CAP_BEGIN"); 127 + case BT_STATE_CAPABILITIES_END: return("CAP_END"); 144 128 } 145 129 return("BAD STATE"); 146 130 } 147 131 #define STATE2TXT state2txt(bt->state) 148 132 149 - static char *status2txt(unsigned char status, char *buf) 133 + static char *status2txt(unsigned char status) 150 134 { 135 + /* 136 + * This cannot be called by two threads at the same time and 137 + * the buffer is always consumed immediately, so the static is 138 + * safe to use. 
139 + */ 140 + static char buf[40]; 141 + 151 142 strcpy(buf, "[ "); 152 - if (status & BT_B_BUSY) strcat(buf, "B_BUSY "); 153 - if (status & BT_H_BUSY) strcat(buf, "H_BUSY "); 154 - if (status & BT_OEM0) strcat(buf, "OEM0 "); 155 - if (status & BT_SMS_ATN) strcat(buf, "SMS "); 156 - if (status & BT_B2H_ATN) strcat(buf, "B2H "); 157 - if (status & BT_H2B_ATN) strcat(buf, "H2B "); 143 + if (status & BT_B_BUSY) 144 + strcat(buf, "B_BUSY "); 145 + if (status & BT_H_BUSY) 146 + strcat(buf, "H_BUSY "); 147 + if (status & BT_OEM0) 148 + strcat(buf, "OEM0 "); 149 + if (status & BT_SMS_ATN) 150 + strcat(buf, "SMS "); 151 + if (status & BT_B2H_ATN) 152 + strcat(buf, "B2H "); 153 + if (status & BT_H2B_ATN) 154 + strcat(buf, "H2B "); 158 155 strcat(buf, "]"); 159 156 return buf; 160 157 } 161 - #define STATUS2TXT(buf) status2txt(status, buf) 158 + #define STATUS2TXT status2txt(status) 162 159 163 - /* This will be called from within this module on a hosed condition */ 164 - #define FIRST_SEQ 0 160 + /* called externally at insmod time, and internally on cleanup */ 161 + 165 162 static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io) 166 163 { 167 - bt->state = BT_STATE_IDLE; 168 - bt->last_state = BT_STATE_IDLE; 169 - bt->seq = FIRST_SEQ; 170 - bt->io = io; 171 - bt->write_count = 0; 172 - bt->read_count = 0; 173 - bt->error_retries = 0; 174 - bt->nonzero_status = 0; 175 - bt->truncated = 0; 176 - bt->timeout = BT_NORMAL_TIMEOUT; 164 + memset(bt, 0, sizeof(struct si_sm_data)); 165 + if (bt->io != io) { /* external: one-time only things */ 166 + bt->io = io; 167 + bt->seq = 0; 168 + } 169 + bt->state = BT_STATE_IDLE; /* start here */ 170 + bt->complete = BT_STATE_IDLE; /* end here */ 171 + bt->BT_CAP_req2rsp = BT_NORMAL_TIMEOUT * 1000000; 172 + bt->BT_CAP_retries = BT_NORMAL_RETRY_LIMIT; 173 + /* BT_CAP_outreqs == zero is a flag to read BT Capabilities */ 177 174 return 3; /* We claim 3 bytes of space; ought to check SPMI table */ 178 175 } 176 + 177 + /* 
Jam a completion code (probably an error) into a response */ 178 + 179 + static void force_result(struct si_sm_data *bt, unsigned char completion_code) 180 + { 181 + bt->read_data[0] = 4; /* # following bytes */ 182 + bt->read_data[1] = bt->write_data[1] | 4; /* Odd NetFn/LUN */ 183 + bt->read_data[2] = bt->write_data[2]; /* seq (ignored) */ 184 + bt->read_data[3] = bt->write_data[3]; /* Command */ 185 + bt->read_data[4] = completion_code; 186 + bt->read_count = 5; 187 + } 188 + 189 + /* The upper state machine starts here */ 179 190 180 191 static int bt_start_transaction(struct si_sm_data *bt, 181 192 unsigned char *data, ··· 212 165 { 213 166 unsigned int i; 214 167 215 - if ((size < 2) || (size > (IPMI_MAX_MSG_LENGTH - 2))) 216 - return -1; 168 + if (size < 2) 169 + return IPMI_REQ_LEN_INVALID_ERR; 170 + if (size > IPMI_MAX_MSG_LENGTH) 171 + return IPMI_REQ_LEN_EXCEEDED_ERR; 217 172 218 - if ((bt->state != BT_STATE_IDLE) && (bt->state != BT_STATE_HOSED)) 219 - return -2; 173 + if (bt->state == BT_STATE_LONG_BUSY) 174 + return IPMI_NODE_BUSY_ERR; 175 + 176 + if (bt->state != BT_STATE_IDLE) 177 + return IPMI_NOT_IN_MY_STATE_ERR; 220 178 221 179 if (bt_debug & BT_DEBUG_MSG) { 222 - printk(KERN_WARNING "+++++++++++++++++++++++++++++++++++++\n"); 223 - printk(KERN_WARNING "BT: write seq=0x%02X:", bt->seq); 180 + printk(KERN_WARNING "BT: +++++++++++++++++ New command\n"); 181 + printk(KERN_WARNING "BT: NetFn/LUN CMD [%d data]:", size - 2); 224 182 for (i = 0; i < size; i ++) 225 - printk (" %02x", data[i]); 183 + printk (" %02x", data[i]); 226 184 printk("\n"); 227 185 } 228 186 bt->write_data[0] = size + 1; /* all data plus seq byte */ 229 187 bt->write_data[1] = *data; /* NetFn/LUN */ 230 - bt->write_data[2] = bt->seq; 188 + bt->write_data[2] = bt->seq++; 231 189 memcpy(bt->write_data + 3, data + 1, size - 1); 232 190 bt->write_count = size + 2; 233 - 234 191 bt->error_retries = 0; 235 192 bt->nonzero_status = 0; 236 - bt->read_count = 0; 237 193 bt->truncated = 0; 
238 194 bt->state = BT_STATE_XACTION_START; 239 - bt->last_state = BT_STATE_IDLE; 240 - bt->timeout = BT_NORMAL_TIMEOUT; 195 + bt->timeout = bt->BT_CAP_req2rsp; 196 + force_result(bt, IPMI_ERR_UNSPECIFIED); 241 197 return 0; 242 198 } 243 199 ··· 248 198 it calls this. Strip out the length and seq bytes. */ 249 199 250 200 static int bt_get_result(struct si_sm_data *bt, 251 - unsigned char *data, 252 - unsigned int length) 201 + unsigned char *data, 202 + unsigned int length) 253 203 { 254 204 int i, msg_len; 255 205 256 206 msg_len = bt->read_count - 2; /* account for length & seq */ 257 - /* Always NetFn, Cmd, cCode */ 258 207 if (msg_len < 3 || msg_len > IPMI_MAX_MSG_LENGTH) { 259 - printk(KERN_DEBUG "BT results: bad msg_len = %d\n", msg_len); 260 - data[0] = bt->write_data[1] | 0x4; /* Kludge a response */ 261 - data[1] = bt->write_data[3]; 262 - data[2] = IPMI_ERR_UNSPECIFIED; 208 + force_result(bt, IPMI_ERR_UNSPECIFIED); 263 209 msg_len = 3; 264 - } else { 265 - data[0] = bt->read_data[1]; 266 - data[1] = bt->read_data[3]; 267 - if (length < msg_len) 268 - bt->truncated = 1; 269 - if (bt->truncated) { /* can be set in read_all_bytes() */ 270 - data[2] = IPMI_ERR_MSG_TRUNCATED; 271 - msg_len = 3; 272 - } else 273 - memcpy(data + 2, bt->read_data + 4, msg_len - 2); 274 - 275 - if (bt_debug & BT_DEBUG_MSG) { 276 - printk (KERN_WARNING "BT: res (raw)"); 277 - for (i = 0; i < msg_len; i++) 278 - printk(" %02x", data[i]); 279 - printk ("\n"); 280 - } 281 210 } 282 - bt->read_count = 0; /* paranoia */ 211 + data[0] = bt->read_data[1]; 212 + data[1] = bt->read_data[3]; 213 + if (length < msg_len || bt->truncated) { 214 + data[2] = IPMI_ERR_MSG_TRUNCATED; 215 + msg_len = 3; 216 + } else 217 + memcpy(data + 2, bt->read_data + 4, msg_len - 2); 218 + 219 + if (bt_debug & BT_DEBUG_MSG) { 220 + printk (KERN_WARNING "BT: result %d bytes:", msg_len); 221 + for (i = 0; i < msg_len; i++) 222 + printk(" %02x", data[i]); 223 + printk ("\n"); 224 + } 283 225 return msg_len; 284 
226 } 285 227 ··· 280 238 281 239 static void reset_flags(struct si_sm_data *bt) 282 240 { 241 + if (bt_debug) 242 + printk(KERN_WARNING "IPMI BT: flag reset %s\n", 243 + status2txt(BT_STATUS)); 283 244 if (BT_STATUS & BT_H_BUSY) 284 - BT_CONTROL(BT_H_BUSY); 285 - if (BT_STATUS & BT_B_BUSY) 286 - BT_CONTROL(BT_B_BUSY); 287 - BT_CONTROL(BT_CLR_WR_PTR); 288 - BT_CONTROL(BT_SMS_ATN); 245 + BT_CONTROL(BT_H_BUSY); /* force clear */ 246 + BT_CONTROL(BT_CLR_WR_PTR); /* always reset */ 247 + BT_CONTROL(BT_SMS_ATN); /* always clear */ 248 + BT_INTMASK_W(BT_BMC_HWRST); 249 + } 289 250 290 - if (BT_STATUS & BT_B2H_ATN) { 291 - int i; 292 - BT_CONTROL(BT_H_BUSY); 293 - BT_CONTROL(BT_B2H_ATN); 294 - BT_CONTROL(BT_CLR_RD_PTR); 295 - for (i = 0; i < IPMI_MAX_MSG_LENGTH + 2; i++) 296 - BMC2HOST; 297 - BT_CONTROL(BT_H_BUSY); 298 - } 251 + /* Get rid of an unwanted/stale response. This should only be needed for 252 + BMCs that support multiple outstanding requests. */ 253 + 254 + static void drain_BMC2HOST(struct si_sm_data *bt) 255 + { 256 + int i, size; 257 + 258 + if (!(BT_STATUS & BT_B2H_ATN)) /* Not signalling a response */ 259 + return; 260 + 261 + BT_CONTROL(BT_H_BUSY); /* now set */ 262 + BT_CONTROL(BT_B2H_ATN); /* always clear */ 263 + BT_STATUS; /* pause */ 264 + BT_CONTROL(BT_B2H_ATN); /* some BMCs are stubborn */ 265 + BT_CONTROL(BT_CLR_RD_PTR); /* always reset */ 266 + if (bt_debug) 267 + printk(KERN_WARNING "IPMI BT: stale response %s; ", 268 + status2txt(BT_STATUS)); 269 + size = BMC2HOST; 270 + for (i = 0; i < size ; i++) 271 + BMC2HOST; 272 + BT_CONTROL(BT_H_BUSY); /* now clear */ 273 + if (bt_debug) 274 + printk("drained %d bytes\n", size + 1); 299 275 } 300 276 301 277 static inline void write_all_bytes(struct si_sm_data *bt) ··· 321 261 int i; 322 262 323 263 if (bt_debug & BT_DEBUG_MSG) { 324 - printk(KERN_WARNING "BT: write %d bytes seq=0x%02X", 264 + printk(KERN_WARNING "BT: write %d bytes seq=0x%02X", 325 265 bt->write_count, bt->seq); 326 266 for (i = 0; i < 
bt->write_count; i++) 327 267 printk (" %02x", bt->write_data[i]); 328 268 printk ("\n"); 329 269 } 330 270 for (i = 0; i < bt->write_count; i++) 331 - HOST2BMC(bt->write_data[i]); 271 + HOST2BMC(bt->write_data[i]); 332 272 } 333 273 334 274 static inline int read_all_bytes(struct si_sm_data *bt) 335 275 { 336 276 unsigned char i; 337 277 278 + /* length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode. 279 + Keep layout of first four bytes aligned with write_data[] */ 280 + 338 281 bt->read_data[0] = BMC2HOST; 339 282 bt->read_count = bt->read_data[0]; 340 - if (bt_debug & BT_DEBUG_MSG) 341 - printk(KERN_WARNING "BT: read %d bytes:", bt->read_count); 342 283 343 - /* minimum: length, NetFn, Seq, Cmd, cCode == 5 total, or 4 more 344 - following the length byte. */ 345 284 if (bt->read_count < 4 || bt->read_count >= IPMI_MAX_MSG_LENGTH) { 346 285 if (bt_debug & BT_DEBUG_MSG) 347 - printk("bad length %d\n", bt->read_count); 286 + printk(KERN_WARNING "BT: bad raw rsp len=%d\n", 287 + bt->read_count); 348 288 bt->truncated = 1; 349 289 return 1; /* let next XACTION START clean it up */ 350 290 } 351 291 for (i = 1; i <= bt->read_count; i++) 352 - bt->read_data[i] = BMC2HOST; 353 - bt->read_count++; /* account for the length byte */ 292 + bt->read_data[i] = BMC2HOST; 293 + bt->read_count++; /* Account internally for length byte */ 354 294 355 295 if (bt_debug & BT_DEBUG_MSG) { 356 - for (i = 0; i < bt->read_count; i++) 357 - printk (" %02x", bt->read_data[i]); 358 - printk ("\n"); 359 - } 360 - if (bt->seq != bt->write_data[2]) /* idiot check */ 361 - printk(KERN_DEBUG "BT: internal error: sequence mismatch\n"); 296 + int max = bt->read_count; 362 297 363 - /* per the spec, the (NetFn, Seq, Cmd) tuples should match */ 364 - if ((bt->read_data[3] == bt->write_data[3]) && /* Cmd */ 365 - (bt->read_data[2] == bt->write_data[2]) && /* Sequence */ 366 - ((bt->read_data[1] & 0xF8) == (bt->write_data[1] & 0xF8))) 298 + printk(KERN_WARNING "BT: got %d bytes seq=0x%02X", 
299 + max, bt->read_data[2]); 300 + if (max > 16) 301 + max = 16; 302 + for (i = 0; i < max; i++) 303 + printk (" %02x", bt->read_data[i]); 304 + printk ("%s\n", bt->read_count == max ? "" : " ..."); 305 + } 306 + 307 + /* per the spec, the (NetFn[1], Seq[2], Cmd[3]) tuples must match */ 308 + if ((bt->read_data[3] == bt->write_data[3]) && 309 + (bt->read_data[2] == bt->write_data[2]) && 310 + ((bt->read_data[1] & 0xF8) == (bt->write_data[1] & 0xF8))) 367 311 return 1; 368 312 369 313 if (bt_debug & BT_DEBUG_MSG) 370 - printk(KERN_WARNING "BT: bad packet: " 314 + printk(KERN_WARNING "IPMI BT: bad packet: " 371 315 "want 0x(%02X, %02X, %02X) got (%02X, %02X, %02X)\n", 372 - bt->write_data[1], bt->write_data[2], bt->write_data[3], 316 + bt->write_data[1] | 0x04, bt->write_data[2], bt->write_data[3], 373 317 bt->read_data[1], bt->read_data[2], bt->read_data[3]); 374 318 return 0; 375 319 } 376 320 377 - /* Modifies bt->state appropriately, need to get into the bt_event() switch */ 321 + /* Restart if retries are left, or return an error completion code */ 378 322 379 - static void error_recovery(struct si_sm_data *bt, char *reason) 323 + static enum si_sm_result error_recovery(struct si_sm_data *bt, 324 + unsigned char status, 325 + unsigned char cCode) 380 326 { 381 - unsigned char status; 382 - char buf[40]; /* For getting status */ 327 + char *reason; 383 328 384 - bt->timeout = BT_NORMAL_TIMEOUT; /* various places want to retry */ 329 + bt->timeout = bt->BT_CAP_req2rsp; 385 330 386 - status = BT_STATUS; 387 - printk(KERN_DEBUG "BT: %s in %s %s\n", reason, STATE2TXT, 388 - STATUS2TXT(buf)); 331 + switch (cCode) { 332 + case IPMI_TIMEOUT_ERR: 333 + reason = "timeout"; 334 + break; 335 + default: 336 + reason = "internal error"; 337 + break; 338 + } 389 339 340 + printk(KERN_WARNING "IPMI BT: %s in %s %s ", /* open-ended line */ 341 + reason, STATE2TXT, STATUS2TXT); 342 + 343 + /* Per the IPMI spec, retries are based on the sequence number 344 + known only to this 
module, so manage a restart here. */ 390 345 (bt->error_retries)++; 391 - if (bt->error_retries > BT_RETRY_LIMIT) { 392 - printk(KERN_DEBUG "retry limit (%d) exceeded\n", BT_RETRY_LIMIT); 393 - bt->state = BT_STATE_HOSED; 394 - if (!bt->nonzero_status) 395 - printk(KERN_ERR "IPMI: BT stuck, try power cycle\n"); 396 - else if (bt->error_retries <= BT_RETRY_LIMIT + 1) { 397 - printk(KERN_DEBUG "IPMI: BT reset (takes 5 secs)\n"); 398 - bt->state = BT_STATE_RESET1; 346 + if (bt->error_retries < bt->BT_CAP_retries) { 347 + printk("%d retries left\n", 348 + bt->BT_CAP_retries - bt->error_retries); 349 + bt->state = BT_STATE_RESTART; 350 + return SI_SM_CALL_WITHOUT_DELAY; 351 + } 352 + 353 + printk("failed %d retries, sending error response\n", 354 + bt->BT_CAP_retries); 355 + if (!bt->nonzero_status) 356 + printk(KERN_ERR "IPMI BT: stuck, try power cycle\n"); 357 + 358 + /* this is most likely during insmod */ 359 + else if (bt->seq <= (unsigned char)(bt->BT_CAP_retries & 0xFF)) { 360 + printk(KERN_WARNING "IPMI: BT reset (takes 5 secs)\n"); 361 + bt->state = BT_STATE_RESET1; 362 + return SI_SM_CALL_WITHOUT_DELAY; 363 + } 364 + 365 + /* Concoct a useful error message, set up the next state, and 366 + be done with this sequence. */ 367 + 368 + bt->state = BT_STATE_IDLE; 369 + switch (cCode) { 370 + case IPMI_TIMEOUT_ERR: 371 + if (status & BT_B_BUSY) { 372 + cCode = IPMI_NODE_BUSY_ERR; 373 + bt->state = BT_STATE_LONG_BUSY; 399 374 } 400 - return; 375 + break; 376 + default: 377 + break; 401 378 } 402 - 403 - /* Sometimes the BMC queues get in an "off-by-one" state...*/ 404 - if ((bt->state == BT_STATE_B2H_WAIT) && (status & BT_B2H_ATN)) { 405 - printk(KERN_DEBUG "retry B2H_WAIT\n"); 406 - return; 407 - } 408 - 409 - printk(KERN_DEBUG "restart command\n"); 410 - bt->state = BT_STATE_RESTART; 379 + force_result(bt, cCode); 380 + return SI_SM_TRANSACTION_COMPLETE; 411 381 } 412 382 413 - /* Check the status and (possibly) advance the BT state machine. 
The 414 - default return is SI_SM_CALL_WITH_DELAY. */ 383 + /* Check status and (usually) take action and change this state machine. */ 415 384 416 385 static enum si_sm_result bt_event(struct si_sm_data *bt, long time) 417 386 { 418 - unsigned char status; 419 - char buf[40]; /* For getting status */ 387 + unsigned char status, BT_CAP[8]; 388 + static enum bt_states last_printed = BT_STATE_PRINTME; 420 389 int i; 421 390 422 391 status = BT_STATUS; 423 392 bt->nonzero_status |= status; 424 - 425 - if ((bt_debug & BT_DEBUG_STATES) && (bt->state != bt->last_state)) 393 + if ((bt_debug & BT_DEBUG_STATES) && (bt->state != last_printed)) { 426 394 printk(KERN_WARNING "BT: %s %s TO=%ld - %ld \n", 427 395 STATE2TXT, 428 - STATUS2TXT(buf), 396 + STATUS2TXT, 429 397 bt->timeout, 430 398 time); 431 - bt->last_state = bt->state; 399 + last_printed = bt->state; 400 + } 432 401 433 - if (bt->state == BT_STATE_HOSED) 434 - return SI_SM_HOSED; 402 + /* Commands that time out may still (eventually) provide a response. 403 + This stale response will get in the way of a new response so remove 404 + it if possible (hopefully during IDLE). Even if it comes up later 405 + it will be rejected by its (now-forgotten) seq number. 
*/ 435 406 436 - if (bt->state != BT_STATE_IDLE) { /* do timeout test */ 407 + if ((bt->state < BT_STATE_WRITE_BYTES) && (status & BT_B2H_ATN)) { 408 + drain_BMC2HOST(bt); 409 + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); 410 + } 411 + 412 + if ((bt->state != BT_STATE_IDLE) && 413 + (bt->state < BT_STATE_PRINTME)) { /* check timeout */ 437 414 bt->timeout -= time; 438 - if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) { 439 - error_recovery(bt, "timed out"); 440 - return SI_SM_CALL_WITHOUT_DELAY; 441 - } 415 + if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) 416 + return error_recovery(bt, 417 + status, 418 + IPMI_TIMEOUT_ERR); 442 419 } 443 420 444 421 switch (bt->state) { 445 422 446 - case BT_STATE_IDLE: /* check for asynchronous messages */ 423 + /* Idle state first checks for asynchronous messages from another 424 + channel, then does some opportunistic housekeeping. */ 425 + 426 + case BT_STATE_IDLE: 447 427 if (status & BT_SMS_ATN) { 448 428 BT_CONTROL(BT_SMS_ATN); /* clear it */ 449 429 return SI_SM_ATTN; 450 430 } 451 - return SI_SM_IDLE; 431 + 432 + if (status & BT_H_BUSY) /* clear a leftover H_BUSY */ 433 + BT_CONTROL(BT_H_BUSY); 434 + 435 + /* Read BT capabilities if it hasn't been done yet */ 436 + if (!bt->BT_CAP_outreqs) 437 + BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN, 438 + SI_SM_CALL_WITHOUT_DELAY); 439 + bt->timeout = bt->BT_CAP_req2rsp; 440 + BT_SI_SM_RETURN(SI_SM_IDLE); 452 441 453 442 case BT_STATE_XACTION_START: 454 - if (status & BT_H_BUSY) { 455 - BT_CONTROL(BT_H_BUSY); 456 - break; 457 - } 458 - if (status & BT_B2H_ATN) 459 - break; 460 - bt->state = BT_STATE_WRITE_BYTES; 461 - return SI_SM_CALL_WITHOUT_DELAY; /* for logging */ 443 + if (status & (BT_B_BUSY | BT_H2B_ATN)) 444 + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); 445 + if (BT_STATUS & BT_H_BUSY) 446 + BT_CONTROL(BT_H_BUSY); /* force clear */ 447 + BT_STATE_CHANGE(BT_STATE_WRITE_BYTES, 448 + SI_SM_CALL_WITHOUT_DELAY); 462 449 463 450 case BT_STATE_WRITE_BYTES: 464 - if 
(status & (BT_B_BUSY | BT_H2B_ATN)) 465 - break; 451 + if (status & BT_H_BUSY) 452 + BT_CONTROL(BT_H_BUSY); /* clear */ 466 453 BT_CONTROL(BT_CLR_WR_PTR); 467 454 write_all_bytes(bt); 468 - BT_CONTROL(BT_H2B_ATN); /* clears too fast to catch? */ 469 - bt->state = BT_STATE_WRITE_CONSUME; 470 - return SI_SM_CALL_WITHOUT_DELAY; /* it MIGHT sail through */ 455 + BT_CONTROL(BT_H2B_ATN); /* can clear too fast to catch */ 456 + BT_STATE_CHANGE(BT_STATE_WRITE_CONSUME, 457 + SI_SM_CALL_WITHOUT_DELAY); 471 458 472 - case BT_STATE_WRITE_CONSUME: /* BMCs usually blow right thru here */ 473 - if (status & (BT_H2B_ATN | BT_B_BUSY)) 474 - break; 475 - bt->state = BT_STATE_B2H_WAIT; 476 - /* fall through with status */ 459 + case BT_STATE_WRITE_CONSUME: 460 + if (status & (BT_B_BUSY | BT_H2B_ATN)) 461 + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); 462 + BT_STATE_CHANGE(BT_STATE_READ_WAIT, 463 + SI_SM_CALL_WITHOUT_DELAY); 477 464 478 - /* Stay in BT_STATE_B2H_WAIT until a packet matches. However, spinning 479 - hard here, constantly reading status, seems to hold off the 480 - generation of B2H_ATN so ALWAYS return CALL_WITH_DELAY. 
*/ 465 + /* Spinning hard can suppress B2H_ATN and force a timeout */ 481 466 482 - case BT_STATE_B2H_WAIT: 483 - if (!(status & BT_B2H_ATN)) 484 - break; 467 + case BT_STATE_READ_WAIT: 468 + if (!(status & BT_B2H_ATN)) 469 + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); 470 + BT_CONTROL(BT_H_BUSY); /* set */ 485 471 486 - /* Assume ordered, uncached writes: no need to wait */ 487 - if (!(status & BT_H_BUSY)) 488 - BT_CONTROL(BT_H_BUSY); /* set */ 489 - BT_CONTROL(BT_B2H_ATN); /* clear it, ACK to the BMC */ 490 - BT_CONTROL(BT_CLR_RD_PTR); /* reset the queue */ 491 - i = read_all_bytes(bt); 492 - BT_CONTROL(BT_H_BUSY); /* clear */ 493 - if (!i) /* Try this state again */ 494 - break; 495 - bt->state = BT_STATE_READ_END; 496 - return SI_SM_CALL_WITHOUT_DELAY; /* for logging */ 472 + /* Uncached, ordered writes should just proceeed serially but 473 + some BMCs don't clear B2H_ATN with one hit. Fast-path a 474 + workaround without too much penalty to the general case. */ 497 475 498 - case BT_STATE_READ_END: 476 + BT_CONTROL(BT_B2H_ATN); /* clear it to ACK the BMC */ 477 + BT_STATE_CHANGE(BT_STATE_CLEAR_B2H, 478 + SI_SM_CALL_WITHOUT_DELAY); 499 479 500 - /* I could wait on BT_H_BUSY to go clear for a truly clean 501 - exit. However, this is already done in XACTION_START 502 - and the (possible) extra loop/status/possible wait affects 503 - performance. 
So, as long as it works, just ignore H_BUSY */ 480 + case BT_STATE_CLEAR_B2H: 481 + if (status & BT_B2H_ATN) { /* keep hitting it */ 482 + BT_CONTROL(BT_B2H_ATN); 483 + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); 484 + } 485 + BT_STATE_CHANGE(BT_STATE_READ_BYTES, 486 + SI_SM_CALL_WITHOUT_DELAY); 504 487 505 - #ifdef MAKE_THIS_TRUE_IF_NECESSARY 488 + case BT_STATE_READ_BYTES: 489 + if (!(status & BT_H_BUSY)) /* check in case of retry */ 490 + BT_CONTROL(BT_H_BUSY); 491 + BT_CONTROL(BT_CLR_RD_PTR); /* start of BMC2HOST buffer */ 492 + i = read_all_bytes(bt); /* true == packet seq match */ 493 + BT_CONTROL(BT_H_BUSY); /* NOW clear */ 494 + if (!i) /* Not my message */ 495 + BT_STATE_CHANGE(BT_STATE_READ_WAIT, 496 + SI_SM_CALL_WITHOUT_DELAY); 497 + bt->state = bt->complete; 498 + return bt->state == BT_STATE_IDLE ? /* where to next? */ 499 + SI_SM_TRANSACTION_COMPLETE : /* normal */ 500 + SI_SM_CALL_WITHOUT_DELAY; /* Startup magic */ 506 501 507 - if (status & BT_H_BUSY) 508 - break; 509 - #endif 510 - bt->seq++; 511 - bt->state = BT_STATE_IDLE; 512 - return SI_SM_TRANSACTION_COMPLETE; 502 + case BT_STATE_LONG_BUSY: /* For example: after FW update */ 503 + if (!(status & BT_B_BUSY)) { 504 + reset_flags(bt); /* next state is now IDLE */ 505 + bt_init_data(bt, bt->io); 506 + } 507 + return SI_SM_CALL_WITH_DELAY; /* No repeat printing */ 513 508 514 509 case BT_STATE_RESET1: 515 - reset_flags(bt); 516 - bt->timeout = BT_RESET_DELAY; 517 - bt->state = BT_STATE_RESET2; 518 - break; 510 + reset_flags(bt); 511 + drain_BMC2HOST(bt); 512 + BT_STATE_CHANGE(BT_STATE_RESET2, 513 + SI_SM_CALL_WITH_DELAY); 519 514 520 515 case BT_STATE_RESET2: /* Send a soft reset */ 521 516 BT_CONTROL(BT_CLR_WR_PTR); ··· 579 464 HOST2BMC(42); /* Sequence number */ 580 465 HOST2BMC(3); /* Cmd == Soft reset */ 581 466 BT_CONTROL(BT_H2B_ATN); 582 - bt->state = BT_STATE_RESET3; 583 - break; 467 + bt->timeout = BT_RESET_DELAY * 1000000; 468 + BT_STATE_CHANGE(BT_STATE_RESET3, 469 + SI_SM_CALL_WITH_DELAY); 
584 470 585 - case BT_STATE_RESET3: 471 + case BT_STATE_RESET3: /* Hold off everything for a bit */ 586 472 if (bt->timeout > 0) 587 - return SI_SM_CALL_WITH_DELAY; 588 - bt->state = BT_STATE_RESTART; /* printk in debug modes */ 589 - break; 473 + return SI_SM_CALL_WITH_DELAY; 474 + drain_BMC2HOST(bt); 475 + BT_STATE_CHANGE(BT_STATE_RESTART, 476 + SI_SM_CALL_WITH_DELAY); 590 477 591 - case BT_STATE_RESTART: /* don't reset retries! */ 592 - reset_flags(bt); 593 - bt->write_data[2] = ++bt->seq; 478 + case BT_STATE_RESTART: /* don't reset retries or seq! */ 594 479 bt->read_count = 0; 595 480 bt->nonzero_status = 0; 596 - bt->timeout = BT_NORMAL_TIMEOUT; 597 - bt->state = BT_STATE_XACTION_START; 598 - break; 481 + bt->timeout = bt->BT_CAP_req2rsp; 482 + BT_STATE_CHANGE(BT_STATE_XACTION_START, 483 + SI_SM_CALL_WITH_DELAY); 599 484 600 - default: /* HOSED is supposed to be caught much earlier */ 601 - error_recovery(bt, "internal logic error"); 602 - break; 603 - } 604 - return SI_SM_CALL_WITH_DELAY; 485 + /* Get BT Capabilities, using timing of upper level state machine. 486 + Set outreqs to prevent infinite loop on timeout. 
*/ 487 + case BT_STATE_CAPABILITIES_BEGIN: 488 + bt->BT_CAP_outreqs = 1; 489 + { 490 + unsigned char GetBT_CAP[] = { 0x18, 0x36 }; 491 + bt->state = BT_STATE_IDLE; 492 + bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP)); 493 + } 494 + bt->complete = BT_STATE_CAPABILITIES_END; 495 + BT_STATE_CHANGE(BT_STATE_XACTION_START, 496 + SI_SM_CALL_WITH_DELAY); 497 + 498 + case BT_STATE_CAPABILITIES_END: 499 + i = bt_get_result(bt, BT_CAP, sizeof(BT_CAP)); 500 + bt_init_data(bt, bt->io); 501 + if ((i == 8) && !BT_CAP[2]) { 502 + bt->BT_CAP_outreqs = BT_CAP[3]; 503 + bt->BT_CAP_req2rsp = BT_CAP[6] * 1000000; 504 + bt->BT_CAP_retries = BT_CAP[7]; 505 + } else 506 + printk(KERN_WARNING "IPMI BT: using default values\n"); 507 + if (!bt->BT_CAP_outreqs) 508 + bt->BT_CAP_outreqs = 1; 509 + printk(KERN_WARNING "IPMI BT: req2rsp=%ld secs retries=%d\n", 510 + bt->BT_CAP_req2rsp / 1000000L, bt->BT_CAP_retries); 511 + bt->timeout = bt->BT_CAP_req2rsp; 512 + return SI_SM_CALL_WITHOUT_DELAY; 513 + 514 + default: /* should never occur */ 515 + return error_recovery(bt, 516 + status, 517 + IPMI_ERR_UNSPECIFIED); 518 + } 519 + return SI_SM_CALL_WITH_DELAY; 605 520 } 606 521 607 522 static int bt_detect(struct si_sm_data *bt) ··· 642 497 test that first. The calling routine uses negative logic. */ 643 498 644 499 if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF)) 645 - return 1; 500 + return 1; 646 501 reset_flags(bt); 647 502 return 0; 648 503 } ··· 658 513 659 514 struct si_sm_handlers bt_smi_handlers = 660 515 { 661 - .init_data = bt_init_data, 662 - .start_transaction = bt_start_transaction, 663 - .get_result = bt_get_result, 664 - .event = bt_event, 665 - .detect = bt_detect, 666 - .cleanup = bt_cleanup, 667 - .size = bt_size, 516 + .init_data = bt_init_data, 517 + .start_transaction = bt_start_transaction, 518 + .get_result = bt_get_result, 519 + .event = bt_event, 520 + .detect = bt_detect, 521 + .cleanup = bt_cleanup, 522 + .size = bt_size, 668 523 };
+8 -6
drivers/char/ipmi/ipmi_kcs_sm.c
··· 261 261 { 262 262 unsigned int i; 263 263 264 - if ((size < 2) || (size > MAX_KCS_WRITE_SIZE)) { 265 - return -1; 266 - } 267 - if ((kcs->state != KCS_IDLE) && (kcs->state != KCS_HOSED)) { 268 - return -2; 269 - } 264 + if (size < 2) 265 + return IPMI_REQ_LEN_INVALID_ERR; 266 + if (size > MAX_KCS_WRITE_SIZE) 267 + return IPMI_REQ_LEN_EXCEEDED_ERR; 268 + 269 + if ((kcs->state != KCS_IDLE) && (kcs->state != KCS_HOSED)) 270 + return IPMI_NOT_IN_MY_STATE_ERR; 271 + 270 272 if (kcs_debug & KCS_DEBUG_MSG) { 271 273 printk(KERN_DEBUG "start_kcs_transaction -"); 272 274 for (i = 0; i < size; i ++) {
+8 -4
drivers/char/ipmi/ipmi_si_intf.c
··· 247 247 spin_lock(&(smi_info->si_lock)); 248 248 } 249 249 250 - static void return_hosed_msg(struct smi_info *smi_info) 250 + static void return_hosed_msg(struct smi_info *smi_info, int cCode) 251 251 { 252 252 struct ipmi_smi_msg *msg = smi_info->curr_msg; 253 + 254 + if (cCode < 0 || cCode > IPMI_ERR_UNSPECIFIED) 255 + cCode = IPMI_ERR_UNSPECIFIED; 256 + /* else use it as is */ 253 257 254 258 /* Make it a reponse */ 255 259 msg->rsp[0] = msg->data[0] | 4; 256 260 msg->rsp[1] = msg->data[1]; 257 - msg->rsp[2] = IPMI_ERR_UNSPECIFIED; 261 + msg->rsp[2] = cCode; 258 262 msg->rsp_size = 3; 259 263 260 264 smi_info->curr_msg = NULL; ··· 309 305 smi_info->curr_msg->data, 310 306 smi_info->curr_msg->data_size); 311 307 if (err) { 312 - return_hosed_msg(smi_info); 308 + return_hosed_msg(smi_info, err); 313 309 } 314 310 315 311 rv = SI_SM_CALL_WITHOUT_DELAY; ··· 651 647 /* If we were handling a user message, format 652 648 a response to send to the upper layer to 653 649 tell it about the error. */ 654 - return_hosed_msg(smi_info); 650 + return_hosed_msg(smi_info, IPMI_ERR_UNSPECIFIED); 655 651 } 656 652 si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0); 657 653 }
+8 -6
drivers/char/ipmi/ipmi_smic_sm.c
··· 141 141 { 142 142 unsigned int i; 143 143 144 - if ((size < 2) || (size > MAX_SMIC_WRITE_SIZE)) { 145 - return -1; 146 - } 147 - if ((smic->state != SMIC_IDLE) && (smic->state != SMIC_HOSED)) { 148 - return -2; 149 - } 144 + if (size < 2) 145 + return IPMI_REQ_LEN_INVALID_ERR; 146 + if (size > MAX_SMIC_WRITE_SIZE) 147 + return IPMI_REQ_LEN_EXCEEDED_ERR; 148 + 149 + if ((smic->state != SMIC_IDLE) && (smic->state != SMIC_HOSED)) 150 + return IPMI_NOT_IN_MY_STATE_ERR; 151 + 150 152 if (smic_debug & SMIC_DEBUG_MSG) { 151 153 printk(KERN_INFO "start_smic_transaction -"); 152 154 for (i = 0; i < size; i ++) {
+6 -2
include/linux/ipmi_msgdefs.h
··· 71 71 /* The BT interface on high-end HP systems supports up to 255 bytes in 72 72 * one transfer. Its "virtual" BMC supports some commands that are longer 73 73 * than 128 bytes. Use the full 256, plus NetFn/LUN, Cmd, cCode, plus 74 - * some overhead. It would be nice to base this on the "BT Capabilities" 75 - * but that's too hard to propagate to the rest of the driver. */ 74 + * some overhead; it's not worth the effort to dynamically size this based 75 + * on the results of the "Get BT Capabilities" command. */ 76 76 #define IPMI_MAX_MSG_LENGTH 272 /* multiple of 16 */ 77 77 78 78 #define IPMI_CC_NO_ERROR 0x00 79 79 #define IPMI_NODE_BUSY_ERR 0xc0 80 80 #define IPMI_INVALID_COMMAND_ERR 0xc1 81 + #define IPMI_TIMEOUT_ERR 0xc3 81 82 #define IPMI_ERR_MSG_TRUNCATED 0xc6 83 + #define IPMI_REQ_LEN_INVALID_ERR 0xc7 84 + #define IPMI_REQ_LEN_EXCEEDED_ERR 0xc8 85 + #define IPMI_NOT_IN_MY_STATE_ERR 0xd5 /* IPMI 2.0 */ 82 86 #define IPMI_LOST_ARBITRATION_ERR 0x81 83 87 #define IPMI_BUS_ERR 0x82 84 88 #define IPMI_NAK_ON_WRITE_ERR 0x83