Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: wd719x: Fix resets and aborts

Host reset oopses because it calls wd719x_chip_init, which calls
request_firmware, under a spinlock. Stop the RISC first, then flush active
SCBs under a spinlock. Finally call wd719x_chip_init unlocked.

Also found and fixed more bugs during tests:

Affected active SCBs were not flushed during abort, bus and device
reset. This caused problems in a following host reset (hang or oops).

Device and bus reset failed under load because the result of the reset
command is WD719X_SUE_TERM or WD719X_SUE_RESET. Don't treat these codes as
errors in wd719x_wait_done.

wd719x_direct_cmd for RESET/ABORT commands didn't work properly, causing
timeouts. Looks like it was caused by the WD719X_DISABLE_INT bit. Not
setting it for RESET/ABORT commands seems to fix the problem. Also lower
the log level of the corresponding "direct command completed" message to
debug.

Unfortunately, my documentation is missing some pages, including page
67 (SPIDER67.gif) about resets :(

Reported-by: Hariprasad Kelam <hariprasad.kelam@gmail.com>
Signed-off-by: Ondrej Zary <linux@zary.sk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Ondrej Zary and committed by
Martin K. Petersen
5da1faa0 fd561412

+30 -12
+30 -12
drivers/scsi/wd719x.c
··· 107 107 } 108 108 109 109 if (status != WD719X_INT_NOERRORS) { 110 + u8 sue = wd719x_readb(wd, WD719X_AMR_SCB_ERROR); 111 + /* we get this after wd719x_dev_reset, it's not an error */ 112 + if (sue == WD719X_SUE_TERM) 113 + return 0; 114 + /* we get this after wd719x_bus_reset, it's not an error */ 115 + if (sue == WD719X_SUE_RESET) 116 + return 0; 110 117 dev_err(&wd->pdev->dev, "direct command failed, status 0x%02x, SUE 0x%02x\n", 111 - status, wd719x_readb(wd, WD719X_AMR_SCB_ERROR)); 118 + status, sue); 112 119 return -EIO; 113 120 } 114 121 ··· 134 127 if (wd719x_wait_ready(wd)) 135 128 return -ETIMEDOUT; 136 129 137 - /* make sure we get NO interrupts */ 138 - dev |= WD719X_DISABLE_INT; 130 + /* disable interrupts except for RESET/ABORT (it breaks them) */ 131 + if (opcode != WD719X_CMD_BUSRESET && opcode != WD719X_CMD_ABORT && 132 + opcode != WD719X_CMD_ABORT_TAG && opcode != WD719X_CMD_RESET) 133 + dev |= WD719X_DISABLE_INT; 139 134 wd719x_writeb(wd, WD719X_AMR_CMD_PARAM, dev); 140 135 wd719x_writeb(wd, WD719X_AMR_CMD_PARAM_2, lun); 141 136 wd719x_writeb(wd, WD719X_AMR_CMD_PARAM_3, tag); ··· 473 464 spin_lock_irqsave(wd->sh->host_lock, flags); 474 465 result = wd719x_direct_cmd(wd, action, cmd->device->id, 475 466 cmd->device->lun, cmd->tag, scb->phys, 0); 467 + wd719x_finish_cmd(scb, DID_ABORT); 476 468 spin_unlock_irqrestore(wd->sh->host_lock, flags); 477 469 if (result) 478 470 return FAILED; ··· 486 476 int result; 487 477 unsigned long flags; 488 478 struct wd719x *wd = shost_priv(cmd->device->host); 479 + struct wd719x_scb *scb, *tmp; 489 480 490 481 dev_info(&wd->pdev->dev, "%s reset requested\n", 491 482 (opcode == WD719X_CMD_BUSRESET) ? 
"bus" : "device"); ··· 494 483 spin_lock_irqsave(wd->sh->host_lock, flags); 495 484 result = wd719x_direct_cmd(wd, opcode, device, 0, 0, 0, 496 485 WD719X_WAIT_FOR_SCSI_RESET); 486 + /* flush all SCBs (or all for a device if dev_reset) */ 487 + list_for_each_entry_safe(scb, tmp, &wd->active_scbs, list) { 488 + if (opcode == WD719X_CMD_BUSRESET || 489 + scb->cmd->device->id == device) 490 + wd719x_finish_cmd(scb, DID_RESET); 491 + } 497 492 spin_unlock_irqrestore(wd->sh->host_lock, flags); 498 493 if (result) 499 494 return FAILED; ··· 522 505 struct wd719x *wd = shost_priv(cmd->device->host); 523 506 struct wd719x_scb *scb, *tmp; 524 507 unsigned long flags; 525 - int result; 526 508 527 509 dev_info(&wd->pdev->dev, "host reset requested\n"); 528 510 spin_lock_irqsave(wd->sh->host_lock, flags); 529 - /* Try to reinit the RISC */ 530 - if (wd719x_chip_init(wd) == 0) 531 - result = SUCCESS; 532 - else 533 - result = FAILED; 511 + /* stop the RISC */ 512 + if (wd719x_direct_cmd(wd, WD719X_CMD_SLEEP, 0, 0, 0, 0, 513 + WD719X_WAIT_FOR_RISC)) 514 + dev_warn(&wd->pdev->dev, "RISC sleep command failed\n"); 515 + /* disable RISC */ 516 + wd719x_writeb(wd, WD719X_PCI_MODE_SELECT, 0); 534 517 535 518 /* flush all SCBs */ 536 519 list_for_each_entry_safe(scb, tmp, &wd->active_scbs, list) 537 - wd719x_finish_cmd(scb, result); 520 + wd719x_finish_cmd(scb, DID_RESET); 538 521 spin_unlock_irqrestore(wd->sh->host_lock, flags); 539 522 540 - return result; 523 + /* Try to reinit the RISC */ 524 + return wd719x_chip_init(wd) == 0 ? SUCCESS : FAILED; 541 525 } 542 526 543 527 static int wd719x_biosparam(struct scsi_device *sdev, struct block_device *bdev, ··· 690 672 else 691 673 dev_err(&wd->pdev->dev, "card returned invalid SCB pointer\n"); 692 674 } else 693 - dev_warn(&wd->pdev->dev, "direct command 0x%x completed\n", 675 + dev_dbg(&wd->pdev->dev, "direct command 0x%x completed\n", 694 676 regs.bytes.OPC); 695 677 break; 696 678 case WD719X_INT_PIOREADY: