Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] qla1280: error recovery rewrite

The driver now waits for the SCSI commands associated with a
particular error recovery step to be returned to the mid-layer,
and returns the appropriate SUCCESS or FAILED status. It also
removes unneeded polling of the chip for interrupts.

This patch also bumps the driver version number.

Signed-off-by: Michael Reed <mdr@sgi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

authored by

Michael Reed and committed by
James Bottomley
413e6e18 fd65e5e9

+161 -136
+159 -135
drivers/scsi/qla1280.c
··· 17 17 * General Public License for more details. 18 18 * 19 19 ******************************************************************************/ 20 - #define QLA1280_VERSION "3.26" 20 + #define QLA1280_VERSION "3.27" 21 21 /***************************************************************************** 22 22 Revision History: 23 + Rev 3.27, February 10, 2009, Michael Reed 24 + - General code cleanup. 25 + - Improve error recovery. 23 26 Rev 3.26, January 16, 2006 Jes Sorensen 24 27 - Ditch all < 2.6 support 25 28 Rev 3.25.1, February 10, 2005 Christoph Hellwig ··· 721 718 cmd->scsi_done = fn; 722 719 sp->cmd = cmd; 723 720 sp->flags = 0; 721 + sp->wait = NULL; 722 + CMD_HANDLE(cmd) = (unsigned char *)NULL; 724 723 725 724 qla1280_print_scsi_cmd(5, cmd); 726 725 ··· 747 742 ADAPTER_RESET, 748 743 }; 749 744 750 - /* timer action for error action processor */ 751 - static void qla1280_error_wait_timeout(unsigned long __data) 752 - { 753 - struct scsi_cmnd *cmd = (struct scsi_cmnd *)__data; 754 - struct srb *sp = (struct srb *)CMD_SP(cmd); 755 - 756 - complete(sp->wait); 757 - } 758 745 759 746 static void qla1280_mailbox_timeout(unsigned long __data) 760 747 { ··· 759 762 "ictrl %04x, istatus %04x\n", ha->host_no, ha->mailbox_out[0], 760 763 RD_REG_WORD(&reg->ictrl), RD_REG_WORD(&reg->istatus)); 761 764 complete(ha->mailbox_wait); 765 + } 766 + 767 + static int 768 + _qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp, 769 + struct completion *wait) 770 + { 771 + int status = FAILED; 772 + struct scsi_cmnd *cmd = sp->cmd; 773 + 774 + spin_unlock_irq(ha->host->host_lock); 775 + wait_for_completion_timeout(wait, 4*HZ); 776 + spin_lock_irq(ha->host->host_lock); 777 + sp->wait = NULL; 778 + if(CMD_HANDLE(cmd) == COMPLETED_HANDLE) { 779 + status = SUCCESS; 780 + (*cmd->scsi_done)(cmd); 781 + } 782 + return status; 783 + } 784 + 785 + static int 786 + qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp) 787 + { 788 + 
DECLARE_COMPLETION_ONSTACK(wait); 789 + 790 + sp->wait = &wait; 791 + return _qla1280_wait_for_single_command(ha, sp, &wait); 792 + } 793 + 794 + static int 795 + qla1280_wait_for_pending_commands(struct scsi_qla_host *ha, int bus, int target) 796 + { 797 + int cnt; 798 + int status; 799 + struct srb *sp; 800 + struct scsi_cmnd *cmd; 801 + 802 + status = SUCCESS; 803 + 804 + /* 805 + * Wait for all commands with the designated bus/target 806 + * to be completed by the firmware 807 + */ 808 + for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { 809 + sp = ha->outstanding_cmds[cnt]; 810 + if (sp) { 811 + cmd = sp->cmd; 812 + 813 + if (bus >= 0 && SCSI_BUS_32(cmd) != bus) 814 + continue; 815 + if (target >= 0 && SCSI_TCN_32(cmd) != target) 816 + continue; 817 + 818 + status = qla1280_wait_for_single_command(ha, sp); 819 + if (status == FAILED) 820 + break; 821 + } 822 + } 823 + return status; 762 824 } 763 825 764 826 /************************************************************************** ··· 833 777 * Returns: 834 778 * SUCCESS or FAILED 835 779 * 836 - * Note: 837 - * Resetting the bus always succeeds - is has to, otherwise the 838 - * kernel will panic! Try a surgical technique - sending a BUS 839 - * DEVICE RESET message - on the offending target before pulling 840 - * the SCSI bus reset line. 
841 780 **************************************************************************/ 842 781 static int 843 782 qla1280_error_action(struct scsi_cmnd *cmd, enum action action) ··· 840 789 struct scsi_qla_host *ha; 841 790 int bus, target, lun; 842 791 struct srb *sp; 843 - uint16_t data; 844 - unsigned char *handle; 845 - int result, i; 792 + int i, found; 793 + int result=FAILED; 794 + int wait_for_bus=-1; 795 + int wait_for_target = -1; 846 796 DECLARE_COMPLETION_ONSTACK(wait); 847 - struct timer_list timer; 848 797 849 798 ENTER("qla1280_error_action"); 850 799 851 800 ha = (struct scsi_qla_host *)(CMD_HOST(cmd)->hostdata); 801 + sp = (struct srb *)CMD_SP(cmd); 802 + bus = SCSI_BUS_32(cmd); 803 + target = SCSI_TCN_32(cmd); 804 + lun = SCSI_LUN_32(cmd); 852 805 853 806 dprintk(4, "error_action %i, istatus 0x%04x\n", action, 854 807 RD_REG_WORD(&ha->iobase->istatus)); ··· 866 811 "Handle=0x%p, action=0x%x\n", 867 812 ha->host_no, cmd, CMD_HANDLE(cmd), action); 868 813 869 - sp = (struct srb *)CMD_SP(cmd); 870 - handle = CMD_HANDLE(cmd); 871 - 872 - /* Check for pending interrupts. */ 873 - data = qla1280_debounce_register(&ha->iobase->istatus); 874 814 /* 875 - * The io_request_lock is held when the reset handler is called, hence 876 - * the interrupt handler cannot be running in parallel as it also 877 - * grabs the lock. /Jes 815 + * Check to see if we have the command in the outstanding_cmds[] 816 + * array. If not then it must have completed before this error 817 + * action was initiated. If the error_action isn't ABORT_COMMAND 818 + * then the driver must proceed with the requested action. 878 819 */ 879 - if (data & RISC_INT) 880 - qla1280_isr(ha, &ha->done_q); 881 - 882 - /* 883 - * Determine the suggested action that the mid-level driver wants 884 - * us to perform. 
885 - */ 886 - if (handle == (unsigned char *)INVALID_HANDLE || handle == NULL) { 887 - if(action == ABORT_COMMAND) { 888 - /* we never got this command */ 889 - printk(KERN_INFO "qla1280: Aborting a NULL handle\n"); 890 - return SUCCESS; /* no action - we don't have command */ 891 - } 892 - } else { 893 - sp->wait = &wait; 894 - } 895 - 896 - bus = SCSI_BUS_32(cmd); 897 - target = SCSI_TCN_32(cmd); 898 - lun = SCSI_LUN_32(cmd); 899 - 900 - /* Overloading result. Here it means the success or fail of the 901 - * *issue* of the action. When we return from the routine, it must 902 - * mean the actual success or fail of the action */ 903 - result = FAILED; 904 - switch (action) { 905 - case ABORT_COMMAND: 906 - if ((sp->flags & SRB_ABORT_PENDING)) { 907 - printk(KERN_WARNING 908 - "scsi(): Command has a pending abort " 909 - "message - ABORT_PENDING.\n"); 910 - /* This should technically be impossible since we 911 - * now wait for abort completion */ 820 + found = -1; 821 + for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) { 822 + if (sp == ha->outstanding_cmds[i]) { 823 + found = i; 824 + sp->wait = &wait; /* we'll wait for it to complete */ 912 825 break; 913 826 } 827 + } 914 828 915 - for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) { 916 - if (sp == ha->outstanding_cmds[i]) { 917 - dprintk(1, "qla1280: RISC aborting command\n"); 918 - if (qla1280_abort_command(ha, sp, i) == 0) 919 - result = SUCCESS; 920 - else { 921 - /* 922 - * Since we don't know what might 923 - * have happend to the command, it 924 - * is unsafe to remove it from the 925 - * device's queue at this point. 926 - * Wait and let the escalation 927 - * process take care of it. 
928 - */ 929 - printk(KERN_WARNING 930 - "scsi(%li:%i:%i:%i): Unable" 931 - " to abort command!\n", 932 - ha->host_no, bus, target, lun); 933 - } 934 - } 829 + if (found < 0) { /* driver doesn't have command */ 830 + result = SUCCESS; 831 + if (qla1280_verbose) { 832 + printk(KERN_INFO 833 + "scsi(%ld:%d:%d:%d): specified command has " 834 + "already completed.\n", ha->host_no, bus, 835 + target, lun); 935 836 } 837 + } 838 + 839 + switch (action) { 840 + 841 + case ABORT_COMMAND: 842 + dprintk(1, "qla1280: RISC aborting command\n"); 843 + /* 844 + * The abort might fail due to race when the host_lock 845 + * is released to issue the abort. As such, we 846 + * don't bother to check the return status. 847 + */ 848 + if (found >= 0) 849 + qla1280_abort_command(ha, sp, found); 936 850 break; 937 851 938 852 case DEVICE_RESET: ··· 909 885 printk(KERN_INFO 910 886 "scsi(%ld:%d:%d:%d): Queueing device reset " 911 887 "command.\n", ha->host_no, bus, target, lun); 912 - if (qla1280_device_reset(ha, bus, target) == 0) 913 - result = SUCCESS; 888 + if (qla1280_device_reset(ha, bus, target) == 0) { 889 + /* issued device reset, set wait conditions */ 890 + wait_for_bus = bus; 891 + wait_for_target = target; 892 + } 914 893 break; 915 894 916 895 case BUS_RESET: 917 896 if (qla1280_verbose) 918 897 printk(KERN_INFO "qla1280(%ld:%d): Issued bus " 919 898 "reset.\n", ha->host_no, bus); 920 - if (qla1280_bus_reset(ha, bus) == 0) 921 - result = SUCCESS; 899 + if (qla1280_bus_reset(ha, bus) == 0) { 900 + /* issued bus reset, set wait conditions */ 901 + wait_for_bus = bus; 902 + } 922 903 break; 923 904 924 905 case ADAPTER_RESET: ··· 936 907 "continue automatically\n", ha->host_no); 937 908 } 938 909 ha->flags.reset_active = 1; 939 - /* 940 - * We restarted all of the commands automatically, so the 941 - * mid-level code can expect completions momentitarily. 
942 - */ 943 - if (qla1280_abort_isp(ha) == 0) 944 - result = SUCCESS; 910 + 911 + if (qla1280_abort_isp(ha) != 0) { /* it's dead */ 912 + result = FAILED; 913 + } 945 914 946 915 ha->flags.reset_active = 0; 947 916 } 948 917 949 - if (!list_empty(&ha->done_q)) 950 - qla1280_done(ha); 918 + /* 919 + * At this point, the host_lock has been released and retaken 920 + * by the issuance of the mailbox command. 921 + * Wait for the command passed in by the mid-layer if it 922 + * was found by the driver. It might have been returned 923 + * between eh recovery steps, hence the check of the "found" 924 + * variable. 925 + */ 951 926 952 - /* If we didn't manage to issue the action, or we have no 953 - * command to wait for, exit here */ 954 - if (result == FAILED || handle == NULL || 955 - handle == (unsigned char *)INVALID_HANDLE) { 956 - /* 957 - * Clear completion queue to avoid qla1280_done() trying 958 - * to complete the command at a later stage after we 959 - * have exited the current context 960 - */ 961 - sp->wait = NULL; 962 - goto leave; 927 + if (found >= 0) 928 + result = _qla1280_wait_for_single_command(ha, sp, &wait); 929 + 930 + if (action == ABORT_COMMAND && result != SUCCESS) { 931 + printk(KERN_WARNING 932 + "scsi(%li:%i:%i:%i): " 933 + "Unable to abort command!\n", 934 + ha->host_no, bus, target, lun); 963 935 } 964 936 965 - /* set up a timer just in case we're really jammed */ 966 - init_timer(&timer); 967 - timer.expires = jiffies + 4*HZ; 968 - timer.data = (unsigned long)cmd; 969 - timer.function = qla1280_error_wait_timeout; 970 - add_timer(&timer); 971 - 972 - /* wait for the action to complete (or the timer to expire) */ 973 - spin_unlock_irq(ha->host->host_lock); 974 - wait_for_completion(&wait); 975 - del_timer_sync(&timer); 976 - spin_lock_irq(ha->host->host_lock); 977 - sp->wait = NULL; 978 - 979 - /* the only action we might get a fail for is abort */ 980 - if (action == ABORT_COMMAND) { 981 - if(sp->flags & SRB_ABORTED) 982 - result = 
SUCCESS; 983 - else 984 - result = FAILED; 937 + /* 938 + * If the command passed in by the mid-layer has been 939 + * returned by the board, then wait for any additional 940 + * commands which are supposed to complete based upon 941 + * the error action. 942 + * 943 + * All commands are unconditionally returned during a 944 + * call to qla1280_abort_isp(), ADAPTER_RESET. No need 945 + * to wait for them. 946 + */ 947 + if (result == SUCCESS && wait_for_bus >= 0) { 948 + result = qla1280_wait_for_pending_commands(ha, 949 + wait_for_bus, wait_for_target); 985 950 } 986 951 987 - leave: 988 952 dprintk(1, "RESET returning %d\n", result); 989 953 990 954 LEAVE("qla1280_error_action"); ··· 1280 1258 switch ((CMD_RESULT(cmd) >> 16)) { 1281 1259 case DID_RESET: 1282 1260 /* Issue marker command. */ 1283 - qla1280_marker(ha, bus, target, 0, MK_SYNC_ID); 1261 + if (!ha->flags.abort_isp_active) 1262 + qla1280_marker(ha, bus, target, 0, MK_SYNC_ID); 1284 1263 break; 1285 1264 case DID_ABORT: 1286 1265 sp->flags &= ~SRB_ABORT_PENDING; ··· 1295 1272 scsi_dma_unmap(cmd); 1296 1273 1297 1274 /* Call the mid-level driver interrupt handler */ 1298 - CMD_HANDLE(sp->cmd) = (unsigned char *)INVALID_HANDLE; 1299 1275 ha->actthreads--; 1300 1276 1301 - (*(cmd)->scsi_done)(cmd); 1302 - 1303 - if(sp->wait != NULL) 1277 + if (sp->wait == NULL) 1278 + (*(cmd)->scsi_done)(cmd); 1279 + else 1304 1280 complete(sp->wait); 1305 1281 } 1306 1282 LEAVE("qla1280_done"); ··· 3437 3415 3438 3416 /* Save ISP completion status */ 3439 3417 CMD_RESULT(sp->cmd) = 0; 3418 + CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE; 3440 3419 3441 3420 /* Place block on done queue */ 3442 3421 list_add_tail(&sp->list, done_q); ··· 3704 3681 } 3705 3682 } 3706 3683 3684 + CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE; 3685 + 3707 3686 /* Place command on done queue. 
*/ 3708 3687 list_add_tail(&sp->list, done_q); 3709 3688 out: ··· 3761 3736 CMD_RESULT(sp->cmd) = DID_ERROR << 16; 3762 3737 } 3763 3738 3739 + CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE; 3740 + 3764 3741 /* Place command on done queue. */ 3765 3742 list_add_tail(&sp->list, done_q); 3766 3743 } ··· 3813 3786 struct scsi_cmnd *cmd; 3814 3787 sp = ha->outstanding_cmds[cnt]; 3815 3788 if (sp) { 3816 - 3817 3789 cmd = sp->cmd; 3818 3790 CMD_RESULT(cmd) = DID_RESET << 16; 3819 - 3820 - sp->cmd = NULL; 3791 + CMD_HANDLE(cmd) = COMPLETED_HANDLE; 3821 3792 ha->outstanding_cmds[cnt] = NULL; 3822 - 3823 - (*cmd->scsi_done)(cmd); 3824 - 3825 - sp->flags = 0; 3793 + list_add_tail(&sp->list, &ha->done_q); 3826 3794 } 3827 3795 } 3796 + 3797 + qla1280_done(ha); 3828 3798 3829 3799 status = qla1280_load_firmware(ha); 3830 3800 if (status)
+2 -1
drivers/scsi/qla1280.h
··· 88 88 89 89 /* Maximum outstanding commands in ISP queues */ 90 90 #define MAX_OUTSTANDING_COMMANDS 512 91 - #define INVALID_HANDLE (MAX_OUTSTANDING_COMMANDS + 2) 91 + #define COMPLETED_HANDLE ((unsigned char *) \ 92 + (MAX_OUTSTANDING_COMMANDS + 2)) 92 93 93 94 /* ISP request and response entry counts (37-65535) */ 94 95 #define REQUEST_ENTRY_CNT 255 /* Number of request entries. */