Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] Handle MLQUEUE busy response in scsi_send_eh_cmnd

scsi_send_eh_cmnd() calls queuecommand() directly, so it needs to
check the return value itself.
The only valid non-zero return codes for queuecommand() are 'busy'
states, so on such a return we wait for a bit to allow the LLDD
(low-level device driver) to recover, and then retry.

Based on an earlier patch from Wen Xiong.

[jejb: fix confusion between msec and jiffies values and other issues]
[bvanassche: correct stall_for interval]
Cc: Wen Xiong <wenxiong@linux.vnet.ibm.com>
Cc: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>

Authored by Hannes Reinecke and committed by James Bottomley
fc73648a d522844a

+27 -10
+27 -10
drivers/scsi/scsi_error.c
··· 25 25 #include <linux/interrupt.h> 26 26 #include <linux/blkdev.h> 27 27 #include <linux/delay.h> 28 + #include <linux/jiffies.h> 28 29 29 30 #include <scsi/scsi.h> 30 31 #include <scsi/scsi_cmnd.h> ··· 792 791 struct scsi_device *sdev = scmd->device; 793 792 struct Scsi_Host *shost = sdev->host; 794 793 DECLARE_COMPLETION_ONSTACK(done); 795 - unsigned long timeleft; 794 + unsigned long timeleft = timeout; 796 795 struct scsi_eh_save ses; 796 + const unsigned long stall_for = msecs_to_jiffies(100); 797 797 int rtn; 798 798 799 + retry: 799 800 scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes); 800 801 shost->eh_action = &done; 801 802 802 803 scsi_log_send(scmd); 803 804 scmd->scsi_done = scsi_eh_done; 804 - shost->hostt->queuecommand(shost, scmd); 805 - 806 - timeleft = wait_for_completion_timeout(&done, timeout); 805 + rtn = shost->hostt->queuecommand(shost, scmd); 806 + if (rtn) { 807 + if (timeleft > stall_for) { 808 + scsi_eh_restore_cmnd(scmd, &ses); 809 + timeleft -= stall_for; 810 + msleep(jiffies_to_msecs(stall_for)); 811 + goto retry; 812 + } 813 + /* signal not to enter either branch of the if () below */ 814 + timeleft = 0; 815 + rtn = NEEDS_RETRY; 816 + } else { 817 + timeleft = wait_for_completion_timeout(&done, timeout); 818 + } 807 819 808 820 shost->eh_action = NULL; 809 821 810 - scsi_log_completion(scmd, SUCCESS); 822 + scsi_log_completion(scmd, rtn); 811 823 812 824 SCSI_LOG_ERROR_RECOVERY(3, 813 825 printk("%s: scmd: %p, timeleft: %ld\n", 814 826 __func__, scmd, timeleft)); 815 827 816 828 /* 817 - * If there is time left scsi_eh_done got called, and we will 818 - * examine the actual status codes to see whether the command 819 - * actually did complete normally, else tell the host to forget 820 - * about this command. 
829 + * If there is time left scsi_eh_done got called, and we will examine 830 + * the actual status codes to see whether the command actually did 831 + * complete normally, else if we have a zero return and no time left, 832 + * the command must still be pending, so abort it and return FAILED. 833 + * If we never actually managed to issue the command, because 834 + * ->queuecommand() kept returning non zero, use the rtn = FAILED 835 + * value above (so don't execute either branch of the if) 821 836 */ 822 837 if (timeleft) { 823 838 rtn = scsi_eh_completed_normally(scmd); ··· 854 837 rtn = FAILED; 855 838 break; 856 839 } 857 - } else { 840 + } else if (!rtn) { 858 841 scsi_abort_eh_cmnd(scmd); 859 842 rtn = FAILED; 860 843 }