Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] fc class: unblock target after calling terminate callback (take 2)

When we block a rport and the driver implements the terminate
callback, we will quickly fail IO that was running. However
IO that was in the scsi_device/block queue sits there until
the dev_loss_tmo fires, and this can make it look like IO is
lost because new IO will get executed but that IO stuck in
the blocked queue sits there for some time longer.

With this patch when the fast io fail tmo fires, we will
fail the blocked IO and any new IO. This patch also allows
all drivers to partially support the fast io fail tmo. If the
terminate io callback is not implemented, we will still fail blocked
IO and any new IO, so multipath can handle that.

This patch also allows the fc and iscsi classes to implement the
same behavior. The timers are just unfortunately named differently.

This patch also fixes the problem where drivers were unblocking
the target in their terminate callback, which was needed for
rport removal, but for fast io fail timeout it would cause
IO to bounce around the scsi/block layer and the LLD queuecommand.
And for drivers that could have IO stuck but did not have
a terminate callback, the unblock calls in the class will fix
them.

v2.
- fix up bit setting style to meet JamesS's pref.
- Broke out new host byte error changes to make it easier to read.
- added JamesS's ack from list.
v1
- initial patch

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

authored by

Mike Christie and committed by
James Bottomley
fff9d40c a93ce024

+33 -20
+28 -19
drivers/scsi/scsi_transport_fc.c
··· 2133 2133 SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(roles); 2134 2134 SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(port_state); 2135 2135 SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(scsi_target_id); 2136 - if (ft->terminate_rport_io) 2137 - SETUP_PRIVATE_RPORT_ATTRIBUTE_RW(fast_io_fail_tmo); 2136 + SETUP_PRIVATE_RPORT_ATTRIBUTE_RW(fast_io_fail_tmo); 2138 2137 2139 2138 BUG_ON(count > FC_RPORT_NUM_ATTRS); 2140 2139 ··· 2327 2328 } 2328 2329 EXPORT_SYMBOL(fc_remove_host); 2329 2330 2331 + static void fc_terminate_rport_io(struct fc_rport *rport) 2332 + { 2333 + struct Scsi_Host *shost = rport_to_shost(rport); 2334 + struct fc_internal *i = to_fc_internal(shost->transportt); 2335 + 2336 + /* Involve the LLDD if possible to terminate all io on the rport. */ 2337 + if (i->f->terminate_rport_io) 2338 + i->f->terminate_rport_io(rport); 2339 + 2340 + /* 2341 + * must unblock to flush queued IO. The caller will have set 2342 + * the port_state or flags, so that fc_remote_port_chkready will 2343 + * fail IO. 2344 + */ 2345 + scsi_target_unblock(&rport->dev); 2346 + } 2330 2347 2331 2348 /** 2332 2349 * fc_starget_delete - called to delete the scsi decendents of an rport ··· 2355 2340 { 2356 2341 struct fc_rport *rport = 2357 2342 container_of(work, struct fc_rport, stgt_delete_work); 2358 - struct Scsi_Host *shost = rport_to_shost(rport); 2359 - struct fc_internal *i = to_fc_internal(shost->transportt); 2360 2343 2361 - /* Involve the LLDD if possible to terminate all io on the rport. */ 2362 - if (i->f->terminate_rport_io) 2363 - i->f->terminate_rport_io(rport); 2364 - 2344 + fc_terminate_rport_io(rport); 2365 2345 scsi_remove_target(&rport->dev); 2366 2346 } 2367 2347 ··· 2382 2372 if (rport->flags & FC_RPORT_SCAN_PENDING) 2383 2373 scsi_flush_work(shost); 2384 2374 2385 - /* involve the LLDD to terminate all pending i/o */ 2386 - if (i->f->terminate_rport_io) 2387 - i->f->terminate_rport_io(rport); 2388 - 2375 + fc_terminate_rport_io(rport); 2389 2376 /* 2390 2377 * Cancel any outstanding timers. 
These should really exist 2391 2378 * only when rmmod'ing the LLDD and we're asking for ··· 2646 2639 2647 2640 spin_lock_irqsave(shost->host_lock, flags); 2648 2641 2649 - rport->flags &= ~FC_RPORT_DEVLOSS_PENDING; 2642 + rport->flags &= ~(FC_RPORT_FAST_FAIL_TIMEDOUT | 2643 + FC_RPORT_DEVLOSS_PENDING); 2650 2644 2651 2645 /* if target, initiate a scan */ 2652 2646 if (rport->scsi_target_id != -1) { ··· 2710 2702 rport->port_id = ids->port_id; 2711 2703 rport->roles = ids->roles; 2712 2704 rport->port_state = FC_PORTSTATE_ONLINE; 2705 + rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT; 2713 2706 2714 2707 if (fci->f->dd_fcrport_size) 2715 2708 memset(rport->dd_data, 0, ··· 2793 2784 fc_remote_port_delete(struct fc_rport *rport) 2794 2785 { 2795 2786 struct Scsi_Host *shost = rport_to_shost(rport); 2796 - struct fc_internal *i = to_fc_internal(shost->transportt); 2797 2787 int timeout = rport->dev_loss_tmo; 2798 2788 unsigned long flags; 2799 2789 ··· 2838 2830 2839 2831 /* see if we need to kill io faster than waiting for device loss */ 2840 2832 if ((rport->fast_io_fail_tmo != -1) && 2841 - (rport->fast_io_fail_tmo < timeout) && (i->f->terminate_rport_io)) 2833 + (rport->fast_io_fail_tmo < timeout)) 2842 2834 fc_queue_devloss_work(shost, &rport->fail_io_work, 2843 2835 rport->fast_io_fail_tmo * HZ); 2844 2836 ··· 2914 2906 fc_flush_devloss(shost); 2915 2907 2916 2908 spin_lock_irqsave(shost->host_lock, flags); 2917 - rport->flags &= ~FC_RPORT_DEVLOSS_PENDING; 2909 + rport->flags &= ~(FC_RPORT_FAST_FAIL_TIMEDOUT | 2910 + FC_RPORT_DEVLOSS_PENDING); 2918 2911 spin_unlock_irqrestore(shost->host_lock, flags); 2919 2912 2920 2913 /* ensure any stgt delete functions are done */ ··· 3010 3001 rport->supported_classes = FC_COS_UNSPECIFIED; 3011 3002 rport->roles = FC_PORT_ROLE_UNKNOWN; 3012 3003 rport->port_state = FC_PORTSTATE_NOTPRESENT; 3004 + rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT; 3013 3005 3014 3006 /* remove the identifiers that aren't used in the consisting 
binding */ 3015 3007 switch (fc_host->tgtid_bind_type) { ··· 3053 3043 { 3054 3044 struct fc_rport *rport = 3055 3045 container_of(work, struct fc_rport, fail_io_work.work); 3056 - struct Scsi_Host *shost = rport_to_shost(rport); 3057 - struct fc_internal *i = to_fc_internal(shost->transportt); 3058 3046 3059 3047 if (rport->port_state != FC_PORTSTATE_BLOCKED) 3060 3048 return; 3061 3049 3062 - i->f->terminate_rport_io(rport); 3050 + rport->flags |= FC_RPORT_FAST_FAIL_TIMEDOUT; 3051 + fc_terminate_rport_io(rport); 3063 3052 } 3064 3053 3065 3054 /**
+5 -1
include/scsi/scsi_transport_fc.h
··· 357 357 /* bit field values for struct fc_rport "flags" field: */ 358 358 #define FC_RPORT_DEVLOSS_PENDING 0x01 359 359 #define FC_RPORT_SCAN_PENDING 0x02 360 + #define FC_RPORT_FAST_FAIL_TIMEDOUT 0x03 360 361 361 362 #define dev_to_rport(d) \ 362 363 container_of(d, struct fc_rport, dev) ··· 684 683 result = DID_NO_CONNECT << 16; 685 684 break; 686 685 case FC_PORTSTATE_BLOCKED: 687 - result = DID_IMM_RETRY << 16; 686 + if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT) 687 + result = DID_NO_CONNECT << 16; 688 + else 689 + result = DID_IMM_RETRY << 16; 688 690 break; 689 691 default: 690 692 result = DID_NO_CONNECT << 16;