Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] block: separate failfast into multiple bits.

Multipath is best at handling transport errors. If it gets a device
error then there is not much the multipath layer can do. It will just
access the same device but from a different path.

This patch breaks up failfast into device, transport and driver errors.
The multipath layers (md and dm multipath) only ask the lower levels to
fast fail transport errors. The user of failfast, read ahead, will ask
to fast fail on all errors.

Note that blk_noretry_request will return true if any failfast bit
is set. This allows drivers that do not support the multipath failfast
bits to continue to fail on any failfast error like before. Drivers
like scsi that are able to fail fast specific errors can check
for the specific fail fast type. In the next patch I will convert
scsi.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

authored by

Mike Christie and committed by
James Bottomley
6000a368 056a4483

+57 -26
+9 -2
block/blk-core.c
··· 1075 1075 /* 1076 1076 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 1077 1077 */ 1078 - if (bio_rw_ahead(bio) || bio_failfast(bio)) 1079 - req->cmd_flags |= REQ_FAILFAST; 1078 + if (bio_rw_ahead(bio)) 1079 + req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 1080 + REQ_FAILFAST_DRIVER); 1081 + if (bio_failfast_dev(bio)) 1082 + req->cmd_flags |= REQ_FAILFAST_DEV; 1083 + if (bio_failfast_transport(bio)) 1084 + req->cmd_flags |= REQ_FAILFAST_TRANSPORT; 1085 + if (bio_failfast_driver(bio)) 1086 + req->cmd_flags |= REQ_FAILFAST_DRIVER; 1080 1087 1081 1088 /* 1082 1089 * REQ_BARRIER implies no merging, but lets make it explicit
+1 -1
drivers/md/dm-mpath.c
··· 849 849 dm_bio_record(&mpio->details, bio); 850 850 851 851 map_context->ptr = mpio; 852 - bio->bi_rw |= (1 << BIO_RW_FAILFAST); 852 + bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); 853 853 r = map_io(m, bio, mpio, 0); 854 854 if (r < 0 || r == DM_MAPIO_REQUEUE) 855 855 mempool_free(mpio, m->mpio_pool);
+2 -2
drivers/md/multipath.c
··· 176 176 mp_bh->bio = *bio; 177 177 mp_bh->bio.bi_sector += multipath->rdev->data_offset; 178 178 mp_bh->bio.bi_bdev = multipath->rdev->bdev; 179 - mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST); 179 + mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); 180 180 mp_bh->bio.bi_end_io = multipath_end_request; 181 181 mp_bh->bio.bi_private = mp_bh; 182 182 generic_make_request(&mp_bh->bio); ··· 402 402 *bio = *(mp_bh->master_bio); 403 403 bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; 404 404 bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; 405 - bio->bi_rw |= (1 << BIO_RW_FAILFAST); 405 + bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); 406 406 bio->bi_end_io = multipath_end_request; 407 407 bio->bi_private = mp_bh; 408 408 generic_make_request(bio);
+1 -1
drivers/s390/block/dasd_diag.c
··· 544 544 } 545 545 cqr->retries = DIAG_MAX_RETRIES; 546 546 cqr->buildclk = get_clock(); 547 - if (req->cmd_flags & REQ_FAILFAST) 547 + if (blk_noretry_request(req)) 548 548 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); 549 549 cqr->startdev = memdev; 550 550 cqr->memdev = memdev;
+1 -1
drivers/s390/block/dasd_eckd.c
··· 1700 1700 recid++; 1701 1701 } 1702 1702 } 1703 - if (req->cmd_flags & REQ_FAILFAST) 1703 + if (blk_noretry_request(req)) 1704 1704 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); 1705 1705 cqr->startdev = startdev; 1706 1706 cqr->memdev = startdev;
+1 -1
drivers/s390/block/dasd_fba.c
··· 355 355 recid++; 356 356 } 357 357 } 358 - if (req->cmd_flags & REQ_FAILFAST) 358 + if (blk_noretry_request(req)) 359 359 set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); 360 360 cqr->startdev = memdev; 361 361 cqr->memdev = memdev;
+2 -1
drivers/scsi/device_handler/scsi_dh_alua.c
··· 109 109 } 110 110 111 111 rq->cmd_type = REQ_TYPE_BLOCK_PC; 112 - rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; 112 + rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 113 + REQ_FAILFAST_DRIVER | REQ_NOMERGE; 113 114 rq->retries = ALUA_FAILOVER_RETRIES; 114 115 rq->timeout = ALUA_FAILOVER_TIMEOUT; 115 116
+2 -1
drivers/scsi/device_handler/scsi_dh_emc.c
··· 303 303 304 304 rq->cmd[4] = len; 305 305 rq->cmd_type = REQ_TYPE_BLOCK_PC; 306 - rq->cmd_flags |= REQ_FAILFAST; 306 + rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 307 + REQ_FAILFAST_DRIVER; 307 308 rq->timeout = CLARIION_TIMEOUT; 308 309 rq->retries = CLARIION_RETRIES; 309 310
+4 -2
drivers/scsi/device_handler/scsi_dh_hp_sw.c
··· 112 112 return SCSI_DH_RES_TEMP_UNAVAIL; 113 113 114 114 req->cmd_type = REQ_TYPE_BLOCK_PC; 115 - req->cmd_flags |= REQ_FAILFAST; 115 + req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 116 + REQ_FAILFAST_DRIVER; 116 117 req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY); 117 118 req->cmd[0] = TEST_UNIT_READY; 118 119 req->timeout = HP_SW_TIMEOUT; ··· 205 204 return SCSI_DH_RES_TEMP_UNAVAIL; 206 205 207 206 req->cmd_type = REQ_TYPE_BLOCK_PC; 208 - req->cmd_flags |= REQ_FAILFAST; 207 + req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 208 + REQ_FAILFAST_DRIVER; 209 209 req->cmd_len = COMMAND_SIZE(START_STOP); 210 210 req->cmd[0] = START_STOP; 211 211 req->cmd[4] = 1; /* Start spin cycle */
+2 -1
drivers/scsi/device_handler/scsi_dh_rdac.c
··· 226 226 } 227 227 228 228 rq->cmd_type = REQ_TYPE_BLOCK_PC; 229 - rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; 229 + rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 230 + REQ_FAILFAST_DRIVER; 230 231 rq->retries = RDAC_RETRIES; 231 232 rq->timeout = RDAC_TIMEOUT; 232 233
+3 -1
drivers/scsi/scsi_transport_spi.c
··· 109 109 for(i = 0; i < DV_RETRIES; i++) { 110 110 result = scsi_execute(sdev, cmd, dir, buffer, bufflen, 111 111 sense, DV_TIMEOUT, /* retries */ 1, 112 - REQ_FAILFAST); 112 + REQ_FAILFAST_DEV | 113 + REQ_FAILFAST_TRANSPORT | 114 + REQ_FAILFAST_DRIVER); 113 115 if (result & DRIVER_SENSE) { 114 116 struct scsi_sense_hdr sshdr_tmp; 115 117 if (!sshdr)
+17 -9
include/linux/bio.h
··· 129 129 * bit 2 -- barrier 130 130 * Insert a serialization point in the IO queue, forcing previously 131 131 * submitted IO to be completed before this oen is issued. 132 - * bit 3 -- fail fast, don't want low level driver retries 133 - * bit 4 -- synchronous I/O hint: the block layer will unplug immediately 132 + * bit 3 -- synchronous I/O hint: the block layer will unplug immediately 134 133 * Note that this does NOT indicate that the IO itself is sync, just 135 134 * that the block layer will not postpone issue of this IO by plugging. 136 - * bit 5 -- metadata request 135 + * bit 4 -- metadata request 137 136 * Used for tracing to differentiate metadata and data IO. May also 138 137 * get some preferential treatment in the IO scheduler 139 - * bit 6 -- discard sectors 138 + * bit 5 -- discard sectors 140 139 * Informs the lower level device that this range of sectors is no longer 141 140 * used by the file system and may thus be freed by the device. Used 142 141 * for flash based storage. 142 + * bit 6 -- fail fast device errors 143 + * bit 7 -- fail fast transport errors 144 + * bit 8 -- fail fast driver errors 145 + * Don't want driver retries for any fast fail whatever the reason. 
143 146 */ 144 147 #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ 145 148 #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ 146 149 #define BIO_RW_BARRIER 2 147 - #define BIO_RW_FAILFAST 3 148 - #define BIO_RW_SYNC 4 149 - #define BIO_RW_META 5 150 - #define BIO_RW_DISCARD 6 150 + #define BIO_RW_SYNC 3 151 + #define BIO_RW_META 4 152 + #define BIO_RW_DISCARD 5 153 + #define BIO_RW_FAILFAST_DEV 6 154 + #define BIO_RW_FAILFAST_TRANSPORT 7 155 + #define BIO_RW_FAILFAST_DRIVER 8 151 156 152 157 /* 153 158 * upper 16 bits of bi_rw define the io priority of this bio ··· 179 174 #define bio_sectors(bio) ((bio)->bi_size >> 9) 180 175 #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) 181 176 #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) 182 - #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) 177 + #define bio_failfast_dev(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DEV)) 178 + #define bio_failfast_transport(bio) \ 179 + ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT)) 180 + #define bio_failfast_driver(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DRIVER)) 183 181 #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) 184 182 #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) 185 183 #define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD))
+12 -3
include/linux/blkdev.h
··· 87 87 */ 88 88 enum rq_flag_bits { 89 89 __REQ_RW, /* not set, read. set, write */ 90 - __REQ_FAILFAST, /* no low level driver retries */ 90 + __REQ_FAILFAST_DEV, /* no driver retries of device errors */ 91 + __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ 92 + __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ 91 93 __REQ_DISCARD, /* request to discard sectors */ 92 94 __REQ_SORTED, /* elevator knows about this request */ 93 95 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ ··· 113 111 }; 114 112 115 113 #define REQ_RW (1 << __REQ_RW) 114 + #define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) 115 + #define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) 116 + #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) 116 117 #define REQ_DISCARD (1 << __REQ_DISCARD) 117 - #define REQ_FAILFAST (1 << __REQ_FAILFAST) 118 118 #define REQ_SORTED (1 << __REQ_SORTED) 119 119 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 120 120 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) ··· 564 560 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) 565 561 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) 566 562 567 - #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) 563 + #define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV) 564 + #define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT) 565 + #define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER) 566 + #define blk_noretry_request(rq) (blk_failfast_dev(rq) || \ 567 + blk_failfast_transport(rq) || \ 568 + blk_failfast_driver(rq)) 568 569 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) 569 570 570 571 #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))