···550550 return !q->mq_ops && req->special;551551}552552553553+static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,554554+ struct request *next)555555+{556556+ unsigned short segments = blk_rq_nr_discard_segments(req);557557+558558+ if (segments >= queue_max_discard_segments(q))559559+ goto no_merge;560560+ if (blk_rq_sectors(req) + bio_sectors(next->bio) >561561+ blk_rq_get_max_sectors(req, blk_rq_pos(req)))562562+ goto no_merge;563563+564564+ req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);565565+ return true;566566+no_merge:567567+ req_set_nomerge(q, req);568568+ return false;569569+}570570+553571static int ll_merge_requests_fn(struct request_queue *q, struct request *req,554572 struct request *next)555573{···701683 * If we are allowed to merge, then append bio list702684 * from next to rq and release next. merge_requests_fn703685 * will have updated segment counts, update sector704704- * counts here.686686+ * counts here. Handle DISCARDs separately, as they687687+ * have separate settings.705688 */706706- if (!ll_merge_requests_fn(q, req, next))689689+ if (req_op(req) == REQ_OP_DISCARD) {690690+ if (!req_attempt_discard_merge(q, req, next))691691+ return NULL;692692+ } else if (!ll_merge_requests_fn(q, req, next))707693 return NULL;708694709695 /*···737715738716 req->__data_len += blk_rq_bytes(next);739717740740- elv_merge_requests(q, req, next);718718+ if (req_op(req) != REQ_OP_DISCARD)719719+ elv_merge_requests(q, req, next);741720742721 /*743722 * 'next' is going away, so update stats accordingly
+2
block/blk-mq-sched.c
···259259 if (!*merged_request)260260 elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);261261 return true;262262+ case ELEVATOR_DISCARD_MERGE:263263+ return bio_attempt_discard_merge(q, rq, bio);262264 default:263265 return false;264266 }
+16-4
block/blk-mq.c
···11621162 return true;11631163}1164116411651165+#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */11661166+11651167bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,11661168 bool got_budget)11671169{···11711169 struct request *rq, *nxt;11721170 bool no_tag = false;11731171 int errors, queued;11721172+ blk_status_t ret = BLK_STS_OK;1174117311751174 if (list_empty(list))11761175 return false;···11841181 errors = queued = 0;11851182 do {11861183 struct blk_mq_queue_data bd;11871187- blk_status_t ret;1188118411891185 rq = list_first_entry(list, struct request, queuelist);11901186 if (!blk_mq_get_driver_tag(rq, &hctx, false)) {···12281226 }1229122712301228 ret = q->mq_ops->queue_rq(hctx, &bd);12311231- if (ret == BLK_STS_RESOURCE) {12291229+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {12321230 /*12331231 * If an I/O scheduler has been configured and we got a12341232 * driver tag for the next request already, free it···12591257 * that is where we will continue on next queue run.12601258 */12611259 if (!list_empty(list)) {12601260+ bool needs_restart;12611261+12621262 spin_lock(&hctx->lock);12631263 list_splice_init(list, &hctx->dispatch);12641264 spin_unlock(&hctx->lock);···12841280 * - Some but not all block drivers stop a queue before12851281 * returning BLK_STS_RESOURCE. 
Two exceptions are scsi-mq12861282 * and dm-rq.12831283+ *12841284+ * If driver returns BLK_STS_RESOURCE and SCHED_RESTART12851285+ * bit is set, run queue after a delay to avoid IO stalls12861286+ * that could otherwise occur if the queue is idle.12871287 */12881288- if (!blk_mq_sched_needs_restart(hctx) ||12881288+ needs_restart = blk_mq_sched_needs_restart(hctx);12891289+ if (!needs_restart ||12891290 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))12901291 blk_mq_run_hw_queue(hctx, true);12921292+ else if (needs_restart && (ret == BLK_STS_RESOURCE))12931293+ blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);12911294 }1292129512931296 return (queued + errors) != 0;···17751764 *cookie = new_cookie;17761765 break;17771766 case BLK_STS_RESOURCE:17671767+ case BLK_STS_DEV_RESOURCE:17781768 __blk_mq_requeue_request(rq);17791769 break;17801770 default:···18381826 hctx_lock(hctx, &srcu_idx);1839182718401828 ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);18411841- if (ret == BLK_STS_RESOURCE)18291829+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)18421830 blk_mq_sched_insert_request(rq, false, true, false);18431831 else if (ret != BLK_STS_OK)18441832 blk_mq_end_request(rq, ret);
···276276 /* Out of mem doesn't actually happen, since we fall back277277 * to direct descriptors */278278 if (err == -ENOMEM || err == -ENOSPC)279279- return BLK_STS_RESOURCE;279279+ return BLK_STS_DEV_RESOURCE;280280 return BLK_STS_IOERR;281281 }282282
···408408409409 clone->start_time = jiffies;410410 r = blk_insert_cloned_request(clone->q, clone);411411- if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)411411+ if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)412412 /* must complete clone in terms of original request */413413 dm_complete_request(rq, r);414414 return r;···500500 trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),501501 blk_rq_pos(rq));502502 ret = dm_dispatch_clone_request(clone, rq);503503- if (ret == BLK_STS_RESOURCE) {503503+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {504504 blk_rq_unprep_clone(clone);505505 tio->ti->type->release_clone_rq(clone);506506 tio->clone = NULL;···772772 /* Undo dm_start_request() before requeuing */773773 rq_end_stats(md, rq);774774 rq_completed(md, rq_data_dir(rq), false);775775- blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);776775 return BLK_STS_RESOURCE;777776 }778777
+2-10
drivers/nvme/host/fc.c
···3535 NVME_FC_Q_LIVE,3636};37373838-#define NVMEFC_QUEUE_DELAY 3 /* ms units */3939-4038#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */41394240struct nvme_fc_queue {···22292231 * the target device is present22302232 */22312233 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)22322232- goto busy;22342234+ return BLK_STS_RESOURCE;2233223522342236 if (!nvme_fc_ctrl_get(ctrl))22352237 return BLK_STS_IOERR;···23092311 ret != -EBUSY)23102312 return BLK_STS_IOERR;2311231323122312- goto busy;23142314+ return BLK_STS_RESOURCE;23132315 }2314231623152317 return BLK_STS_OK;23162316-23172317-busy:23182318- if (!(op->flags & FCOP_FLAGS_AEN) && queue->hctx)23192319- blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);23202320-23212321- return BLK_STS_RESOURCE;23222318}2323231923242320static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue,
+3-3
drivers/scsi/scsi_lib.c
···20462046 case BLK_STS_OK:20472047 break;20482048 case BLK_STS_RESOURCE:20492049- if (atomic_read(&sdev->device_busy) == 0 &&20502050- !scsi_device_blocked(sdev))20512051- blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);20492049+ if (atomic_read(&sdev->device_busy) ||20502050+ scsi_device_blocked(sdev))20512051+ ret = BLK_STS_DEV_RESOURCE;20522052 break;20532053 default:20542054 /*
+18
include/linux/blk_types.h
···39394040#define BLK_STS_AGAIN ((__force blk_status_t)12)41414242+/*4343+ * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if4444+ * device related resources are unavailable, but the driver can guarantee4545+ * that the queue will be rerun in the future once resources become4646+ * available again. This is typically the case for device specific4747+ * resources that are consumed for IO. If the driver fails allocating these4848+ * resources, we know that inflight (or pending) IO will free these4949+ * resources upon completion.5050+ *5151+ * This is different from BLK_STS_RESOURCE in that it explicitly references5252+ * a device specific resource. For resources of wider scope, allocation5353+ * failure can happen without having pending IO. This means that we can't5454+ * rely on request completions freeing these resources, as IO may not be in5555+ * flight. Examples of that are kernel memory allocations, DMA mappings, or5656+ * any other system wide resources.5757+ */5858+#define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13)5959+4260/**4361 * blk_path_error - returns true if error may be path related4462 * @error: status the request was completed with