Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'block-5.13-2021-05-07' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- dasd spelling fixes (Bhaskar)

- Limit bio max size on multi-page bvecs to the hardware limit, to
avoid overly large bios (and hence latencies). Originally queued for
the merge window, but needed a fix and was dropped from the initial
pull (Changheun)

- NVMe pull request (Christoph):
- reset the bdev to ns head when failover (Daniel Wagner)
- remove unsupported command noise (Keith Busch)
- misc passthrough improvements (Kanchan Joshi)
- fix controller ioctl through ns_head (Minwoo Im)
- fix controller timeouts during reset (Tao Chiu)

- rnbd fixes/cleanups (Gioh, Md, Dima)

- Fix iov_iter re-expansion (yangerkun)

* tag 'block-5.13-2021-05-07' of git://git.kernel.dk/linux-block:
block: reexpand iov_iter after read/write
nvmet: remove unsupported command noise
nvme-multipath: reset bdev to ns head when failover
nvme-pci: fix controller reset hang when racing with nvme_timeout
nvme: move the fabrics queue ready check routines to core
nvme: avoid memset for passthrough requests
nvme: add nvme_get_ns helper
nvme: fix controller ioctl through ns_head
bio: limit bio max size
RDMA/rtrs: fix uninitialized symbol 'cnt'
s390: dasd: Mundane spelling fixes
block/rnbd: Remove all likely and unlikely
block/rnbd-clt: Check the return value of the function rtrs_clt_query
block/rnbd: Fix style issues
block/rnbd-clt: Change queue_depth type in rnbd_clt_session to size_t

+216 -166
+11 -2
block/bio.c
··· 255 255 } 256 256 EXPORT_SYMBOL(bio_init); 257 257 258 + unsigned int bio_max_size(struct bio *bio) 259 + { 260 + struct block_device *bdev = bio->bi_bdev; 261 + 262 + return bdev ? bdev->bd_disk->queue->limits.bio_max_bytes : UINT_MAX; 263 + } 264 + 258 265 /** 259 266 * bio_reset - reinitialize a bio 260 267 * @bio: bio to reset ··· 873 866 struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; 874 867 875 868 if (page_is_mergeable(bv, page, len, off, same_page)) { 876 - if (bio->bi_iter.bi_size > UINT_MAX - len) { 869 + if (bio->bi_iter.bi_size > bio_max_size(bio) - len) { 877 870 *same_page = false; 878 871 return false; 879 872 } ··· 1002 995 { 1003 996 unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; 1004 997 unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; 998 + unsigned int bytes_left = bio_max_size(bio) - bio->bi_iter.bi_size; 1005 999 struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; 1006 1000 struct page **pages = (struct page **)bv; 1007 1001 bool same_page = false; ··· 1018 1010 BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); 1019 1011 pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); 1020 1012 1021 - size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); 1013 + size = iov_iter_get_pages(iter, pages, bytes_left, nr_pages, 1014 + &offset); 1022 1015 if (unlikely(size <= 0)) 1023 1016 return size ? size : -EFAULT; 1024 1017
+5
block/blk-settings.c
··· 32 32 */ 33 33 void blk_set_default_limits(struct queue_limits *lim) 34 34 { 35 + lim->bio_max_bytes = UINT_MAX; 35 36 lim->max_segments = BLK_MAX_SEGMENTS; 36 37 lim->max_discard_segments = 1; 37 38 lim->max_integrity_segments = 0; ··· 140 139 max_sectors = round_down(max_sectors, 141 140 limits->logical_block_size >> SECTOR_SHIFT); 142 141 limits->max_sectors = max_sectors; 142 + 143 + if (check_shl_overflow(max_sectors, SECTOR_SHIFT, 144 + &limits->bio_max_bytes)) 145 + limits->bio_max_bytes = UINT_MAX; 143 146 144 147 q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9); 145 148 }
+27 -19
drivers/block/rnbd/rnbd-clt.c
··· 88 88 dev->discard_alignment = le32_to_cpu(rsp->discard_alignment); 89 89 dev->secure_discard = le16_to_cpu(rsp->secure_discard); 90 90 dev->rotational = rsp->rotational; 91 - dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK); 91 + dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK); 92 92 dev->fua = !!(rsp->cache_policy & RNBD_FUA); 93 93 94 94 dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; ··· 241 241 cpu_q = rnbd_get_cpu_qlist(sess, nxt_cpu(cpu_q->cpu))) { 242 242 if (!spin_trylock_irqsave(&cpu_q->requeue_lock, flags)) 243 243 continue; 244 - if (unlikely(!test_bit(cpu_q->cpu, sess->cpu_queues_bm))) 244 + if (!test_bit(cpu_q->cpu, sess->cpu_queues_bm)) 245 245 goto unlock; 246 246 q = list_first_entry_or_null(&cpu_q->requeue_list, 247 247 typeof(*q), requeue_list); ··· 320 320 struct rtrs_permit *permit; 321 321 322 322 permit = rtrs_clt_get_permit(sess->rtrs, con_type, wait); 323 - if (likely(permit)) 323 + if (permit) 324 324 /* We have a subtle rare case here, when all permits can be 325 325 * consumed before busy counter increased. 
This is safe, 326 326 * because loser will get NULL as a permit, observe 0 busy ··· 351 351 struct rtrs_permit *permit; 352 352 353 353 iu = kzalloc(sizeof(*iu), GFP_KERNEL); 354 - if (!iu) { 354 + if (!iu) 355 355 return NULL; 356 - } 357 356 358 357 permit = rnbd_get_permit(sess, con_type, wait); 359 - if (unlikely(!permit)) { 358 + if (!permit) { 360 359 kfree(iu); 361 360 return NULL; 362 361 } ··· 691 692 return; 692 693 } 693 694 694 - rtrs_clt_query(sess->rtrs, &attrs); 695 + err = rtrs_clt_query(sess->rtrs, &attrs); 696 + if (err) { 697 + pr_err("rtrs_clt_query(\"%s\"): %d\n", sess->sessname, err); 698 + return; 699 + } 695 700 mutex_lock(&sess->lock); 696 701 sess->max_io_size = attrs.max_io_size; 697 702 ··· 808 805 mutex_init(&sess->lock); 809 806 INIT_LIST_HEAD(&sess->devs_list); 810 807 INIT_LIST_HEAD(&sess->list); 811 - bitmap_zero(sess->cpu_queues_bm, NR_CPUS); 808 + bitmap_zero(sess->cpu_queues_bm, num_possible_cpus()); 812 809 init_waitqueue_head(&sess->rtrs_waitq); 813 810 refcount_set(&sess->refcount, 1); 814 811 ··· 1050 1047 }; 1051 1048 err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit, 1052 1049 &vec, 1, size, iu->sgt.sgl, sg_cnt); 1053 - if (unlikely(err)) { 1050 + if (err) { 1054 1051 rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n", 1055 1052 err); 1056 1053 return err; ··· 1081 1078 cpu_q = get_cpu_ptr(sess->cpu_queues); 1082 1079 spin_lock_irqsave(&cpu_q->requeue_lock, flags); 1083 1080 1084 - if (likely(!test_and_set_bit_lock(0, &q->in_list))) { 1081 + if (!test_and_set_bit_lock(0, &q->in_list)) { 1085 1082 if (WARN_ON(!list_empty(&q->requeue_list))) 1086 1083 goto unlock; 1087 1084 ··· 1093 1090 */ 1094 1091 smp_mb__before_atomic(); 1095 1092 } 1096 - if (likely(atomic_read(&sess->busy))) { 1093 + if (atomic_read(&sess->busy)) { 1097 1094 list_add_tail(&q->requeue_list, &cpu_q->requeue_list); 1098 1095 } else { 1099 1096 /* Very unlikely, but possible: busy counter was ··· 1121 1118 1122 1119 if (delay != 
RNBD_DELAY_IFBUSY) 1123 1120 blk_mq_delay_run_hw_queue(hctx, delay); 1124 - else if (unlikely(!rnbd_clt_dev_add_to_requeue(dev, q))) 1121 + else if (!rnbd_clt_dev_add_to_requeue(dev, q)) 1125 1122 /* 1126 1123 * If session is not busy we have to restart 1127 1124 * the queue ourselves. ··· 1138 1135 int err; 1139 1136 blk_status_t ret = BLK_STS_IOERR; 1140 1137 1141 - if (unlikely(dev->dev_state != DEV_STATE_MAPPED)) 1138 + if (dev->dev_state != DEV_STATE_MAPPED) 1142 1139 return BLK_STS_IOERR; 1143 1140 1144 1141 iu->permit = rnbd_get_permit(dev->sess, RTRS_IO_CON, 1145 1142 RTRS_PERMIT_NOWAIT); 1146 - if (unlikely(!iu->permit)) { 1143 + if (!iu->permit) { 1147 1144 rnbd_clt_dev_kick_mq_queue(dev, hctx, RNBD_DELAY_IFBUSY); 1148 1145 return BLK_STS_RESOURCE; 1149 1146 } ··· 1151 1148 iu->sgt.sgl = iu->first_sgl; 1152 1149 err = sg_alloc_table_chained(&iu->sgt, 1153 1150 /* Even-if the request has no segment, 1154 - * sglist must have one entry at least */ 1151 + * sglist must have one entry at least. 1152 + */ 1155 1153 blk_rq_nr_phys_segments(rq) ? 
: 1, 1156 1154 iu->sgt.sgl, 1157 1155 RNBD_INLINE_SG_CNT); ··· 1165 1161 1166 1162 blk_mq_start_request(rq); 1167 1163 err = rnbd_client_xfer_request(dev, rq, iu); 1168 - if (likely(err == 0)) 1164 + if (err == 0) 1169 1165 return BLK_STS_OK; 1170 - if (unlikely(err == -EAGAIN || err == -ENOMEM)) { 1166 + if (err == -EAGAIN || err == -ENOMEM) { 1171 1167 rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/); 1172 1168 ret = BLK_STS_RESOURCE; 1173 1169 } ··· 1298 1294 err = PTR_ERR(sess->rtrs); 1299 1295 goto wake_up_and_put; 1300 1296 } 1301 - rtrs_clt_query(sess->rtrs, &attrs); 1297 + 1298 + err = rtrs_clt_query(sess->rtrs, &attrs); 1299 + if (err) 1300 + goto close_rtrs; 1301 + 1302 1302 sess->max_io_size = attrs.max_io_size; 1303 1303 sess->queue_depth = attrs.queue_depth; 1304 1304 sess->nr_poll_queues = nr_poll_queues; ··· 1584 1576 struct rnbd_clt_dev *dev; 1585 1577 int ret; 1586 1578 1587 - if (unlikely(exists_devpath(pathname, sessname))) 1579 + if (exists_devpath(pathname, sessname)) 1588 1580 return ERR_PTR(-EEXIST); 1589 1581 1590 1582 sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
+1 -1
drivers/block/rnbd/rnbd-clt.h
··· 87 87 DECLARE_BITMAP(cpu_queues_bm, NR_CPUS); 88 88 int __percpu *cpu_rr; /* per-cpu var for CPU round-robin */ 89 89 atomic_t busy; 90 - int queue_depth; 90 + size_t queue_depth; 91 91 u32 max_io_size; 92 92 struct blk_mq_tag_set tag_set; 93 93 u32 nr_poll_queues;
+1 -1
drivers/block/rnbd/rnbd-srv.c
··· 104 104 105 105 rcu_read_lock(); 106 106 sess_dev = xa_load(&srv_sess->index_idr, dev_id); 107 - if (likely(sess_dev)) 107 + if (sess_dev) 108 108 ret = kref_get_unless_zero(&sess_dev->kref); 109 109 rcu_read_unlock(); 110 110
+2 -1
drivers/infiniband/ulp/rtrs/rtrs-clt.c
··· 2976 2976 2977 2977 int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index) 2978 2978 { 2979 - int cnt; 2979 + /* If no path, return -1 for block layer not to try again */ 2980 + int cnt = -1; 2980 2981 struct rtrs_con *con; 2981 2982 struct rtrs_clt_sess *sess; 2982 2983 struct path_it it;
+70 -28
drivers/nvme/host/core.c
··· 576 576 kfree(ns); 577 577 } 578 578 579 + static inline bool nvme_get_ns(struct nvme_ns *ns) 580 + { 581 + return kref_get_unless_zero(&ns->kref); 582 + } 583 + 579 584 void nvme_put_ns(struct nvme_ns *ns) 580 585 { 581 586 kref_put(&ns->kref, nvme_free_ns); ··· 589 584 590 585 static inline void nvme_clear_nvme_request(struct request *req) 591 586 { 592 - struct nvme_command *cmd = nvme_req(req)->cmd; 593 - 594 - memset(cmd, 0, sizeof(*cmd)); 595 587 nvme_req(req)->retries = 0; 596 588 nvme_req(req)->flags = 0; 597 589 req->rq_flags |= RQF_DONTPREP; ··· 638 636 nvme_init_request(req, cmd); 639 637 return req; 640 638 } 639 + 640 + /* 641 + * For something we're not in a state to send to the device the default action 642 + * is to busy it and retry it after the controller state is recovered. However, 643 + * if the controller is deleting or if anything is marked for failfast or 644 + * nvme multipath it is immediately failed. 645 + * 646 + * Note: commands used to initialize the controller will be marked for failfast. 647 + * Note: nvme cli/ioctl commands are marked for failfast. 
648 + */ 649 + blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, 650 + struct request *rq) 651 + { 652 + if (ctrl->state != NVME_CTRL_DELETING_NOIO && 653 + ctrl->state != NVME_CTRL_DEAD && 654 + !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && 655 + !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) 656 + return BLK_STS_RESOURCE; 657 + return nvme_host_path_error(rq); 658 + } 659 + EXPORT_SYMBOL_GPL(nvme_fail_nonready_command); 660 + 661 + bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 662 + bool queue_live) 663 + { 664 + struct nvme_request *req = nvme_req(rq); 665 + 666 + /* 667 + * currently we have a problem sending passthru commands 668 + * on the admin_q if the controller is not LIVE because we can't 669 + * make sure that they are going out after the admin connect, 670 + * controller enable and/or other commands in the initialization 671 + * sequence. until the controller will be LIVE, fail with 672 + * BLK_STS_RESOURCE so that they will be rescheduled. 673 + */ 674 + if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD)) 675 + return false; 676 + 677 + if (ctrl->ops->flags & NVME_F_FABRICS) { 678 + /* 679 + * Only allow commands on a live queue, except for the connect 680 + * command, which is require to set the queue live in the 681 + * appropinquate states. 
682 + */ 683 + switch (ctrl->state) { 684 + case NVME_CTRL_CONNECTING: 685 + if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && 686 + req->cmd->fabrics.fctype == nvme_fabrics_type_connect) 687 + return true; 688 + break; 689 + default: 690 + break; 691 + case NVME_CTRL_DEAD: 692 + return false; 693 + } 694 + } 695 + 696 + return queue_live; 697 + } 698 + EXPORT_SYMBOL_GPL(__nvme_check_ready); 641 699 642 700 static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) 643 701 { ··· 960 898 struct nvme_command *cmd = nvme_req(req)->cmd; 961 899 blk_status_t ret = BLK_STS_OK; 962 900 963 - if (!(req->rq_flags & RQF_DONTPREP)) 901 + if (!(req->rq_flags & RQF_DONTPREP)) { 964 902 nvme_clear_nvme_request(req); 903 + memset(cmd, 0, sizeof(*cmd)); 904 + } 965 905 966 906 switch (req_op(req)) { 967 907 case REQ_OP_DRV_IN: ··· 1558 1494 /* should never be called due to GENHD_FL_HIDDEN */ 1559 1495 if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head))) 1560 1496 goto fail; 1561 - if (!kref_get_unless_zero(&ns->kref)) 1497 + if (!nvme_get_ns(ns)) 1562 1498 goto fail; 1563 1499 if (!try_module_get(ns->ctrl->ops->module)) 1564 1500 goto fail_put_ns; ··· 2062 1998 .report_zones = nvme_report_zones, 2063 1999 .pr_ops = &nvme_pr_ops, 2064 2000 }; 2065 - 2066 - #ifdef CONFIG_NVME_MULTIPATH 2067 - struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys) 2068 - { 2069 - struct nvme_ctrl *ctrl; 2070 - int ret; 2071 - 2072 - ret = mutex_lock_killable(&nvme_subsystems_lock); 2073 - if (ret) 2074 - return ERR_PTR(ret); 2075 - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 2076 - if (ctrl->state == NVME_CTRL_LIVE) 2077 - goto found; 2078 - } 2079 - mutex_unlock(&nvme_subsystems_lock); 2080 - return ERR_PTR(-EWOULDBLOCK); 2081 - found: 2082 - nvme_get_ctrl(ctrl); 2083 - mutex_unlock(&nvme_subsystems_lock); 2084 - return ctrl; 2085 - } 2086 - #endif /* CONFIG_NVME_MULTIPATH */ 2087 2001 2088 2002 static int nvme_wait_ready(struct nvme_ctrl *ctrl, 
u64 cap, bool enabled) 2089 2003 { ··· 3646 3604 down_read(&ctrl->namespaces_rwsem); 3647 3605 list_for_each_entry(ns, &ctrl->namespaces, list) { 3648 3606 if (ns->head->ns_id == nsid) { 3649 - if (!kref_get_unless_zero(&ns->kref)) 3607 + if (!nvme_get_ns(ns)) 3650 3608 continue; 3651 3609 ret = ns; 3652 3610 break;
-57
drivers/nvme/host/fabrics.c
··· 533 533 return NULL; 534 534 } 535 535 536 - /* 537 - * For something we're not in a state to send to the device the default action 538 - * is to busy it and retry it after the controller state is recovered. However, 539 - * if the controller is deleting or if anything is marked for failfast or 540 - * nvme multipath it is immediately failed. 541 - * 542 - * Note: commands used to initialize the controller will be marked for failfast. 543 - * Note: nvme cli/ioctl commands are marked for failfast. 544 - */ 545 - blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, 546 - struct request *rq) 547 - { 548 - if (ctrl->state != NVME_CTRL_DELETING_NOIO && 549 - ctrl->state != NVME_CTRL_DEAD && 550 - !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && 551 - !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) 552 - return BLK_STS_RESOURCE; 553 - return nvme_host_path_error(rq); 554 - } 555 - EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command); 556 - 557 - bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 558 - bool queue_live) 559 - { 560 - struct nvme_request *req = nvme_req(rq); 561 - 562 - /* 563 - * currently we have a problem sending passthru commands 564 - * on the admin_q if the controller is not LIVE because we can't 565 - * make sure that they are going out after the admin connect, 566 - * controller enable and/or other commands in the initialization 567 - * sequence. until the controller will be LIVE, fail with 568 - * BLK_STS_RESOURCE so that they will be rescheduled. 569 - */ 570 - if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD)) 571 - return false; 572 - 573 - /* 574 - * Only allow commands on a live queue, except for the connect command, 575 - * which is require to set the queue live in the appropinquate states. 
576 - */ 577 - switch (ctrl->state) { 578 - case NVME_CTRL_CONNECTING: 579 - if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && 580 - req->cmd->fabrics.fctype == nvme_fabrics_type_connect) 581 - return true; 582 - break; 583 - default: 584 - break; 585 - case NVME_CTRL_DEAD: 586 - return false; 587 - } 588 - 589 - return queue_live; 590 - } 591 - EXPORT_SYMBOL_GPL(__nvmf_check_ready); 592 - 593 536 static const match_table_t opt_tokens = { 594 537 { NVMF_OPT_TRANSPORT, "transport=%s" }, 595 538 { NVMF_OPT_TRADDR, "traddr=%s" },
-13
drivers/nvme/host/fabrics.h
··· 184 184 void nvmf_free_options(struct nvmf_ctrl_options *opts); 185 185 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); 186 186 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); 187 - blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, 188 - struct request *rq); 189 - bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 190 - bool queue_live); 191 187 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, 192 188 struct nvmf_ctrl_options *opts); 193 - 194 - static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 195 - bool queue_live) 196 - { 197 - if (likely(ctrl->state == NVME_CTRL_LIVE || 198 - ctrl->state == NVME_CTRL_DELETING)) 199 - return true; 200 - return __nvmf_check_ready(ctrl, rq, queue_live); 201 - } 202 189 203 190 #endif /* _NVME_FABRICS_H */
+2 -2
drivers/nvme/host/fc.c
··· 2766 2766 blk_status_t ret; 2767 2767 2768 2768 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || 2769 - !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2770 - return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); 2769 + !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2770 + return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); 2771 2771 2772 2772 ret = nvme_setup_cmd(ns, rq); 2773 2773 if (ret)
+41 -24
drivers/nvme/host/ioctl.c
··· 370 370 } 371 371 372 372 #ifdef CONFIG_NVME_MULTIPATH 373 - static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head, 374 - unsigned int cmd, void __user *argp) 373 + static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 374 + void __user *argp, struct nvme_ns_head *head, int srcu_idx) 375 375 { 376 - struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys); 376 + struct nvme_ctrl *ctrl = ns->ctrl; 377 377 int ret; 378 378 379 - if (IS_ERR(ctrl)) 380 - return PTR_ERR(ctrl); 381 - ret = nvme_ctrl_ioctl(ctrl, cmd, argp); 379 + nvme_get_ctrl(ns->ctrl); 380 + nvme_put_ns_from_disk(head, srcu_idx); 381 + ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); 382 + 382 383 nvme_put_ctrl(ctrl); 383 - return ret; 384 - } 385 - 386 - static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head, 387 - unsigned int cmd, void __user *argp) 388 - { 389 - int srcu_idx = srcu_read_lock(&head->srcu); 390 - struct nvme_ns *ns = nvme_find_path(head); 391 - int ret = -EWOULDBLOCK; 392 - 393 - if (ns) 394 - ret = nvme_ns_ioctl(ns, cmd, argp); 395 - srcu_read_unlock(&head->srcu, srcu_idx); 396 384 return ret; 397 385 } 398 386 399 387 int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, 400 388 unsigned int cmd, unsigned long arg) 401 389 { 402 - struct nvme_ns_head *head = bdev->bd_disk->private_data; 390 + struct nvme_ns_head *head = NULL; 403 391 void __user *argp = (void __user *)arg; 392 + struct nvme_ns *ns; 393 + int srcu_idx, ret; 404 394 395 + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); 396 + if (unlikely(!ns)) 397 + return -EWOULDBLOCK; 398 + 399 + /* 400 + * Handle ioctls that apply to the controller instead of the namespace 401 + * seperately and drop the ns SRCU reference early. This avoids a 402 + * deadlock when deleting namespaces using the passthrough interface. 
403 + */ 405 404 if (is_ctrl_ioctl(cmd)) 406 - return nvme_ns_head_ctrl_ioctl(head, cmd, argp); 407 - return nvme_ns_head_ns_ioctl(head, cmd, argp); 405 + ret = nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); 406 + else { 407 + ret = nvme_ns_ioctl(ns, cmd, argp); 408 + nvme_put_ns_from_disk(head, srcu_idx); 409 + } 410 + 411 + return ret; 408 412 } 409 413 410 414 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, ··· 418 414 struct nvme_ns_head *head = 419 415 container_of(cdev, struct nvme_ns_head, cdev); 420 416 void __user *argp = (void __user *)arg; 417 + struct nvme_ns *ns; 418 + int srcu_idx, ret; 419 + 420 + srcu_idx = srcu_read_lock(&head->srcu); 421 + ns = nvme_find_path(head); 422 + if (!ns) { 423 + srcu_read_unlock(&head->srcu, srcu_idx); 424 + return -EWOULDBLOCK; 425 + } 421 426 422 427 if (is_ctrl_ioctl(cmd)) 423 - return nvme_ns_head_ctrl_ioctl(head, cmd, argp); 424 - return nvme_ns_head_ns_ioctl(head, cmd, argp); 428 + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); 429 + 430 + ret = nvme_ns_ioctl(ns, cmd, argp); 431 + nvme_put_ns_from_disk(head, srcu_idx); 432 + 433 + return ret; 425 434 } 426 435 #endif /* CONFIG_NVME_MULTIPATH */ 427 436
+3
drivers/nvme/host/multipath.c
··· 70 70 struct nvme_ns *ns = req->q->queuedata; 71 71 u16 status = nvme_req(req)->status & 0x7ff; 72 72 unsigned long flags; 73 + struct bio *bio; 73 74 74 75 nvme_mpath_clear_current_path(ns); 75 76 ··· 85 84 } 86 85 87 86 spin_lock_irqsave(&ns->head->requeue_lock, flags); 87 + for (bio = req->bio; bio; bio = bio->bi_next) 88 + bio_set_dev(bio, ns->head->disk->part0); 88 89 blk_steal_bios(&ns->head->requeue_list, req); 89 90 spin_unlock_irqrestore(&ns->head->requeue_lock, flags); 90 91
+15 -1
drivers/nvme/host/nvme.h
··· 638 638 struct nvme_command *cmd, blk_mq_req_flags_t flags); 639 639 void nvme_cleanup_cmd(struct request *req); 640 640 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); 641 + blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, 642 + struct request *req); 643 + bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 644 + bool queue_live); 645 + 646 + static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 647 + bool queue_live) 648 + { 649 + if (likely(ctrl->state == NVME_CTRL_LIVE)) 650 + return true; 651 + if (ctrl->ops->flags & NVME_F_FABRICS && 652 + ctrl->state == NVME_CTRL_DELETING) 653 + return true; 654 + return __nvme_check_ready(ctrl, rq, queue_live); 655 + } 641 656 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, 642 657 void *buf, unsigned bufflen); 643 658 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, ··· 679 664 void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx); 680 665 bool nvme_tryget_ns_head(struct nvme_ns_head *head); 681 666 void nvme_put_ns_head(struct nvme_ns_head *head); 682 - struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys); 683 667 int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, 684 668 const struct file_operations *fops, struct module *owner); 685 669 void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
+3
drivers/nvme/host/pci.c
··· 933 933 if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags))) 934 934 return BLK_STS_IOERR; 935 935 936 + if (!nvme_check_ready(&dev->ctrl, req, true)) 937 + return nvme_fail_nonready_command(&dev->ctrl, req); 938 + 936 939 ret = nvme_setup_cmd(ns, req); 937 940 if (ret) 938 941 return ret;
+2 -2
drivers/nvme/host/rdma.c
··· 2050 2050 2051 2051 WARN_ON_ONCE(rq->tag < 0); 2052 2052 2053 - if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2054 - return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); 2053 + if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2054 + return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); 2055 2055 2056 2056 dev = queue->device->dev; 2057 2057
+2 -2
drivers/nvme/host/tcp.c
··· 2338 2338 bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags); 2339 2339 blk_status_t ret; 2340 2340 2341 - if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2342 - return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); 2341 + if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2342 + return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); 2343 2343 2344 2344 ret = nvme_tcp_setup_cmd_pdu(ns, rq); 2345 2345 if (unlikely(ret))
+3 -3
drivers/nvme/target/admin-cmd.c
··· 307 307 case NVME_LOG_ANA: 308 308 return nvmet_execute_get_log_page_ana(req); 309 309 } 310 - pr_err("unhandled lid %d on qid %d\n", 310 + pr_debug("unhandled lid %d on qid %d\n", 311 311 req->cmd->get_log_page.lid, req->sq->qid); 312 312 req->error_loc = offsetof(struct nvme_get_log_page_command, lid); 313 313 nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); ··· 659 659 return nvmet_execute_identify_desclist(req); 660 660 } 661 661 662 - pr_err("unhandled identify cns %d on qid %d\n", 662 + pr_debug("unhandled identify cns %d on qid %d\n", 663 663 req->cmd->identify.cns, req->sq->qid); 664 664 req->error_loc = offsetof(struct nvme_identify, cns); 665 665 nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); ··· 977 977 return 0; 978 978 } 979 979 980 - pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, 980 + pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode, 981 981 req->sq->qid); 982 982 req->error_loc = offsetof(struct nvme_common_command, opcode); 983 983 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+2 -2
drivers/nvme/target/loop.c
··· 138 138 bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags); 139 139 blk_status_t ret; 140 140 141 - if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) 142 - return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req); 141 + if (!nvme_check_ready(&queue->ctrl->ctrl, req, queue_ready)) 142 + return nvme_fail_nonready_command(&queue->ctrl->ctrl, req); 143 143 144 144 ret = nvme_setup_cmd(ns, req); 145 145 if (ret)
+4 -4
drivers/s390/block/dasd_eckd.h
··· 52 52 #define DASD_ECKD_CCW_RCD 0xFA 53 53 #define DASD_ECKD_CCW_DSO 0xF7 54 54 55 - /* Define Subssystem Function / Orders */ 55 + /* Define Subsystem Function / Orders */ 56 56 #define DSO_ORDER_RAS 0x81 57 57 58 58 /* ··· 110 110 #define DASD_ECKD_PG_GROUPED 0x10 111 111 112 112 /* 113 - * Size that is reportet for large volumes in the old 16-bit no_cyl field 113 + * Size that is reported for large volumes in the old 16-bit no_cyl field 114 114 */ 115 115 #define LV_COMPAT_CYL 0xFFFE 116 116 ··· 555 555 } __packed; 556 556 557 557 /* 558 - * Define Subsytem Operation - Release Allocated Space 558 + * Define Subsystem Operation - Release Allocated Space 559 559 */ 560 560 struct dasd_dso_ras_data { 561 561 __u8 order; ··· 676 676 struct dasd_ext_pool_sum eps; 677 677 u32 real_cyl; 678 678 679 - /* alias managemnet */ 679 + /* alias management */ 680 680 struct dasd_uid uid; 681 681 struct alias_pav_group *pavgroup; 682 682 struct alias_lcu *lcu;
+17 -3
fs/block_dev.c
··· 1677 1677 struct inode *bd_inode = bdev_file_inode(file); 1678 1678 loff_t size = i_size_read(bd_inode); 1679 1679 struct blk_plug plug; 1680 + size_t shorted = 0; 1680 1681 ssize_t ret; 1681 1682 1682 1683 if (bdev_read_only(I_BDEV(bd_inode))) ··· 1695 1694 if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) 1696 1695 return -EOPNOTSUPP; 1697 1696 1698 - iov_iter_truncate(from, size - iocb->ki_pos); 1697 + size -= iocb->ki_pos; 1698 + if (iov_iter_count(from) > size) { 1699 + shorted = iov_iter_count(from) - size; 1700 + iov_iter_truncate(from, size); 1701 + } 1699 1702 1700 1703 blk_start_plug(&plug); 1701 1704 ret = __generic_file_write_iter(iocb, from); 1702 1705 if (ret > 0) 1703 1706 ret = generic_write_sync(iocb, ret); 1707 + iov_iter_reexpand(from, iov_iter_count(from) + shorted); 1704 1708 blk_finish_plug(&plug); 1705 1709 return ret; 1706 1710 } ··· 1717 1711 struct inode *bd_inode = bdev_file_inode(file); 1718 1712 loff_t size = i_size_read(bd_inode); 1719 1713 loff_t pos = iocb->ki_pos; 1714 + size_t shorted = 0; 1715 + ssize_t ret; 1720 1716 1721 1717 if (pos >= size) 1722 1718 return 0; 1723 1719 1724 1720 size -= pos; 1725 - iov_iter_truncate(to, size); 1726 - return generic_file_read_iter(iocb, to); 1721 + if (iov_iter_count(to) > size) { 1722 + shorted = iov_iter_count(to) - size; 1723 + iov_iter_truncate(to, size); 1724 + } 1725 + 1726 + ret = generic_file_read_iter(iocb, to); 1727 + iov_iter_reexpand(to, iov_iter_count(to) + shorted); 1728 + return ret; 1727 1729 } 1728 1730 EXPORT_SYMBOL_GPL(blkdev_read_iter); 1729 1731
+3 -1
include/linux/bio.h
··· 106 106 return NULL; 107 107 } 108 108 109 + extern unsigned int bio_max_size(struct bio *bio); 110 + 109 111 /** 110 112 * bio_full - check if the bio is full 111 113 * @bio: bio to check ··· 121 119 if (bio->bi_vcnt >= bio->bi_max_vecs) 122 120 return true; 123 121 124 - if (bio->bi_iter.bi_size > UINT_MAX - len) 122 + if (bio->bi_iter.bi_size > bio_max_size(bio) - len) 125 123 return true; 126 124 127 125 return false;
+2
include/linux/blkdev.h
··· 326 326 }; 327 327 328 328 struct queue_limits { 329 + unsigned int bio_max_bytes; 330 + 329 331 enum blk_bounce bounce; 330 332 unsigned long seg_boundary_mask; 331 333 unsigned long virt_boundary_mask;