Merge tag 'for-linus-20180623' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- Further timeout fixes. We aren't quite there yet, so expect another
round of fixes to completely close the remaining IRQ vs completion
races. (Christoph/Bart)

- Set of NVMe fixes from the usual suspects, mostly error handling

- Two off-by-one fixes (Dan)

- Another bdi race fix (Jan)

- Fix nbd reconfigure with NBD_DISCONNECT_ON_CLOSE (Doron)

* tag 'for-linus-20180623' of git://git.kernel.dk/linux-block:
blk-mq: Fix timeout handling in case the timeout handler returns BLK_EH_DONE
bdi: Fix another oops in wb_workfn()
lightnvm: Remove depends on HAS_DMA in case of platform dependency
nvme-pci: limit max IO size and segments to avoid high order allocations
nvme-pci: move nvme_kill_queues to nvme_remove_dead_ctrl
nvme-fc: release io queues to allow fast fail
nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag.
block: sed-opal: Fix a couple off by one bugs
blk-mq-debugfs: Off by one in blk_mq_rq_state_name()
nvmet: reset keep alive timer in controller enable
nvme-rdma: don't override opts->queue_size
nvme-rdma: Fix command completion race at error recovery
nvme-rdma: fix possible free of a non-allocated async event buffer
nvme-rdma: fix possible double free condition when failing to create a controller
Revert "block: Add warning for bi_next not NULL in bio_endio()"
block: fix timeout changes for legacy request drivers

+142 -84
-3
block/bio.c
··· 1807 if (!bio_integrity_endio(bio)) 1808 return; 1809 1810 - if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL")) 1811 - bio->bi_next = NULL; 1812 - 1813 /* 1814 * Need to have a real endio function for chained bios, otherwise 1815 * various corner cases will break (like stacking block devices that
··· 1807 if (!bio_integrity_endio(bio)) 1808 return; 1809 1810 /* 1811 * Need to have a real endio function for chained bios, otherwise 1812 * various corner cases will break (like stacking block devices that
+1 -7
block/blk-core.c
··· 273 bio_advance(bio, nbytes); 274 275 /* don't actually finish bio if it's part of flush sequence */ 276 - /* 277 - * XXX this code looks suspicious - it's not consistent with advancing 278 - * req->bio in caller 279 - */ 280 if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) 281 bio_endio(bio); 282 } ··· 3077 struct bio *bio = req->bio; 3078 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); 3079 3080 - if (bio_bytes == bio->bi_iter.bi_size) { 3081 req->bio = bio->bi_next; 3082 - bio->bi_next = NULL; 3083 - } 3084 3085 /* Completion has already been traced */ 3086 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
··· 273 bio_advance(bio, nbytes); 274 275 /* don't actually finish bio if it's part of flush sequence */ 276 if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) 277 bio_endio(bio); 278 } ··· 3081 struct bio *bio = req->bio; 3082 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); 3083 3084 + if (bio_bytes == bio->bi_iter.bi_size) 3085 req->bio = bio->bi_next; 3086 3087 /* Completion has already been traced */ 3088 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+1 -1
block/blk-mq-debugfs.c
··· 356 357 static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state) 358 { 359 - if (WARN_ON_ONCE((unsigned int)rq_state > 360 ARRAY_SIZE(blk_mq_rq_state_name_array))) 361 return "(?)"; 362 return blk_mq_rq_state_name_array[rq_state];
··· 356 357 static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state) 358 { 359 + if (WARN_ON_ONCE((unsigned int)rq_state >= 360 ARRAY_SIZE(blk_mq_rq_state_name_array))) 361 return "(?)"; 362 return blk_mq_rq_state_name_array[rq_state];
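The two off-by-one fixes in this pull (this one and the sed-opal one below) are the same pattern: an index equal to the array length slips past a ">" comparison and reads one element past the end of the table. A minimal stand-alone illustration of the corrected bounds check, using a made-up lookup table rather than the real blk-mq state names:

    #include <stdio.h>

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    /* Hypothetical lookup table standing in for blk_mq_rq_state_name_array. */
    static const char *state_name[] = { "idle", "in_flight", "complete" };

    static const char *name_of(unsigned int state)
    {
            /*
             * Valid indices are 0 .. ARRAY_SIZE() - 1, so the guard must be
             * ">=": with ">", state == ARRAY_SIZE() (3 here) is accepted and
             * reads one slot past the end of the array.
             */
            if (state >= ARRAY_SIZE(state_name))
                    return "(?)";
            return state_name[state];
    }

    int main(void)
    {
            printf("%s %s\n", name_of(1), name_of(3)); /* prints "in_flight (?)" */
            return 0;
    }
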
-1
block/blk-mq.c
··· 781 WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); 782 } 783 784 - req->rq_flags &= ~RQF_TIMED_OUT; 785 blk_add_timer(req); 786 } 787
··· 781 WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); 782 } 783 784 blk_add_timer(req); 785 } 786
+1
block/blk-softirq.c
··· 144 145 local_irq_restore(flags); 146 } 147 148 /** 149 * blk_complete_request - end I/O on a request
··· 144 145 local_irq_restore(flags); 146 } 147 + EXPORT_SYMBOL(__blk_complete_request); 148 149 /** 150 * blk_complete_request - end I/O on a request
+1
block/blk-timeout.c
··· 210 if (!req->timeout) 211 req->timeout = q->rq_timeout; 212 213 blk_rq_set_deadline(req, jiffies + req->timeout); 214 215 /*
··· 210 if (!req->timeout) 211 req->timeout = q->rq_timeout; 212 213 + req->rq_flags &= ~RQF_TIMED_OUT; 214 blk_rq_set_deadline(req, jiffies + req->timeout); 215 216 /*
+2 -2
block/sed-opal.c
··· 877 return 0; 878 } 879 880 - if (n > resp->num) { 881 pr_debug("Response has %d tokens. Can't access %d\n", 882 resp->num, n); 883 return 0; ··· 916 return 0; 917 } 918 919 - if (n > resp->num) { 920 pr_debug("Response has %d tokens. Can't access %d\n", 921 resp->num, n); 922 return 0;
··· 877 return 0; 878 } 879 880 + if (n >= resp->num) { 881 pr_debug("Response has %d tokens. Can't access %d\n", 882 resp->num, n); 883 return 0; ··· 916 return 0; 917 } 918 919 + if (n >= resp->num) { 920 pr_debug("Response has %d tokens. Can't access %d\n", 921 resp->num, n); 922 return 0;
+34 -8
drivers/block/nbd.c
··· 76 #define NBD_HAS_CONFIG_REF 4 77 #define NBD_BOUND 5 78 #define NBD_DESTROY_ON_DISCONNECT 6 79 80 struct nbd_config { 81 u32 flags; ··· 139 static void nbd_connect_reply(struct genl_info *info, int index); 140 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); 141 static void nbd_dead_link_work(struct work_struct *work); 142 143 static inline struct device *nbd_to_dev(struct nbd_device *nbd) 144 { ··· 1307 static void nbd_release(struct gendisk *disk, fmode_t mode) 1308 { 1309 struct nbd_device *nbd = disk->private_data; 1310 nbd_config_put(nbd); 1311 nbd_put(nbd); 1312 } ··· 1713 &config->runtime_flags); 1714 put_dev = true; 1715 } 1716 } 1717 1718 if (info->attrs[NBD_ATTR_SOCKETS]) { ··· 1761 return ret; 1762 } 1763 1764 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) 1765 { 1766 struct nbd_device *nbd; ··· 1804 nbd_put(nbd); 1805 return 0; 1806 } 1807 - mutex_lock(&nbd->config_lock); 1808 - nbd_disconnect(nbd); 1809 - nbd_clear_sock(nbd); 1810 - mutex_unlock(&nbd->config_lock); 1811 - if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1812 - &nbd->config->runtime_flags)) 1813 - nbd_config_put(nbd); 1814 nbd_config_put(nbd); 1815 nbd_put(nbd); 1816 return 0; ··· 1815 struct nbd_device *nbd = NULL; 1816 struct nbd_config *config; 1817 int index; 1818 - int ret = -EINVAL; 1819 bool put_dev = false; 1820 1821 if (!netlink_capable(skb, CAP_SYS_ADMIN)) ··· 1855 !nbd->task_recv) { 1856 dev_err(nbd_to_dev(nbd), 1857 "not configured, cannot reconfigure\n"); 1858 goto out; 1859 } 1860 ··· 1879 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, 1880 &config->runtime_flags)) 1881 refcount_inc(&nbd->refs); 1882 } 1883 } 1884
··· 76 #define NBD_HAS_CONFIG_REF 4 77 #define NBD_BOUND 5 78 #define NBD_DESTROY_ON_DISCONNECT 6 79 + #define NBD_DISCONNECT_ON_CLOSE 7 80 81 struct nbd_config { 82 u32 flags; ··· 138 static void nbd_connect_reply(struct genl_info *info, int index); 139 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); 140 static void nbd_dead_link_work(struct work_struct *work); 141 + static void nbd_disconnect_and_put(struct nbd_device *nbd); 142 143 static inline struct device *nbd_to_dev(struct nbd_device *nbd) 144 { ··· 1305 static void nbd_release(struct gendisk *disk, fmode_t mode) 1306 { 1307 struct nbd_device *nbd = disk->private_data; 1308 + struct block_device *bdev = bdget_disk(disk, 0); 1309 + 1310 + if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && 1311 + bdev->bd_openers == 0) 1312 + nbd_disconnect_and_put(nbd); 1313 + 1314 nbd_config_put(nbd); 1315 nbd_put(nbd); 1316 } ··· 1705 &config->runtime_flags); 1706 put_dev = true; 1707 } 1708 + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { 1709 + set_bit(NBD_DISCONNECT_ON_CLOSE, 1710 + &config->runtime_flags); 1711 + } 1712 } 1713 1714 if (info->attrs[NBD_ATTR_SOCKETS]) { ··· 1749 return ret; 1750 } 1751 1752 + static void nbd_disconnect_and_put(struct nbd_device *nbd) 1753 + { 1754 + mutex_lock(&nbd->config_lock); 1755 + nbd_disconnect(nbd); 1756 + nbd_clear_sock(nbd); 1757 + mutex_unlock(&nbd->config_lock); 1758 + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1759 + &nbd->config->runtime_flags)) 1760 + nbd_config_put(nbd); 1761 + } 1762 + 1763 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) 1764 { 1765 struct nbd_device *nbd; ··· 1781 nbd_put(nbd); 1782 return 0; 1783 } 1784 + nbd_disconnect_and_put(nbd); 1785 nbd_config_put(nbd); 1786 nbd_put(nbd); 1787 return 0; ··· 1798 struct nbd_device *nbd = NULL; 1799 struct nbd_config *config; 1800 int index; 1801 + int ret = 0; 1802 bool put_dev = false; 1803 1804 if (!netlink_capable(skb, CAP_SYS_ADMIN)) ··· 1838 !nbd->task_recv) { 1839 dev_err(nbd_to_dev(nbd), 1840 "not configured, cannot reconfigure\n"); 1841 + ret = -EINVAL; 1842 goto out; 1843 } 1844 ··· 1861 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, 1862 &config->runtime_flags)) 1863 refcount_inc(&nbd->refs); 1864 + } 1865 + 1866 + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { 1867 + set_bit(NBD_DISCONNECT_ON_CLOSE, 1868 + &config->runtime_flags); 1869 + } else { 1870 + clear_bit(NBD_DISCONNECT_ON_CLOSE, 1871 + &config->runtime_flags); 1872 } 1873 } 1874
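For context on how the new flag reaches the kernel: NBD_CFLAG_DISCONNECT_ON_CLOSE (added to the uapi header further down) is supplied by userspace through the nbd generic-netlink interface, and with the ret fix above a flags-only NBD_CMD_RECONFIGURE no longer fails with -EINVAL. The sketch below is illustrative only and not part of this pull; it assumes libnl-3 and the existing NBD_ATTR_INDEX / NBD_ATTR_CLIENT_FLAGS attributes, with error handling trimmed.

    /* Hypothetical userspace sketch (libnl-3); not from this series. */
    #include <netlink/netlink.h>
    #include <netlink/genl/genl.h>
    #include <netlink/genl/ctrl.h>
    #include <linux/nbd.h>
    #include <linux/nbd-netlink.h>

    int nbd_set_disconnect_on_close(unsigned int index)
    {
            struct nl_sock *sock = nl_socket_alloc();
            struct nl_msg *msg;
            int family, ret;

            if (!sock)
                    return -1;
            if (genl_connect(sock))
                    goto err;
            family = genl_ctrl_resolve(sock, "nbd");
            if (family < 0)
                    goto err;

            msg = nlmsg_alloc();
            if (!msg)
                    goto err;
            genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
                        NBD_CMD_RECONFIGURE, 0);
            nla_put_u32(msg, NBD_ATTR_INDEX, index);
            /* Ask the kernel to drop the connection when the last opener closes. */
            nla_put_u64(msg, NBD_ATTR_CLIENT_FLAGS, NBD_CFLAG_DISCONNECT_ON_CLOSE);

            ret = nl_send_sync(sock, msg);  /* sends, waits for ACK, frees msg */
            nl_socket_free(sock);
            return ret;
    err:
            nl_socket_free(sock);
            return -1;
    }
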
+1 -1
drivers/block/null_blk.c
··· 1365 static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) 1366 { 1367 pr_info("null: rq %p timed out\n", rq); 1368 - blk_mq_complete_request(rq); 1369 return BLK_EH_DONE; 1370 } 1371
··· 1365 static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) 1366 { 1367 pr_info("null: rq %p timed out\n", rq); 1368 + __blk_complete_request(rq); 1369 return BLK_EH_DONE; 1370 } 1371
+1 -1
drivers/lightnvm/Kconfig
··· 4 5 menuconfig NVM 6 bool "Open-Channel SSD target support" 7 - depends on BLOCK && HAS_DMA && PCI 8 select BLK_DEV_NVME 9 help 10 Say Y here to get to enable Open-channel SSDs.
··· 4 5 menuconfig NVM 6 bool "Open-Channel SSD target support" 7 + depends on BLOCK && PCI 8 select BLK_DEV_NVME 9 help 10 Say Y here to get to enable Open-channel SSDs.
+1
drivers/nvme/host/core.c
··· 1808 u32 max_segments = 1809 (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; 1810 1811 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); 1812 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); 1813 }
··· 1808 u32 max_segments = 1809 (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; 1810 1811 + max_segments = min_not_zero(max_segments, ctrl->max_segments); 1812 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); 1813 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); 1814 }
+3 -3
drivers/nvme/host/fc.c
··· 2790 /* re-enable the admin_q so anything new can fast fail */ 2791 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2792 2793 nvme_fc_ctlr_inactive_on_rport(ctrl); 2794 } 2795 ··· 2807 * waiting for io to terminate 2808 */ 2809 nvme_fc_delete_association(ctrl); 2810 - 2811 - /* resume the io queues so that things will fast fail */ 2812 - nvme_start_queues(nctrl); 2813 } 2814 2815 static void
··· 2790 /* re-enable the admin_q so anything new can fast fail */ 2791 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2792 2793 + /* resume the io queues so that things will fast fail */ 2794 + nvme_start_queues(&ctrl->ctrl); 2795 + 2796 nvme_fc_ctlr_inactive_on_rport(ctrl); 2797 } 2798 ··· 2804 * waiting for io to terminate 2805 */ 2806 nvme_fc_delete_association(ctrl); 2807 } 2808 2809 static void
+1
drivers/nvme/host/nvme.h
··· 170 u64 cap; 171 u32 page_size; 172 u32 max_hw_sectors; 173 u16 oncs; 174 u16 oacs; 175 u16 nssa;
··· 170 u64 cap; 171 u32 page_size; 172 u32 max_hw_sectors; 173 + u32 max_segments; 174 u16 oncs; 175 u16 oacs; 176 u16 nssa;
+38 -6
drivers/nvme/host/pci.c
··· 38 39 #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) 40 41 static int use_threaded_interrupts; 42 module_param(use_threaded_interrupts, int, 0); 43 ··· 106 u32 cmbloc; 107 struct nvme_ctrl ctrl; 108 struct completion ioq_wait; 109 110 /* shadow doorbell buffer support: */ 111 u32 *dbbuf_dbs; ··· 486 iod->use_sgl = nvme_pci_use_sgls(dev, rq); 487 488 if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { 489 - size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg, 490 - iod->use_sgl); 491 - 492 - iod->sg = kmalloc(alloc_size, GFP_ATOMIC); 493 if (!iod->sg) 494 return BLK_STS_RESOURCE; 495 } else { ··· 532 } 533 534 if (iod->sg != iod->inline_sg) 535 - kfree(iod->sg); 536 } 537 538 #ifdef CONFIG_BLK_DEV_INTEGRITY ··· 2286 blk_put_queue(dev->ctrl.admin_q); 2287 kfree(dev->queues); 2288 free_opal_dev(dev->ctrl.opal_dev); 2289 kfree(dev); 2290 } 2291 ··· 2296 2297 nvme_get_ctrl(&dev->ctrl); 2298 nvme_dev_disable(dev, false); 2299 if (!queue_work(nvme_wq, &dev->remove_work)) 2300 nvme_put_ctrl(&dev->ctrl); 2301 } ··· 2340 result = nvme_alloc_admin_tags(dev); 2341 if (result) 2342 goto out; 2343 2344 result = nvme_init_identify(&dev->ctrl); 2345 if (result) ··· 2420 struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); 2421 struct pci_dev *pdev = to_pci_dev(dev->dev); 2422 2423 - nvme_kill_queues(&dev->ctrl); 2424 if (pci_get_drvdata(pdev)) 2425 device_release_driver(&pdev->dev); 2426 nvme_put_ctrl(&dev->ctrl); ··· 2523 int node, result = -ENOMEM; 2524 struct nvme_dev *dev; 2525 unsigned long quirks = id->driver_data; 2526 2527 node = dev_to_node(&pdev->dev); 2528 if (node == NUMA_NO_NODE) ··· 2560 quirks); 2561 if (result) 2562 goto release_pools; 2563 2564 dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); 2565
··· 38 39 #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) 40 41 + /* 42 + * These can be higher, but we need to ensure that any command doesn't 43 + * require an sg allocation that needs more than a page of data. 44 + */ 45 + #define NVME_MAX_KB_SZ 4096 46 + #define NVME_MAX_SEGS 127 47 + 48 static int use_threaded_interrupts; 49 module_param(use_threaded_interrupts, int, 0); 50 ··· 99 u32 cmbloc; 100 struct nvme_ctrl ctrl; 101 struct completion ioq_wait; 102 + 103 + mempool_t *iod_mempool; 104 105 /* shadow doorbell buffer support: */ 106 u32 *dbbuf_dbs; ··· 477 iod->use_sgl = nvme_pci_use_sgls(dev, rq); 478 479 if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { 480 + iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); 481 if (!iod->sg) 482 return BLK_STS_RESOURCE; 483 } else { ··· 526 } 527 528 if (iod->sg != iod->inline_sg) 529 + mempool_free(iod->sg, dev->iod_mempool); 530 } 531 532 #ifdef CONFIG_BLK_DEV_INTEGRITY ··· 2280 blk_put_queue(dev->ctrl.admin_q); 2281 kfree(dev->queues); 2282 free_opal_dev(dev->ctrl.opal_dev); 2283 + mempool_destroy(dev->iod_mempool); 2284 kfree(dev); 2285 } 2286 ··· 2289 2290 nvme_get_ctrl(&dev->ctrl); 2291 nvme_dev_disable(dev, false); 2292 + nvme_kill_queues(&dev->ctrl); 2293 if (!queue_work(nvme_wq, &dev->remove_work)) 2294 nvme_put_ctrl(&dev->ctrl); 2295 } ··· 2332 result = nvme_alloc_admin_tags(dev); 2333 if (result) 2334 goto out; 2335 + 2336 + /* 2337 + * Limit the max command size to prevent iod->sg allocations going 2338 + * over a single page. 2339 + */ 2340 + dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; 2341 + dev->ctrl.max_segments = NVME_MAX_SEGS; 2342 2343 result = nvme_init_identify(&dev->ctrl); 2344 if (result) ··· 2405 struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); 2406 struct pci_dev *pdev = to_pci_dev(dev->dev); 2407 2408 if (pci_get_drvdata(pdev)) 2409 device_release_driver(&pdev->dev); 2410 nvme_put_ctrl(&dev->ctrl); ··· 2509 int node, result = -ENOMEM; 2510 struct nvme_dev *dev; 2511 unsigned long quirks = id->driver_data; 2512 + size_t alloc_size; 2513 2514 node = dev_to_node(&pdev->dev); 2515 if (node == NUMA_NO_NODE) ··· 2545 quirks); 2546 if (result) 2547 goto release_pools; 2548 + 2549 + /* 2550 + * Double check that our mempool alloc size will cover the biggest 2551 + * command we support. 2552 + */ 2553 + alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ, 2554 + NVME_MAX_SEGS, true); 2555 + WARN_ON_ONCE(alloc_size > PAGE_SIZE); 2556 + 2557 + dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, 2558 + mempool_kfree, 2559 + (void *) alloc_size, 2560 + GFP_KERNEL, node); 2561 + if (!dev->iod_mempool) { 2562 + result = -ENOMEM; 2563 + goto release_pools; 2564 + } 2565 2566 dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); 2567
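The two limits above are coupled: max_hw_sectors = NVME_MAX_KB_SZ << 1 caps a command at 4 MiB (4096 KiB expressed in 512-byte sectors), and NVME_MAX_SEGS keeps the scatterlist for such a command within a single page, which is what allows one reserved mempool element to back iod->sg when kmalloc fails. A generic, hypothetical sketch of that mempool fallback pattern (not the nvme code itself):

    #include <linux/mempool.h>
    #include <linux/slab.h>

    /*
     * Keep one pre-allocated buffer of 'size' bytes in reserve so that at
     * least one caller in the I/O path can always obtain a buffer, even
     * when kmalloc(GFP_ATOMIC) fails under memory pressure.  'size' and
     * 'node' are illustrative parameters, not values from this series.
     */
    static mempool_t *example_create_pool(size_t size, int node)
    {
            return mempool_create_node(1, mempool_kmalloc, mempool_kfree,
                                       (void *)size, GFP_KERNEL, node);
    }

    static void *example_alloc(mempool_t *pool)
    {
            /* Falls back to the reserved element when kmalloc cannot deliver. */
            return mempool_alloc(pool, GFP_ATOMIC);
    }

    static void example_free(mempool_t *pool, void *buf)
    {
            /* Refills the reserve first, then returns the rest to kmalloc. */
            mempool_free(buf, pool);
    }
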
+37 -36
drivers/nvme/host/rdma.c
··· 560 if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) 561 return; 562 563 - if (nvme_rdma_queue_idx(queue) == 0) { 564 - nvme_rdma_free_qe(queue->device->dev, 565 - &queue->ctrl->async_event_sqe, 566 - sizeof(struct nvme_command), DMA_TO_DEVICE); 567 - } 568 - 569 nvme_rdma_destroy_queue_ib(queue); 570 rdma_destroy_id(queue->cm_id); 571 } ··· 692 set = &ctrl->tag_set; 693 memset(set, 0, sizeof(*set)); 694 set->ops = &nvme_rdma_mq_ops; 695 - set->queue_depth = nctrl->opts->queue_size; 696 set->reserved_tags = 1; /* fabric connect */ 697 set->numa_node = NUMA_NO_NODE; 698 set->flags = BLK_MQ_F_SHOULD_MERGE; ··· 728 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, 729 bool remove) 730 { 731 - nvme_rdma_stop_queue(&ctrl->queues[0]); 732 if (remove) { 733 blk_cleanup_queue(ctrl->ctrl.admin_q); 734 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 735 } 736 nvme_rdma_free_queue(&ctrl->queues[0]); 737 } ··· 750 751 ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); 752 753 if (new) { 754 ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); 755 if (IS_ERR(ctrl->ctrl.admin_tagset)) { 756 error = PTR_ERR(ctrl->ctrl.admin_tagset); 757 - goto out_free_queue; 758 } 759 760 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); ··· 795 if (error) 796 goto out_stop_queue; 797 798 - error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, 799 - &ctrl->async_event_sqe, sizeof(struct nvme_command), 800 - DMA_TO_DEVICE); 801 - if (error) 802 - goto out_stop_queue; 803 - 804 return 0; 805 806 out_stop_queue: ··· 805 out_free_tagset: 806 if (new) 807 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 808 out_free_queue: 809 nvme_rdma_free_queue(&ctrl->queues[0]); 810 return error; ··· 816 static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, 817 bool remove) 818 { 819 - nvme_rdma_stop_io_queues(ctrl); 820 if (remove) { 821 blk_cleanup_queue(ctrl->ctrl.connect_q); 822 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); ··· 884 list_del(&ctrl->list); 885 mutex_unlock(&nvme_rdma_ctrl_mutex); 886 887 - kfree(ctrl->queues); 888 nvmf_free_options(nctrl->opts); 889 free_ctrl: 890 kfree(ctrl); 891 } ··· 945 return; 946 947 destroy_admin: 948 nvme_rdma_destroy_admin_queue(ctrl, false); 949 requeue: 950 dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ··· 962 963 if (ctrl->ctrl.queue_count > 1) { 964 nvme_stop_queues(&ctrl->ctrl); 965 blk_mq_tagset_busy_iter(&ctrl->tag_set, 966 nvme_cancel_request, &ctrl->ctrl); 967 nvme_rdma_destroy_io_queues(ctrl, false); 968 } 969 970 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 971 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 972 nvme_cancel_request, &ctrl->ctrl); 973 nvme_rdma_destroy_admin_queue(ctrl, false); ··· 1735 { 1736 if (ctrl->ctrl.queue_count > 1) { 1737 nvme_stop_queues(&ctrl->ctrl); 1738 blk_mq_tagset_busy_iter(&ctrl->tag_set, 1739 nvme_cancel_request, &ctrl->ctrl); 1740 nvme_rdma_destroy_io_queues(ctrl, shutdown); ··· 1747 nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); 1748 1749 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 1750 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 1751 nvme_cancel_request, &ctrl->ctrl); 1752 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); ··· 1933 goto out_free_ctrl; 1934 } 1935 1936 - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 1937 - 0 /* no quirks, we're perfect! */); 1938 - if (ret) 1939 - goto out_free_ctrl; 1940 - 1941 INIT_DELAYED_WORK(&ctrl->reconnect_work, 1942 nvme_rdma_reconnect_ctrl_work); 1943 INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); ··· 1946 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), 1947 GFP_KERNEL); 1948 if (!ctrl->queues) 1949 - goto out_uninit_ctrl; 1950 1951 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING); 1952 WARN_ON_ONCE(!changed); 1953 1954 ret = nvme_rdma_configure_admin_queue(ctrl, true); 1955 if (ret) 1956 - goto out_kfree_queues; 1957 1958 /* sanity check icdoff */ 1959 if (ctrl->ctrl.icdoff) { ··· 1975 goto out_remove_admin_queue; 1976 } 1977 1978 - if (opts->queue_size > ctrl->ctrl.maxcmd) { 1979 - /* warn if maxcmd is lower than queue_size */ 1980 - dev_warn(ctrl->ctrl.device, 1981 - "queue_size %zu > ctrl maxcmd %u, clamping down\n", 1982 - opts->queue_size, ctrl->ctrl.maxcmd); 1983 - opts->queue_size = ctrl->ctrl.maxcmd; 1984 - } 1985 - 1986 if (opts->queue_size > ctrl->ctrl.sqsize + 1) { 1987 - /* warn if sqsize is lower than queue_size */ 1988 dev_warn(ctrl->ctrl.device, 1989 "queue_size %zu > ctrl sqsize %u, clamping down\n", 1990 opts->queue_size, ctrl->ctrl.sqsize + 1); 1991 - opts->queue_size = ctrl->ctrl.sqsize + 1; 1992 } 1993 1994 if (opts->nr_io_queues) { ··· 2013 return &ctrl->ctrl; 2014 2015 out_remove_admin_queue: 2016 nvme_rdma_destroy_admin_queue(ctrl, true); 2017 - out_kfree_queues: 2018 - kfree(ctrl->queues); 2019 out_uninit_ctrl: 2020 nvme_uninit_ctrl(&ctrl->ctrl); 2021 nvme_put_ctrl(&ctrl->ctrl); 2022 if (ret > 0) 2023 ret = -EIO; 2024 return ERR_PTR(ret); 2025 out_free_ctrl: 2026 kfree(ctrl); 2027 return ERR_PTR(ret);
··· 560 if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) 561 return; 562 563 nvme_rdma_destroy_queue_ib(queue); 564 rdma_destroy_id(queue->cm_id); 565 } ··· 698 set = &ctrl->tag_set; 699 memset(set, 0, sizeof(*set)); 700 set->ops = &nvme_rdma_mq_ops; 701 + set->queue_depth = nctrl->sqsize + 1; 702 set->reserved_tags = 1; /* fabric connect */ 703 set->numa_node = NUMA_NO_NODE; 704 set->flags = BLK_MQ_F_SHOULD_MERGE; ··· 734 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, 735 bool remove) 736 { 737 if (remove) { 738 blk_cleanup_queue(ctrl->ctrl.admin_q); 739 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 740 } 741 + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, 742 + sizeof(struct nvme_command), DMA_TO_DEVICE); 743 nvme_rdma_free_queue(&ctrl->queues[0]); 744 } ··· 755 756 ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); 757 758 + error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe, 759 + sizeof(struct nvme_command), DMA_TO_DEVICE); 760 + if (error) 761 + goto out_free_queue; 762 + 763 if (new) { 764 ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); 765 if (IS_ERR(ctrl->ctrl.admin_tagset)) { 766 error = PTR_ERR(ctrl->ctrl.admin_tagset); 767 + goto out_free_async_qe; 768 } 769 770 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); ··· 795 if (error) 796 goto out_stop_queue; 797 798 return 0; 799 800 out_stop_queue: ··· 811 out_free_tagset: 812 if (new) 813 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 814 + out_free_async_qe: 815 + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, 816 + sizeof(struct nvme_command), DMA_TO_DEVICE); 817 out_free_queue: 818 nvme_rdma_free_queue(&ctrl->queues[0]); 819 return error; ··· 819 static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, 820 bool remove) 821 { 822 if (remove) { 823 blk_cleanup_queue(ctrl->ctrl.connect_q); 824 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); ··· 888 list_del(&ctrl->list); 889 mutex_unlock(&nvme_rdma_ctrl_mutex); 890 891 nvmf_free_options(nctrl->opts); 892 free_ctrl: 893 + kfree(ctrl->queues); 894 kfree(ctrl); 895 } ··· 949 return; 950 951 destroy_admin: 952 + nvme_rdma_stop_queue(&ctrl->queues[0]); 953 nvme_rdma_destroy_admin_queue(ctrl, false); 954 requeue: 955 dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ··· 965 966 if (ctrl->ctrl.queue_count > 1) { 967 nvme_stop_queues(&ctrl->ctrl); 968 + nvme_rdma_stop_io_queues(ctrl); 969 blk_mq_tagset_busy_iter(&ctrl->tag_set, 970 nvme_cancel_request, &ctrl->ctrl); 971 nvme_rdma_destroy_io_queues(ctrl, false); 972 } 973 974 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 975 + nvme_rdma_stop_queue(&ctrl->queues[0]); 976 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 977 nvme_cancel_request, &ctrl->ctrl); 978 nvme_rdma_destroy_admin_queue(ctrl, false); ··· 1736 { 1737 if (ctrl->ctrl.queue_count > 1) { 1738 nvme_stop_queues(&ctrl->ctrl); 1739 + nvme_rdma_stop_io_queues(ctrl); 1740 blk_mq_tagset_busy_iter(&ctrl->tag_set, 1741 nvme_cancel_request, &ctrl->ctrl); 1742 nvme_rdma_destroy_io_queues(ctrl, shutdown); ··· 1747 nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); 1748 1749 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 1750 + nvme_rdma_stop_queue(&ctrl->queues[0]); 1751 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 1752 nvme_cancel_request, &ctrl->ctrl); 1753 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); ··· 1932 goto out_free_ctrl; 1933 } 1934 1935 INIT_DELAYED_WORK(&ctrl->reconnect_work, 1936 nvme_rdma_reconnect_ctrl_work); 1937 INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); ··· 1950 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), 1951 GFP_KERNEL); 1952 if (!ctrl->queues) 1953 + goto out_free_ctrl; 1954 + 1955 + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 1956 + 0 /* no quirks, we're perfect! */); 1957 + if (ret) 1958 + goto out_kfree_queues; 1959 1960 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING); 1961 WARN_ON_ONCE(!changed); 1962 1963 ret = nvme_rdma_configure_admin_queue(ctrl, true); 1964 if (ret) 1965 + goto out_uninit_ctrl; 1966 1967 /* sanity check icdoff */ 1968 if (ctrl->ctrl.icdoff) { ··· 1974 goto out_remove_admin_queue; 1975 } 1976 1977 + /* only warn if argument is too large here, will clamp later */ 1978 if (opts->queue_size > ctrl->ctrl.sqsize + 1) { 1979 dev_warn(ctrl->ctrl.device, 1980 "queue_size %zu > ctrl sqsize %u, clamping down\n", 1981 opts->queue_size, ctrl->ctrl.sqsize + 1); 1982 + } 1983 + 1984 + /* warn if maxcmd is lower than sqsize+1 */ 1985 + if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { 1986 + dev_warn(ctrl->ctrl.device, 1987 + "sqsize %u > ctrl maxcmd %u, clamping down\n", 1988 + ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd); 1989 + ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1; 1990 } 1991 1992 if (opts->nr_io_queues) { ··· 2013 return &ctrl->ctrl; 2014 2015 out_remove_admin_queue: 2016 + nvme_rdma_stop_queue(&ctrl->queues[0]); 2017 nvme_rdma_destroy_admin_queue(ctrl, true); 2018 out_uninit_ctrl: 2019 nvme_uninit_ctrl(&ctrl->ctrl); 2020 nvme_put_ctrl(&ctrl->ctrl); 2021 if (ret > 0) 2022 ret = -EIO; 2023 return ERR_PTR(ret); 2024 + out_kfree_queues: 2025 + kfree(ctrl->queues); 2026 out_free_ctrl: 2027 kfree(ctrl); 2028 return ERR_PTR(ret);
+8
drivers/nvme/target/core.c
··· 686 } 687 688 ctrl->csts = NVME_CSTS_RDY; 689 } 690 691 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
··· 686 } 687 688 ctrl->csts = NVME_CSTS_RDY; 689 + 690 + /* 691 + * Controllers that are not yet enabled should not really enforce the 692 + * keep alive timeout, but we still want to track a timeout and cleanup 693 + * in case a host died before it enabled the controller. Hence, simply 694 + * reset the keep alive timer when the controller is enabled. 695 + */ 696 + mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); 697 } 698 699 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
+1 -1
drivers/scsi/scsi_transport_fc.c
··· 3592 3593 /* the blk_end_sync_io() doesn't check the error */ 3594 if (inflight) 3595 - blk_mq_complete_request(req); 3596 return BLK_EH_DONE; 3597 } 3598
··· 3592 3593 /* the blk_end_sync_io() doesn't check the error */ 3594 if (inflight) 3595 + __blk_complete_request(req); 3596 return BLK_EH_DONE; 3597 } 3598
+1 -1
include/linux/backing-dev-defs.h
··· 22 */ 23 enum wb_state { 24 WB_registered, /* bdi_register() was done */ 25 - WB_shutting_down, /* wb_shutdown() in progress */ 26 WB_writeback_running, /* Writeback is in progress */ 27 WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ 28 WB_start_all, /* nr_pages == 0 (all) work pending */ ··· 188 #ifdef CONFIG_CGROUP_WRITEBACK 189 struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ 190 struct rb_root cgwb_congested_tree; /* their congested states */ 191 #else 192 struct bdi_writeback_congested *wb_congested; 193 #endif
··· 22 */ 23 enum wb_state { 24 WB_registered, /* bdi_register() was done */ 25 WB_writeback_running, /* Writeback is in progress */ 26 WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ 27 WB_start_all, /* nr_pages == 0 (all) work pending */ ··· 189 #ifdef CONFIG_CGROUP_WRITEBACK 190 struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ 191 struct rb_root cgwb_congested_tree; /* their congested states */ 192 + struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */ 193 #else 194 struct bdi_writeback_congested *wb_congested; 195 #endif
+3
include/uapi/linux/nbd.h
··· 53 /* These are client behavior specific flags. */ 54 #define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on 55 disconnect. */ 56 57 /* userspace doesn't need the nbd_device structure */ 58
··· 53 /* These are client behavior specific flags. */ 54 #define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on 55 disconnect. */ 56 + #define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1) /* disconnect the nbd device on 57 + * close by last opener. 58 + */ 59 60 /* userspace doesn't need the nbd_device structure */ 61
+7 -13
mm/backing-dev.c
··· 359 spin_lock_bh(&wb->work_lock); 360 if (!test_and_clear_bit(WB_registered, &wb->state)) { 361 spin_unlock_bh(&wb->work_lock); 362 - /* 363 - * Wait for wb shutdown to finish if someone else is just 364 - * running wb_shutdown(). Otherwise we could proceed to wb / 365 - * bdi destruction before wb_shutdown() is finished. 366 - */ 367 - wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE); 368 return; 369 } 370 - set_bit(WB_shutting_down, &wb->state); 371 spin_unlock_bh(&wb->work_lock); 372 373 cgwb_remove_from_bdi_list(wb); ··· 372 mod_delayed_work(bdi_wq, &wb->dwork, 0); 373 flush_delayed_work(&wb->dwork); 374 WARN_ON(!list_empty(&wb->work_list)); 375 - /* 376 - * Make sure bit gets cleared after shutdown is finished. Matches with 377 - * the barrier provided by test_and_clear_bit() above. 378 - */ 379 - smp_wmb(); 380 - clear_and_wake_up_bit(WB_shutting_down, &wb->state); 381 } 382 383 static void wb_exit(struct bdi_writeback *wb) ··· 495 struct bdi_writeback *wb = container_of(work, struct bdi_writeback, 496 release_work); 497 498 wb_shutdown(wb); 499 500 css_put(wb->memcg_css); 501 css_put(wb->blkcg_css); 502 503 fprop_local_destroy_percpu(&wb->memcg_completions); 504 percpu_ref_exit(&wb->refcnt); ··· 686 687 INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); 688 bdi->cgwb_congested_tree = RB_ROOT; 689 690 ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); 691 if (!ret) { ··· 707 spin_lock_irq(&cgwb_lock); 708 radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) 709 cgwb_kill(*slot); 710 711 while (!list_empty(&bdi->wb_list)) { 712 wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, 713 bdi_node); ··· 719 spin_lock_irq(&cgwb_lock); 720 } 721 spin_unlock_irq(&cgwb_lock); 722 } 723 724 /**
··· 359 spin_lock_bh(&wb->work_lock); 360 if (!test_and_clear_bit(WB_registered, &wb->state)) { 361 spin_unlock_bh(&wb->work_lock); 362 return; 363 } 364 spin_unlock_bh(&wb->work_lock); 365 366 cgwb_remove_from_bdi_list(wb); ··· 379 mod_delayed_work(bdi_wq, &wb->dwork, 0); 380 flush_delayed_work(&wb->dwork); 381 WARN_ON(!list_empty(&wb->work_list)); 382 } 383 384 static void wb_exit(struct bdi_writeback *wb) ··· 508 struct bdi_writeback *wb = container_of(work, struct bdi_writeback, 509 release_work); 510 511 + mutex_lock(&wb->bdi->cgwb_release_mutex); 512 wb_shutdown(wb); 513 514 css_put(wb->memcg_css); 515 css_put(wb->blkcg_css); 516 + mutex_unlock(&wb->bdi->cgwb_release_mutex); 517 518 fprop_local_destroy_percpu(&wb->memcg_completions); 519 percpu_ref_exit(&wb->refcnt); ··· 697 698 INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); 699 bdi->cgwb_congested_tree = RB_ROOT; 700 + mutex_init(&bdi->cgwb_release_mutex); 701 702 ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); 703 if (!ret) { ··· 717 spin_lock_irq(&cgwb_lock); 718 radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) 719 cgwb_kill(*slot); 720 + spin_unlock_irq(&cgwb_lock); 721 722 + mutex_lock(&bdi->cgwb_release_mutex); 723 + spin_lock_irq(&cgwb_lock); 724 while (!list_empty(&bdi->wb_list)) { 725 wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, 726 bdi_node); ··· 726 spin_lock_irq(&cgwb_lock); 727 } 728 spin_unlock_irq(&cgwb_lock); 729 + mutex_unlock(&bdi->cgwb_release_mutex); 730 } 731 732 /**