Merge tag 'for-linus-20180623' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- Further timeout fixes. We aren't quite there yet, so expect another
round of fixes for that to completely close some of the IRQ vs
completion races. (Christoph/Bart)

- Set of NVMe fixes from the usual suspects, mostly error handling

- Two off-by-one fixes (Dan)

- Another bdi race fix (Jan)

- Fix nbd reconfigure with NBD_DISCONNECT_ON_CLOSE (Doron)

* tag 'for-linus-20180623' of git://git.kernel.dk/linux-block:
blk-mq: Fix timeout handling in case the timeout handler returns BLK_EH_DONE
bdi: Fix another oops in wb_workfn()
lightnvm: Remove depends on HAS_DMA in case of platform dependency
nvme-pci: limit max IO size and segments to avoid high order allocations
nvme-pci: move nvme_kill_queues to nvme_remove_dead_ctrl
nvme-fc: release io queues to allow fast fail
nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag.
block: sed-opal: Fix a couple off by one bugs
blk-mq-debugfs: Off by one in blk_mq_rq_state_name()
nvmet: reset keep alive timer in controller enable
nvme-rdma: don't override opts->queue_size
nvme-rdma: Fix command completion race at error recovery
nvme-rdma: fix possible free of a non-allocated async event buffer
nvme-rdma: fix possible double free condition when failing to create a controller
Revert "block: Add warning for bi_next not NULL in bio_endio()"
block: fix timeout changes for legacy request drivers

20 files changed, 142 insertions(+), 84 deletions(-)
-3
block/bio.c
···
 	if (!bio_integrity_endio(bio))
 		return;
 
-	if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
-		bio->bi_next = NULL;
-
 	/*
 	 * Need to have a real endio function for chained bios, otherwise
 	 * various corner cases will break (like stacking block devices that
+1 -7
block/blk-core.c
···
 	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
-	/*
-	 * XXX this code looks suspicious - it's not consistent with advancing
-	 * req->bio in caller
-	 */
 	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
 		bio_endio(bio);
 }
···
 		struct bio *bio = req->bio;
 		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-		if (bio_bytes == bio->bi_iter.bi_size) {
+		if (bio_bytes == bio->bi_iter.bi_size)
 			req->bio = bio->bi_next;
-			bio->bi_next = NULL;
-		}
 
 		/* Completion has already been traced */
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+1 -1
block/blk-mq-debugfs.c
···
 
 static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
 {
-	if (WARN_ON_ONCE((unsigned int)rq_state >
+	if (WARN_ON_ONCE((unsigned int)rq_state >=
 			 ARRAY_SIZE(blk_mq_rq_state_name_array)))
 		return "(?)";
 	return blk_mq_rq_state_name_array[rq_state];
-1
block/blk-mq.c
···
 		WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
 	}
 
-	req->rq_flags &= ~RQF_TIMED_OUT;
 	blk_add_timer(req);
 }
 
+1
block/blk-softirq.c
···
 
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL(__blk_complete_request);
 
 /**
  * blk_complete_request - end I/O on a request
+1
block/blk-timeout.c
···
 	if (!req->timeout)
 		req->timeout = q->rq_timeout;
 
+	req->rq_flags &= ~RQF_TIMED_OUT;
 	blk_rq_set_deadline(req, jiffies + req->timeout);
 
 	/*
+2 -2
block/sed-opal.c
···
 		return 0;
 	}
 
-	if (n > resp->num) {
+	if (n >= resp->num) {
 		pr_debug("Response has %d tokens. Can't access %d\n",
 			 resp->num, n);
 		return 0;
···
 		return 0;
 	}
 
-	if (n > resp->num) {
+	if (n >= resp->num) {
 		pr_debug("Response has %d tokens. Can't access %d\n",
 			 resp->num, n);
 		return 0;
+34 -8
drivers/block/nbd.c
···
 #define NBD_HAS_CONFIG_REF		4
 #define NBD_BOUND			5
 #define NBD_DESTROY_ON_DISCONNECT	6
+#define NBD_DISCONNECT_ON_CLOSE	7
 
 struct nbd_config {
 	u32 flags;
···
 static void nbd_connect_reply(struct genl_info *info, int index);
 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
 static void nbd_dead_link_work(struct work_struct *work);
+static void nbd_disconnect_and_put(struct nbd_device *nbd);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
···
 static void nbd_release(struct gendisk *disk, fmode_t mode)
 {
 	struct nbd_device *nbd = disk->private_data;
+	struct block_device *bdev = bdget_disk(disk, 0);
+
+	if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+			bdev->bd_openers == 0)
+		nbd_disconnect_and_put(nbd);
+
 	nbd_config_put(nbd);
 	nbd_put(nbd);
 }
···
 				&config->runtime_flags);
 			put_dev = true;
 		}
+		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+			set_bit(NBD_DISCONNECT_ON_CLOSE,
+				&config->runtime_flags);
+		}
 	}
 
 	if (info->attrs[NBD_ATTR_SOCKETS]) {
···
 	return ret;
 }
 
+static void nbd_disconnect_and_put(struct nbd_device *nbd)
+{
+	mutex_lock(&nbd->config_lock);
+	nbd_disconnect(nbd);
+	nbd_clear_sock(nbd);
+	mutex_unlock(&nbd->config_lock);
+	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+			       &nbd->config->runtime_flags))
+		nbd_config_put(nbd);
+}
+
 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nbd_device *nbd;
···
 		nbd_put(nbd);
 		return 0;
 	}
-	mutex_lock(&nbd->config_lock);
-	nbd_disconnect(nbd);
-	nbd_clear_sock(nbd);
-	mutex_unlock(&nbd->config_lock);
-	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
-			       &nbd->config->runtime_flags))
-		nbd_config_put(nbd);
+	nbd_disconnect_and_put(nbd);
 	nbd_config_put(nbd);
 	nbd_put(nbd);
 	return 0;
···
 	struct nbd_device *nbd = NULL;
 	struct nbd_config *config;
 	int index;
-	int ret = -EINVAL;
+	int ret = 0;
 	bool put_dev = false;
 
 	if (!netlink_capable(skb, CAP_SYS_ADMIN))
···
 	    !nbd->task_recv) {
 		dev_err(nbd_to_dev(nbd),
 			"not configured, cannot reconfigure\n");
+		ret = -EINVAL;
 		goto out;
 	}
 
···
 			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
 					       &config->runtime_flags))
 				refcount_inc(&nbd->refs);
+		}
+
+		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+			set_bit(NBD_DISCONNECT_ON_CLOSE,
+				&config->runtime_flags);
+		} else {
+			clear_bit(NBD_DISCONNECT_ON_CLOSE,
+				  &config->runtime_flags);
 		}
 	}
 
+1 -1
drivers/block/null_blk.c
···
 static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
 {
 	pr_info("null: rq %p timed out\n", rq);
-	blk_mq_complete_request(rq);
+	__blk_complete_request(rq);
 	return BLK_EH_DONE;
 }
 
+1 -1
drivers/lightnvm/Kconfig
···
 
 menuconfig NVM
 	bool "Open-Channel SSD target support"
-	depends on BLOCK && HAS_DMA && PCI
+	depends on BLOCK && PCI
 	select BLK_DEV_NVME
 	help
 	  Say Y here to get to enable Open-channel SSDs.
+1
drivers/nvme/host/core.c
···
 		u32 max_segments =
 			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
 
+		max_segments = min_not_zero(max_segments, ctrl->max_segments);
 		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
 		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
 	}
+3 -3
drivers/nvme/host/fc.c
···
 	/* re-enable the admin_q so anything new can fast fail */
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
+	/* resume the io queues so that things will fast fail */
+	nvme_start_queues(&ctrl->ctrl);
+
 	nvme_fc_ctlr_inactive_on_rport(ctrl);
 }
 
···
 	 * waiting for io to terminate
 	 */
 	nvme_fc_delete_association(ctrl);
-
-	/* resume the io queues so that things will fast fail */
-	nvme_start_queues(nctrl);
 }
 
 static void
+1
drivers/nvme/host/nvme.h
···
 	u64 cap;
 	u32 page_size;
 	u32 max_hw_sectors;
+	u32 max_segments;
 	u16 oncs;
 	u16 oacs;
 	u16 nssa;
+38 -6
drivers/nvme/host/pci.c
···
 
 #define SGES_PER_PAGE	(PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
+/*
+ * These can be higher, but we need to ensure that any command doesn't
+ * require an sg allocation that needs more than a page of data.
+ */
+#define NVME_MAX_KB_SZ	4096
+#define NVME_MAX_SEGS	127
+
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
···
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
+
+	mempool_t *iod_mempool;
 
 	/* shadow doorbell buffer support: */
 	u32 *dbbuf_dbs;
···
 	iod->use_sgl = nvme_pci_use_sgls(dev, rq);
 
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-		size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
-				iod->use_sgl);
-
-		iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
+		iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
 		if (!iod->sg)
 			return BLK_STS_RESOURCE;
 	} else {
···
 	}
 
 	if (iod->sg != iod->inline_sg)
-		kfree(iod->sg);
+		mempool_free(iod->sg, dev->iod_mempool);
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
···
 	blk_put_queue(dev->ctrl.admin_q);
 	kfree(dev->queues);
 	free_opal_dev(dev->ctrl.opal_dev);
+	mempool_destroy(dev->iod_mempool);
 	kfree(dev);
 }
···
 
 	nvme_get_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, false);
+	nvme_kill_queues(&dev->ctrl);
 	if (!queue_work(nvme_wq, &dev->remove_work))
 		nvme_put_ctrl(&dev->ctrl);
 }
···
 	result = nvme_alloc_admin_tags(dev);
 	if (result)
 		goto out;
+
+	/*
+	 * Limit the max command size to prevent iod->sg allocations going
+	 * over a single page.
+	 */
+	dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+	dev->ctrl.max_segments = NVME_MAX_SEGS;
 
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
···
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-	nvme_kill_queues(&dev->ctrl);
 	if (pci_get_drvdata(pdev))
 		device_release_driver(&pdev->dev);
 	nvme_put_ctrl(&dev->ctrl);
···
 	int node, result = -ENOMEM;
 	struct nvme_dev *dev;
 	unsigned long quirks = id->driver_data;
+	size_t alloc_size;
 
 	node = dev_to_node(&pdev->dev);
 	if (node == NUMA_NO_NODE)
···
 			quirks);
 	if (result)
 		goto release_pools;
+
+	/*
+	 * Double check that our mempool alloc size will cover the biggest
+	 * command we support.
+	 */
+	alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
+						NVME_MAX_SEGS, true);
+	WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+
+	dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
+						mempool_kfree,
+						(void *) alloc_size,
+						GFP_KERNEL, node);
+	if (!dev->iod_mempool) {
+		result = -ENOMEM;
+		goto release_pools;
+	}
 
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
+37 -36
drivers/nvme/host/rdma.c
···
 	if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
 		return;
 
-	if (nvme_rdma_queue_idx(queue) == 0) {
-		nvme_rdma_free_qe(queue->device->dev,
-			&queue->ctrl->async_event_sqe,
-			sizeof(struct nvme_command), DMA_TO_DEVICE);
-	}
-
 	nvme_rdma_destroy_queue_ib(queue);
 	rdma_destroy_id(queue->cm_id);
 }
···
 		set = &ctrl->tag_set;
 		memset(set, 0, sizeof(*set));
 		set->ops = &nvme_rdma_mq_ops;
-		set->queue_depth = nctrl->opts->queue_size;
+		set->queue_depth = nctrl->sqsize + 1;
 		set->reserved_tags = 1; /* fabric connect */
 		set->numa_node = NUMA_NO_NODE;
 		set->flags = BLK_MQ_F_SHOULD_MERGE;
···
 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
 		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
 	}
+	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+		sizeof(struct nvme_command), DMA_TO_DEVICE);
 	nvme_rdma_free_queue(&ctrl->queues[0]);
 }
 
···
 
 	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
+	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+			sizeof(struct nvme_command), DMA_TO_DEVICE);
+	if (error)
+		goto out_free_queue;
+
 	if (new) {
 		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
 		if (IS_ERR(ctrl->ctrl.admin_tagset)) {
 			error = PTR_ERR(ctrl->ctrl.admin_tagset);
-			goto out_free_queue;
+			goto out_free_async_qe;
 		}
 
 		ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
···
 	if (error)
 		goto out_stop_queue;
 
-	error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
-		&ctrl->async_event_sqe, sizeof(struct nvme_command),
-		DMA_TO_DEVICE);
-	if (error)
-		goto out_stop_queue;
-
 	return 0;
 
 out_stop_queue:
···
 out_free_tagset:
 	if (new)
 		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+out_free_async_qe:
+	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+		sizeof(struct nvme_command), DMA_TO_DEVICE);
 out_free_queue:
 	nvme_rdma_free_queue(&ctrl->queues[0]);
 	return error;
···
 static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
-	nvme_rdma_stop_io_queues(ctrl);
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.connect_q);
 		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
···
 	list_del(&ctrl->list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	kfree(ctrl->queues);
 	nvmf_free_options(nctrl->opts);
 free_ctrl:
+	kfree(ctrl->queues);
 	kfree(ctrl);
 }
···
 	return;
 
 destroy_admin:
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	nvme_rdma_destroy_admin_queue(ctrl, false);
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
···
 
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
+		nvme_rdma_stop_io_queues(ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
 		nvme_rdma_destroy_io_queues(ctrl, false);
 	}
 
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 	nvme_rdma_destroy_admin_queue(ctrl, false);
···
 {
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
+		nvme_rdma_stop_io_queues(ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
 		nvme_rdma_destroy_io_queues(ctrl, shutdown);
···
 		nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
···
 		goto out_free_ctrl;
 	}
 
-	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
-				0 /* no quirks, we're perfect! */);
-	if (ret)
-		goto out_free_ctrl;
-
 	INIT_DELAYED_WORK(&ctrl->reconnect_work,
 			nvme_rdma_reconnect_ctrl_work);
 	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
···
 	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
 				GFP_KERNEL);
 	if (!ctrl->queues)
-		goto out_uninit_ctrl;
+		goto out_free_ctrl;
+
+	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
+				0 /* no quirks, we're perfect! */);
+	if (ret)
+		goto out_kfree_queues;
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
 	WARN_ON_ONCE(!changed);
 
 	ret = nvme_rdma_configure_admin_queue(ctrl, true);
 	if (ret)
-		goto out_kfree_queues;
+		goto out_uninit_ctrl;
 
 	/* sanity check icdoff */
 	if (ctrl->ctrl.icdoff) {
···
 		goto out_remove_admin_queue;
 	}
 
-	if (opts->queue_size > ctrl->ctrl.maxcmd) {
-		/* warn if maxcmd is lower than queue_size */
-		dev_warn(ctrl->ctrl.device,
-			"queue_size %zu > ctrl maxcmd %u, clamping down\n",
-			opts->queue_size, ctrl->ctrl.maxcmd);
-		opts->queue_size = ctrl->ctrl.maxcmd;
-	}
-
+	/* only warn if argument is too large here, will clamp later */
 	if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
-		/* warn if sqsize is lower than queue_size */
 		dev_warn(ctrl->ctrl.device,
 			"queue_size %zu > ctrl sqsize %u, clamping down\n",
 			opts->queue_size, ctrl->ctrl.sqsize + 1);
-		opts->queue_size = ctrl->ctrl.sqsize + 1;
+	}
+
+	/* warn if maxcmd is lower than sqsize+1 */
+	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
+		dev_warn(ctrl->ctrl.device,
+			"sqsize %u > ctrl maxcmd %u, clamping down\n",
+			ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
+		ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
 	}
 
 	if (opts->nr_io_queues) {
···
 	return &ctrl->ctrl;
 
 out_remove_admin_queue:
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	nvme_rdma_destroy_admin_queue(ctrl, true);
-out_kfree_queues:
-	kfree(ctrl->queues);
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&ctrl->ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 	if (ret > 0)
 		ret = -EIO;
 	return ERR_PTR(ret);
+out_kfree_queues:
+	kfree(ctrl->queues);
 out_free_ctrl:
 	kfree(ctrl);
 	return ERR_PTR(ret);
+8
drivers/nvme/target/core.c
···
 	}
 
 	ctrl->csts = NVME_CSTS_RDY;
+
+	/*
+	 * Controllers that are not yet enabled should not really enforce the
+	 * keep alive timeout, but we still want to track a timeout and cleanup
+	 * in case a host died before it enabled the controller.  Hence, simply
+	 * reset the keep alive timer when the controller is enabled.
+	 */
+	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
+1 -1
drivers/scsi/scsi_transport_fc.c
···
 
 	/* the blk_end_sync_io() doesn't check the error */
 	if (inflight)
-		blk_mq_complete_request(req);
+		__blk_complete_request(req);
 	return BLK_EH_DONE;
 }
 
+1 -1
include/linux/backing-dev-defs.h
···
  */
 enum wb_state {
 	WB_registered,		/* bdi_register() was done */
-	WB_shutting_down,	/* wb_shutdown() in progress */
 	WB_writeback_running,	/* Writeback is in progress */
 	WB_has_dirty_io,	/* Dirty inodes on ->b_{dirty|io|more_io} */
 	WB_start_all,		/* nr_pages == 0 (all) work pending */
···
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
 	struct rb_root cgwb_congested_tree; /* their congested states */
+	struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
 #else
 	struct bdi_writeback_congested *wb_congested;
 #endif
+3
include/uapi/linux/nbd.h
···
 /* These are client behavior specific flags. */
 #define NBD_CFLAG_DESTROY_ON_DISCONNECT	(1 << 0) /* delete the nbd device on
 						    disconnect. */
+#define NBD_CFLAG_DISCONNECT_ON_CLOSE	(1 << 1) /* disconnect the nbd device on
+						  * close by last opener.
+						  */
 
 /* userspace doesn't need the nbd_device structure */
+7 -13
mm/backing-dev.c
···
 	spin_lock_bh(&wb->work_lock);
 	if (!test_and_clear_bit(WB_registered, &wb->state)) {
 		spin_unlock_bh(&wb->work_lock);
-		/*
-		 * Wait for wb shutdown to finish if someone else is just
-		 * running wb_shutdown(). Otherwise we could proceed to wb /
-		 * bdi destruction before wb_shutdown() is finished.
-		 */
-		wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE);
 		return;
 	}
-	set_bit(WB_shutting_down, &wb->state);
 	spin_unlock_bh(&wb->work_lock);
 
 	cgwb_remove_from_bdi_list(wb);
···
 	mod_delayed_work(bdi_wq, &wb->dwork, 0);
 	flush_delayed_work(&wb->dwork);
 	WARN_ON(!list_empty(&wb->work_list));
-	/*
-	 * Make sure bit gets cleared after shutdown is finished. Matches with
-	 * the barrier provided by test_and_clear_bit() above.
-	 */
-	smp_wmb();
-	clear_and_wake_up_bit(WB_shutting_down, &wb->state);
 }
 
 static void wb_exit(struct bdi_writeback *wb)
···
 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
 						release_work);
 
+	mutex_lock(&wb->bdi->cgwb_release_mutex);
 	wb_shutdown(wb);
 
 	css_put(wb->memcg_css);
 	css_put(wb->blkcg_css);
+	mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
···
 
 	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
 	bdi->cgwb_congested_tree = RB_ROOT;
+	mutex_init(&bdi->cgwb_release_mutex);
 
 	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (!ret) {
···
 	spin_lock_irq(&cgwb_lock);
 	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
 		cgwb_kill(*slot);
+	spin_unlock_irq(&cgwb_lock);
 
+	mutex_lock(&bdi->cgwb_release_mutex);
+	spin_lock_irq(&cgwb_lock);
 	while (!list_empty(&bdi->wb_list)) {
 		wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
 				      bdi_node);
···
 		spin_lock_irq(&cgwb_lock);
 	}
 	spin_unlock_irq(&cgwb_lock);
+	mutex_unlock(&bdi->cgwb_release_mutex);
 }
 
 /**