Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'block-6.5-2023-08-11' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

- NVMe pull request via Keith:
- Fixes for request_queue state (Ming)
- Another uuid quirk (August)

- RCU poll fix for NVMe (Ming)

- Fix for an IO stall with polled IO (me)

- Fix for blk-iocost stats enable/disable accounting (Chengming)

- Regression fix for large pages for zram (Christoph)

* tag 'block-6.5-2023-08-11' of git://git.kernel.dk/linux:
nvme: core: don't hold rcu read lock in nvme_ns_chr_uring_cmd_iopoll
blk-iocost: fix queue stats accounting
block: don't make REQ_POLLED imply REQ_NOWAIT
block: get rid of unused plug->nowait flag
zram: take device and not only bvec offset into account
nvme-pci: add NVME_QUIRK_BOGUS_NID for Samsung PM9B1 256G and 512G
nvme-rdma: fix potential unbalanced freeze & unfreeze
nvme-tcp: fix potential unbalanced freeze & unfreeze
nvme: fix possible hang when removing a controller during error recovery

+41 -33
-6
block/blk-core.c
··· 722 722 struct block_device *bdev = bio->bi_bdev; 723 723 struct request_queue *q = bdev_get_queue(bdev); 724 724 blk_status_t status = BLK_STS_IOERR; 725 - struct blk_plug *plug; 726 725 727 726 might_sleep(); 728 - 729 - plug = blk_mq_plug(bio); 730 - if (plug && plug->nowait) 731 - bio->bi_opf |= REQ_NOWAIT; 732 727 733 728 /* 734 729 * For a REQ_NOWAIT based request, return -EOPNOTSUPP ··· 1054 1059 plug->rq_count = 0; 1055 1060 plug->multiple_queues = false; 1056 1061 plug->has_elevator = false; 1057 - plug->nowait = false; 1058 1062 INIT_LIST_HEAD(&plug->cb_list); 1059 1063 1060 1064 /*
+3 -2
block/blk-iocost.c
··· 3301 3301 if (qos[QOS_MIN] > qos[QOS_MAX]) 3302 3302 goto einval; 3303 3303 3304 - if (enable) { 3304 + if (enable && !ioc->enabled) { 3305 3305 blk_stat_enable_accounting(disk->queue); 3306 3306 blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); 3307 3307 ioc->enabled = true; 3308 - } else { 3308 + } else if (!enable && ioc->enabled) { 3309 + blk_stat_disable_accounting(disk->queue); 3309 3310 blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); 3310 3311 ioc->enabled = false; 3311 3312 }
+4 -3
block/fops.c
··· 358 358 task_io_account_write(bio->bi_iter.bi_size); 359 359 } 360 360 361 + if (iocb->ki_flags & IOCB_NOWAIT) 362 + bio->bi_opf |= REQ_NOWAIT; 363 + 361 364 if (iocb->ki_flags & IOCB_HIPRI) { 362 - bio->bi_opf |= REQ_POLLED | REQ_NOWAIT; 365 + bio->bi_opf |= REQ_POLLED; 363 366 submit_bio(bio); 364 367 WRITE_ONCE(iocb->private, bio); 365 368 } else { 366 - if (iocb->ki_flags & IOCB_NOWAIT) 367 - bio->bi_opf |= REQ_NOWAIT; 368 369 submit_bio(bio); 369 370 } 370 371 return -EIOCBQUEUED;
+20 -12
drivers/block/zram/zram_drv.c
··· 1870 1870 1871 1871 static void zram_bio_read(struct zram *zram, struct bio *bio) 1872 1872 { 1873 - struct bvec_iter iter; 1874 - struct bio_vec bv; 1875 - unsigned long start_time; 1873 + unsigned long start_time = bio_start_io_acct(bio); 1874 + struct bvec_iter iter = bio->bi_iter; 1876 1875 1877 - start_time = bio_start_io_acct(bio); 1878 - bio_for_each_segment(bv, bio, iter) { 1876 + do { 1879 1877 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 1880 1878 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 1881 1879 SECTOR_SHIFT; 1880 + struct bio_vec bv = bio_iter_iovec(bio, iter); 1881 + 1882 + bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 1882 1883 1883 1884 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { 1884 1885 atomic64_inc(&zram->stats.failed_reads); ··· 1891 1890 zram_slot_lock(zram, index); 1892 1891 zram_accessed(zram, index); 1893 1892 zram_slot_unlock(zram, index); 1894 - } 1893 + 1894 + bio_advance_iter_single(bio, &iter, bv.bv_len); 1895 + } while (iter.bi_size); 1896 + 1895 1897 bio_end_io_acct(bio, start_time); 1896 1898 bio_endio(bio); 1897 1899 } 1898 1900 1899 1901 static void zram_bio_write(struct zram *zram, struct bio *bio) 1900 1902 { 1901 - struct bvec_iter iter; 1902 - struct bio_vec bv; 1903 - unsigned long start_time; 1903 + unsigned long start_time = bio_start_io_acct(bio); 1904 + struct bvec_iter iter = bio->bi_iter; 1904 1905 1905 - start_time = bio_start_io_acct(bio); 1906 - bio_for_each_segment(bv, bio, iter) { 1906 + do { 1907 1907 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 1908 1908 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << 1909 1909 SECTOR_SHIFT; 1910 + struct bio_vec bv = bio_iter_iovec(bio, iter); 1911 + 1912 + bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); 1910 1913 1911 1914 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { 1912 1915 atomic64_inc(&zram->stats.failed_writes); ··· 1921 1916 zram_slot_lock(zram, index); 1922 1917 zram_accessed(zram, index); 1923 1918 zram_slot_unlock(zram, index); 1924 - } 1919 + 1920 + bio_advance_iter_single(bio, &iter, bv.bv_len); 1921 + } while (iter.bi_size); 1922 + 1925 1923 bio_end_io_acct(bio, start_time); 1926 1924 bio_endio(bio); 1927 1925 }
+7 -3
drivers/nvme/host/core.c
··· 3933 3933 */ 3934 3934 nvme_mpath_clear_ctrl_paths(ctrl); 3935 3935 3936 + /* 3937 + * Unquiesce io queues so any pending IO won't hang, especially 3938 + * those submitted from scan work 3939 + */ 3940 + nvme_unquiesce_io_queues(ctrl); 3941 + 3936 3942 /* prevent racing with ns scanning */ 3937 3943 flush_work(&ctrl->scan_work); 3938 3944 ··· 3948 3942 * removing the namespaces' disks; fail all the queues now to avoid 3949 3943 * potentially having to clean up the failed sync later. 3950 3944 */ 3951 - if (ctrl->state == NVME_CTRL_DEAD) { 3945 + if (ctrl->state == NVME_CTRL_DEAD) 3952 3946 nvme_mark_namespaces_dead(ctrl); 3953 - nvme_unquiesce_io_queues(ctrl); 3954 - } 3955 3947 3956 3948 /* this is a no-op when called from the controller reset handler */ 3957 3949 nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
-2
drivers/nvme/host/ioctl.c
··· 786 786 if (!(ioucmd->flags & IORING_URING_CMD_POLLED)) 787 787 return 0; 788 788 789 - rcu_read_lock(); 790 789 req = READ_ONCE(ioucmd->cookie); 791 790 if (req && blk_rq_is_poll(req)) 792 791 ret = blk_rq_poll(req, iob, poll_flags); 793 - rcu_read_unlock(); 794 792 return ret; 795 793 } 796 794 #ifdef CONFIG_NVME_MULTIPATH
+2 -1
drivers/nvme/host/pci.c
··· 3402 3402 { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ 3403 3403 .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, 3404 3404 { PCI_DEVICE(0x144d, 0xa80b), /* Samsung PM9B1 256G and 512G */ 3405 - .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, 3405 + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES | 3406 + NVME_QUIRK_BOGUS_NID, }, 3406 3407 { PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */ 3407 3408 .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, 3408 3409 { PCI_DEVICE(0x144d, 0xa802), /* Samsung SM953 */
+2 -1
drivers/nvme/host/rdma.c
··· 883 883 goto out_cleanup_tagset; 884 884 885 885 if (!new) { 886 + nvme_start_freeze(&ctrl->ctrl); 886 887 nvme_unquiesce_io_queues(&ctrl->ctrl); 887 888 if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) { 888 889 /* ··· 892 891 * to be safe. 893 892 */ 894 893 ret = -ENODEV; 894 + nvme_unfreeze(&ctrl->ctrl); 895 895 goto out_wait_freeze_timed_out; 896 896 } 897 897 blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset, ··· 942 940 bool remove) 943 941 { 944 942 if (ctrl->ctrl.queue_count > 1) { 945 - nvme_start_freeze(&ctrl->ctrl); 946 943 nvme_quiesce_io_queues(&ctrl->ctrl); 947 944 nvme_sync_io_queues(&ctrl->ctrl); 948 945 nvme_rdma_stop_io_queues(ctrl);
+2 -1
drivers/nvme/host/tcp.c
··· 1868 1868 goto out_cleanup_connect_q; 1869 1869 1870 1870 if (!new) { 1871 + nvme_start_freeze(ctrl); 1871 1872 nvme_unquiesce_io_queues(ctrl); 1872 1873 if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { 1873 1874 /* ··· 1877 1876 * to be safe. 1878 1877 */ 1879 1878 ret = -ENODEV; 1879 + nvme_unfreeze(ctrl); 1880 1880 goto out_wait_freeze_timed_out; 1881 1881 } 1882 1882 blk_mq_update_nr_hw_queues(ctrl->tagset, ··· 1982 1980 if (ctrl->queue_count <= 1) 1983 1981 return; 1984 1982 nvme_quiesce_admin_queue(ctrl); 1985 - nvme_start_freeze(ctrl); 1986 1983 nvme_quiesce_io_queues(ctrl); 1987 1984 nvme_sync_io_queues(ctrl); 1988 1985 nvme_tcp_stop_io_queues(ctrl);
+1 -1
include/linux/bio.h
··· 791 791 static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) 792 792 { 793 793 bio->bi_opf |= REQ_POLLED; 794 - if (!is_sync_kiocb(kiocb)) 794 + if (kiocb->ki_flags & IOCB_NOWAIT) 795 795 bio->bi_opf |= REQ_NOWAIT; 796 796 } 797 797
-1
include/linux/blkdev.h
··· 969 969 970 970 bool multiple_queues; 971 971 bool has_elevator; 972 - bool nowait; 973 972 974 973 struct list_head cb_list; /* md requires an unplug callback */ 975 974 };