
Merge tag 'for-6.12/block-20240925' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe:

- Improve blk-integrity segment counting and merging (Keith)

- NVMe pull request via Keith:
     - Multipath fixes (Hannes)
     - Sysfs attribute list NULL terminate fix (Shin'ichiro)
     - Remove problematic read-back (Keith)

- Fix for a regression from 6.11 in queue freezing during IO scheduler
  switching (Damien)

- Use a raw spinlock for sbitmap, as it may be acquired from
  preempt-disabled context (Ming)

- Cleanup for bd_claiming waiting, using var_waitqueue() rather than
  the bit waitqueues, as that more accurately describes what it does
  (Neil)

- Various cleanups (Kanchan, Qiu-ji, David)

* tag 'for-6.12/block-20240925' of git://git.kernel.dk/linux:
nvme: remove CC register read-back during enabling
nvme: null terminate nvme_tls_attrs
nvme-multipath: avoid hang on inaccessible namespaces
nvme-multipath: system fails to create generic nvme device
lib/sbitmap: define swap_lock as raw_spinlock_t
block: Remove unused blk_limits_io_{min,opt}
drbd: Fix atomicity violation in drbd_uuid_set_bm()
block: Fix elv_iosched_local_module handling of "none" scheduler
block: remove bogus union
block: change wait on bd_claiming to use a var_waitqueue
blk-integrity: improved sg segment mapping
block: unexport blk_rq_count_integrity_sg
nvme-rdma: use request to get integrity segments
scsi: use request to get integrity segments
block: provide a request helper for user integrity segments
blk-integrity: consider entire bio list for merging
blk-integrity: properly account for segments
blk-mq: set the nr_integrity_segments from bio
blk-mq: unconditional nr_integrity_segments

20 files changed, +77 -101
block/bdev.c | +2 -2
···
 
 	/* if claiming is already in progress, wait for it to finish */
 	if (whole->bd_claiming) {
-		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
+		wait_queue_head_t *wq = __var_waitqueue(&whole->bd_claiming);
 		DEFINE_WAIT(wait);
 
 		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
···
 	/* tell others that we're done */
 	BUG_ON(whole->bd_claiming != holder);
 	whole->bd_claiming = NULL;
-	wake_up_bit(&whole->bd_claiming, 0);
+	wake_up_var(&whole->bd_claiming);
 }
 
 /**
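
For reference, a minimal sketch (not part of this commit) of the wait/wake pairing that the var_waitqueue machinery supports; bd_prepare_to_claim() open-codes the wait side because its condition must be re-checked under the bdev lock. The 'claiming' variable and function names below are invented:

#include <linux/wait_bit.h>

static unsigned long claiming;	/* invented example state */

static void example_wait_for_release(void)
{
	/* sleeps on the address of 'claiming'; the condition is
	 * re-checked after every wakeup */
	wait_var_event(&claiming, claiming == 0);
}

static void example_release(void)
{
	claiming = 0;
	wake_up_var(&claiming);	/* wakes waiters keyed on &claiming */
}
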
block/bio-integrity.c | -1
···
 	kfree(bvec);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(bio_integrity_map_user);
 
 /**
  * bio_integrity_prep - Prepare bio for integrity I/O
block/blk-integrity.c | +25 -11
···
 
 	return segments;
 }
-EXPORT_SYMBOL(blk_rq_count_integrity_sg);
 
 /**
  * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
···
  *
  * Description: Map the integrity vectors in request into a
  * scatterlist.  The scatterlist must be big enough to hold all
- * elements.  I.e. sized using blk_rq_count_integrity_sg().
+ * elements.  I.e. sized using blk_rq_count_integrity_sg() or
+ * rq->nr_integrity_segments.
  */
-int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
-			    struct scatterlist *sglist)
+int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
 {
 	struct bio_vec iv, ivprv = { NULL };
+	struct request_queue *q = rq->q;
 	struct scatterlist *sg = NULL;
+	struct bio *bio = rq->bio;
 	unsigned int segments = 0;
 	struct bvec_iter iter;
 	int prev = 0;
 
 	bio_for_each_integrity_vec(iv, bio, iter) {
-
 		if (prev) {
 			if (!biovec_phys_mergeable(q, &ivprv, &iv))
 				goto new_segment;
···
 	if (sg)
 		sg_mark_end(sg);
 
+	/*
+	 * Something must have been wrong if the figured number of segment
+	 * is bigger than number of req's physical integrity segments
+	 */
+	BUG_ON(segments > rq->nr_integrity_segments);
+	BUG_ON(segments > queue_max_integrity_segments(q));
 	return segments;
 }
 EXPORT_SYMBOL(blk_rq_map_integrity_sg);
+
+int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
+			      ssize_t bytes, u32 seed)
+{
+	int ret = bio_integrity_map_user(rq->bio, ubuf, bytes, seed);
+
+	if (ret)
+		return ret;
+
+	rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, rq->bio);
+	rq->cmd_flags |= REQ_INTEGRITY;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_rq_integrity_map_user);
 
 bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
 			    struct request *next)
···
 			    struct bio *bio)
 {
 	int nr_integrity_segs;
-	struct bio *next = bio->bi_next;
 
 	if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL)
 		return true;
···
 	if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags)
 		return false;
 
-	bio->bi_next = NULL;
 	nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
-	bio->bi_next = next;
-
 	if (req->nr_integrity_segments + nr_integrity_segs >
 	    q->limits.max_integrity_segments)
 		return false;
-
-	req->nr_integrity_segments += nr_integrity_segs;
 
 	return true;
 }
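
A hedged sketch of the driver-side pattern these helpers enable, modeled on the scsi_lib.c and rdma.c hunks further down; the function name and inline segment count are invented:

#include <linux/blk-mq.h>
#include <linux/blk-integrity.h>
#include <linux/scatterlist.h>

#define EX_INLINE_PROT_SG_CNT	1	/* invented inline-sg budget */

static blk_status_t example_map_prot(struct request *rq, struct sg_table *prot)
{
	/* the block layer now maintains rq->nr_integrity_segments, so no
	 * recount of the bio integrity vectors is needed here */
	if (sg_alloc_table_chained(prot, rq->nr_integrity_segments,
				   prot->sgl, EX_INLINE_PROT_SG_CNT))
		return BLK_STS_RESOURCE;

	/* queue and bio are derived from the request internally */
	prot->nents = blk_rq_map_integrity_sg(rq, prot->sgl);
	return BLK_STS_OK;
}
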
block/blk-merge.c | +4
···
 	 * counters.
 	 */
 	req->nr_phys_segments += nr_phys_segs;
+	if (bio_integrity(bio))
+		req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
+									bio);
 	return 1;
 
 no_merge:
···
 
 	/* Merge is OK... */
 	req->nr_phys_segments = total_phys_segments;
+	req->nr_integrity_segments += next->nr_integrity_segments;
 	return 1;
 }
 
block/blk-mq.c | +3 -2
···
 	rq->io_start_time_ns = 0;
 	rq->stats_sectors = 0;
 	rq->nr_phys_segments = 0;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
 	rq->nr_integrity_segments = 0;
-#endif
 	rq->end_io = NULL;
 	rq->end_io_data = NULL;
 
···
 	rq->__sector = bio->bi_iter.bi_sector;
 	rq->write_hint = bio->bi_write_hint;
 	blk_rq_bio_prep(rq, bio, nr_segs);
+	if (bio_integrity(bio))
+		rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
+								      bio);
 
 	/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
 	err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
block/blk-settings.c | -42
···
 }
 EXPORT_SYMBOL_GPL(queue_limits_set);
 
-/**
- * blk_limits_io_min - set minimum request size for a device
- * @limits: the queue limits
- * @min: smallest I/O size in bytes
- *
- * Description:
- *   Some devices have an internal block size bigger than the reported
- *   hardware sector size.  This function can be used to signal the
- *   smallest I/O the device can perform without incurring a performance
- *   penalty.
- */
-void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
-{
-	limits->io_min = min;
-
-	if (limits->io_min < limits->logical_block_size)
-		limits->io_min = limits->logical_block_size;
-
-	if (limits->io_min < limits->physical_block_size)
-		limits->io_min = limits->physical_block_size;
-}
-EXPORT_SYMBOL(blk_limits_io_min);
-
-/**
- * blk_limits_io_opt - set optimal request size for a device
- * @limits: the queue limits
- * @opt: smallest I/O size in bytes
- *
- * Description:
- *   Storage devices may report an optimal I/O size, which is the
- *   device's preferred unit for sustained I/O.  This is rarely reported
- *   for disk drives.  For RAID arrays it is usually the stripe width or
- *   the internal track size.  A properly aligned multiple of
- *   optimal_io_size is the preferred request size for workloads where
- *   sustained throughput is desired.
- */
-void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
-{
-	limits->io_opt = opt;
-}
-EXPORT_SYMBOL(blk_limits_io_opt);
-
 static int queue_limit_alignment_offset(const struct queue_limits *lim,
 					sector_t sector)
 {
block/elevator.c | +3 -1
···
 
 	strscpy(elevator_name, buf, sizeof(elevator_name));
 
-	return request_module("%s-iosched", strstrip(elevator_name));
+	request_module("%s-iosched", strstrip(elevator_name));
+
+	return 0;
 }
 
 ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
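
The reasoning behind ignoring the return value, as a hedged sketch with an invented function name: built-in schedulers such as "none" have no "<name>-iosched" module, so a request_module() failure is not an error; whether the name is valid is decided by the elevator lookup that runs afterwards.

#include <linux/kmod.h>

static int example_load_iosched(const char *name)
{
	/* best-effort: fails harmlessly for built-in schedulers */
	request_module("%s-iosched", name);

	/* the later lookup by name reports truly unknown schedulers */
	return 0;
}
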
drivers/block/drbd/drbd_main.c | +5 -3
···
 void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
 {
 	unsigned long flags;
-	if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
-		return;
-
 	spin_lock_irqsave(&device->ldev->md.uuid_lock, flags);
+	if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) {
+		spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags);
+		return;
+	}
+
 	if (val == 0) {
 		drbd_uuid_move_history(device);
 		device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
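
The bug class, reduced to a hedged, self-contained sketch (types and names invented): the early-return test reads state that uuid_lock protects, so performing it before taking the lock is a check-then-act race. The fix moves the test inside the critical section:

#include <linux/spinlock.h>
#include <linux/types.h>

struct example_md {
	spinlock_t	lock;
	u64		bitmap_uuid;
};

static void example_set_bm(struct example_md *md, u64 val)
{
	unsigned long flags;

	spin_lock_irqsave(&md->lock, flags);
	/* checked under the lock, so no writer can race with the test */
	if (md->bitmap_uuid == 0 && val == 0) {
		spin_unlock_irqrestore(&md->lock, flags);
		return;
	}
	md->bitmap_uuid = val;
	spin_unlock_irqrestore(&md->lock, flags);
}
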
drivers/nvme/host/core.c | -5
···
 	if (ret)
 		return ret;
 
-	/* Flush write to device (required if transport is PCI) */
-	ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CC, &ctrl->ctrl_config);
-	if (ret)
-		return ret;
-
 	/* CAP value may change after initial CC write */
 	ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
 	if (ret)
drivers/nvme/host/ioctl.c | +2 -4
···
  * Copyright (c) 2011-2014, Intel Corporation.
  * Copyright (c) 2017-2021 Christoph Hellwig.
  */
-#include <linux/bio-integrity.h>
 #include <linux/blk-integrity.h>
 #include <linux/ptrace.h>	/* for force_successful_syscall_return */
 #include <linux/nvme_ioctl.h>
···
 		bio_set_dev(bio, bdev);
 
 	if (has_metadata) {
-		ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
-					     meta_seed);
+		ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len,
+						meta_seed);
 		if (ret)
 			goto out_unmap;
-		req->cmd_flags |= REQ_INTEGRITY;
 	}
 
 	return ret;
drivers/nvme/host/multipath.c | +11 -3
···
 {
 	struct nvme_ns *ns;
 
+	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+		return NULL;
+
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
 		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
 			continue;
···
 		rc = device_add_disk(&head->subsys->dev, head->disk,
 				     nvme_ns_attr_groups);
 		if (rc) {
-			clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags);
+			clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags);
 			return;
 		}
 		nvme_add_ns_head_cdev(head);
···
 {
 	if (!head->disk)
 		return;
-	kblockd_schedule_work(&head->requeue_work);
-	if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+	if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		nvme_cdev_del(&head->cdev, &head->cdev_device);
 		del_gendisk(head->disk);
 	}
+	/*
+	 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+	 * to allow multipath to fail all I/O.
+	 */
+	synchronize_srcu(&head->srcu);
+	kblockd_schedule_work(&head->requeue_work);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
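
Why the requeue now happens after synchronize_srcu(), as a hedged sketch with simplified stand-in types: path lookup runs inside an SRCU read-side section, so clearing the LIVE bit and then synchronizing guarantees that no in-flight lookup can still hand out a path by the time the requeued I/O is retried.

#include <linux/srcu.h>
#include <linux/bitops.h>

#define EX_LIVE	0	/* invented flag bit */

struct example_head {
	struct srcu_struct	srcu;
	unsigned long		flags;
};

static bool example_path_available(struct example_head *head)
{
	bool live;
	int idx;

	idx = srcu_read_lock(&head->srcu);
	live = test_bit(EX_LIVE, &head->flags);	/* gate for path lookup */
	srcu_read_unlock(&head->srcu, idx);
	return live;
}

static void example_shutdown(struct example_head *head)
{
	clear_bit(EX_LIVE, &head->flags);
	/* wait out readers that may have sampled the bit as set */
	synchronize_srcu(&head->srcu);
	/* only now requeue, so every retried I/O fails over cleanly */
}
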
drivers/nvme/host/rdma.c | +3 -3
···
 		req->metadata_sgl->sg_table.sgl =
 			(struct scatterlist *)(req->metadata_sgl + 1);
 		ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
-				blk_rq_count_integrity_sg(rq->q, rq->bio),
+				rq->nr_integrity_segments,
 				req->metadata_sgl->sg_table.sgl,
 				NVME_INLINE_METADATA_SG_CNT);
 		if (unlikely(ret)) {
···
 			goto out_unmap_sg;
 		}
 
-		req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
-				rq->bio, req->metadata_sgl->sg_table.sgl);
+		req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq,
+				req->metadata_sgl->sg_table.sgl);
 		*pi_count = ib_dma_map_sg(ibdev,
 					  req->metadata_sgl->sg_table.sgl,
 					  req->metadata_sgl->nents,
drivers/nvme/host/sysfs.c | +1
···
 	&dev_attr_tls_key.attr,
 	&dev_attr_tls_configured_key.attr,
 	&dev_attr_tls_keyring.attr,
+	NULL,
 };
 
 static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
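
The convention this one-liner restores, as a hedged sketch with invented attribute names: sysfs and the is_visible machinery walk an attribute array until they hit a NULL entry, so a missing sentinel sends the iteration off the end of the array.

#include <linux/sysfs.h>

static struct attribute ex_foo_attr = { .name = "foo", .mode = 0444 };
static struct attribute ex_bar_attr = { .name = "bar", .mode = 0444 };

static struct attribute *ex_attrs[] = {
	&ex_foo_attr,
	&ex_bar_attr,
	NULL,		/* required sentinel; iteration stops here */
};

static const struct attribute_group ex_group = {
	.attrs = ex_attrs,
};
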
drivers/scsi/scsi_lib.c | +3 -9
···
 
 	if (blk_integrity_rq(rq)) {
 		struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
-		int ivecs;
 
 		if (WARN_ON_ONCE(!prot_sdb)) {
 			/*
···
 			goto out_free_sgtables;
 		}
 
-		ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio);
-
-		if (sg_alloc_table_chained(&prot_sdb->table, ivecs,
+		if (sg_alloc_table_chained(&prot_sdb->table,
+					   rq->nr_integrity_segments,
 					   prot_sdb->table.sgl,
 					   SCSI_INLINE_PROT_SG_CNT)) {
 			ret = BLK_STS_RESOURCE;
 			goto out_free_sgtables;
 		}
 
-		count = blk_rq_map_integrity_sg(rq->q, rq->bio,
-						prot_sdb->table.sgl);
-		BUG_ON(count > ivecs);
-		BUG_ON(count > queue_max_integrity_segments(rq->q));
-
+		count = blk_rq_map_integrity_sg(rq, prot_sdb->table.sgl);
 		cmd->prot_sdb = prot_sdb;
 		cmd->prot_sdb->table.nents = count;
 	}
include/linux/blk-integrity.h | +11 -4
···
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
-int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
-			    struct scatterlist *);
+int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
 int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
+int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
+			      ssize_t bytes, u32 seed);
 
 static inline bool
 blk_integrity_queue_supports_integrity(struct request_queue *q)
···
 {
 	return 0;
 }
-static inline int blk_rq_map_integrity_sg(struct request_queue *q,
-					  struct bio *b,
+static inline int blk_rq_map_integrity_sg(struct request *q,
 					  struct scatterlist *s)
 {
 	return 0;
+}
+static inline int blk_rq_integrity_map_user(struct request *rq,
+					    void __user *ubuf,
+					    ssize_t bytes,
+					    u32 seed)
+{
+	return -EINVAL;
 }
 static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
 {
include/linux/blk-mq.h | -3
···
 	 * physical address coalescing is performed.
 	 */
 	unsigned short nr_phys_segments;
-
-#ifdef CONFIG_BLK_DEV_INTEGRITY
 	unsigned short nr_integrity_segments;
-#endif
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 	struct bio_crypt_ctx *crypt_ctx;
include/linux/blk_types.h | +1 -3
···
 	struct bio_crypt_ctx	*bi_crypt_context;
 #endif
 
-	union {
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-		struct bio_integrity_payload *bi_integrity; /* data integrity */
+	struct bio_integrity_payload *bi_integrity; /* data integrity */
 #endif
-	};
 
 	unsigned short		bi_vcnt;	/* how many bio_vec's */
 
include/linux/blkdev.h | -2
···
 /*
  * Access functions for manipulating queue properties
  */
-extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
-extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
 extern void blk_set_stacking_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
include/linux/sbitmap.h | +1 -1
···
 	/**
 	 * @swap_lock: serializes simultaneous updates of ->word and ->cleared
 	 */
-	spinlock_t swap_lock;
+	raw_spinlock_t swap_lock;
 } ____cacheline_aligned_in_smp;
 
 /**
lib/sbitmap.c | +2 -2
···
 {
 	unsigned long mask, word_mask;
 
-	guard(spinlock_irqsave)(&map->swap_lock);
+	guard(raw_spinlock_irqsave)(&map->swap_lock);
 
 	if (!map->cleared) {
 		if (depth == 0)
···
 	}
 
 	for (i = 0; i < sb->map_nr; i++)
-		spin_lock_init(&sb->map[i].swap_lock);
+		raw_spin_lock_init(&sb->map[i].swap_lock);
 
 	return 0;
 }
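
The distinction that motivates the change, as a hedged sketch (names invented): on PREEMPT_RT a spinlock_t becomes a sleeping lock, so a lock that can be taken where preemption is already disabled, as sbitmap's swap_lock can be, must be a raw_spinlock_t, which spins on every configuration.

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(ex_lock);

static void example_update_from_atomic(void)
{
	unsigned long flags;

	/* safe in preempt-disabled context, including on PREEMPT_RT */
	raw_spin_lock_irqsave(&ex_lock, flags);
	/* keep the critical section short; never sleep here */
	raw_spin_unlock_irqrestore(&ex_lock, flags);
}
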