
Merge tag 'block-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull more block updates from Jens Axboe:

- Fix partial IOVA mapping cleanup in error handling

- Minor prep series making __blkdev_issue_discard() return void, as the
return value was always zero

- Ensure BLK_FEAT_STABLE_WRITES is set for drbd

- Fix leak of folio in bio_iov_iter_bounce_read()

- Allow IOC_PR_READ_* for read-only open

- Another debugfs deadlock fix

- A few doc updates

* tag 'block-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
blk-mq: use NOIO context to prevent deadlock during debugfs creation
blk-stat: convert struct blk_stat_callback to kernel-doc
block: fix enum descriptions kernel-doc
block: update docs for bio and bvec_iter
block: change return type to void
nvmet: ignore discard return value
md: ignore discard return value
block: fix partial IOVA mapping cleanup in blk_rq_dma_map_iova
block: fix folio leak in bio_iov_iter_bounce_read()
block: allow IOC_PR_READ_* ioctls with BLK_OPEN_READ
drbd: always set BLK_FEAT_STABLE_WRITES

+181 -104
+3 -1
block/bio.c
···
 		ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec + 1, len,
 				&bio->bi_vcnt, bio->bi_max_vecs - 1, 0);
 		if (ret <= 0) {
-			if (!bio->bi_vcnt)
+			if (!bio->bi_vcnt) {
+				folio_put(folio);
 				return ret;
+			}
 			break;
 		}
 		len -= ret;
+1 -2
block/blk-lib.c
···
 	return bio;
 }
 
-int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+void __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
 {
 	struct bio *bio;
···
 	while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
 			gfp_mask)))
 		*biop = bio_chain_and_submit(*biop, bio);
-	return 0;
 }
 EXPORT_SYMBOL(__blkdev_issue_discard);
 
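Since the helper now returns void, a caller learns whether anything was issued by checking the bio cursor it passed in. A hedged sketch of the resulting calling convention, modeled on the blkdev_issue_discard() wrapper (the function name below is illustrative, not from this series):

/*
 * Sketch: synchronous discard under the new void signature. The chained
 * parent bio is the only signal; NULL means nothing was issued (e.g. the
 * device does not support discard).
 */
static int example_discard_sync(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask)
{
	struct bio *bio = NULL;
	int ret = 0;

	__blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
	if (bio) {
		ret = submit_bio_wait(bio);	/* completion status */
		bio_put(bio);
	}
	return ret;
}

The md and nvmet call sites below take the same shape: issue, then test the cursor.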
+3 -7
block/blk-mq-debugfs.c
···
 {
 	lockdep_assert_held(&q->debugfs_mutex);
 	/*
-	 * Creating new debugfs entries with queue freezed has the risk of
-	 * deadlock.
-	 */
-	WARN_ON_ONCE(q->mq_freeze_depth != 0);
-	/*
 	 * debugfs_mutex should not be nested under other locks that can be
 	 * grabbed while queue is frozen.
 	 */
···
 void blk_mq_debugfs_register_hctxs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
+	unsigned int memflags;
 	unsigned long i;
 
-	mutex_lock(&q->debugfs_mutex);
+	memflags = blk_debugfs_lock(q);
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_debugfs_register_hctx(q, hctx);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 }
 
 void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
+8 -5
block/blk-mq-dma.c
···
 		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
 				vec->len, dir, attrs);
 		if (error)
-			break;
+			goto out_unlink;
 		mapped += vec->len;
 	} while (blk_map_iter_next(req, &iter->iter, vec));
 
 	error = dma_iova_sync(dma_dev, state, 0, mapped);
-	if (error) {
-		iter->status = errno_to_blk_status(error);
-		return false;
-	}
+	if (error)
+		goto out_unlink;
 
 	return true;
+
+out_unlink:
+	dma_iova_destroy(dma_dev, state, mapped, dir, attrs);
+	iter->status = errno_to_blk_status(error);
+	return false;
 }
 
 static inline void blk_rq_map_iter_init(struct request *rq,
+5 -4
block/blk-mq-sched.c
···
 void blk_mq_sched_reg_debugfs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
+	unsigned int memflags;
 	unsigned long i;
 
-	mutex_lock(&q->debugfs_mutex);
+	memflags = blk_debugfs_lock(q);
 	blk_mq_debugfs_register_sched(q);
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_debugfs_register_sched_hctx(q, hctx);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 }
 
 void blk_mq_sched_unreg_debugfs(struct request_queue *q)
···
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
 
-	mutex_lock(&q->debugfs_mutex);
+	blk_debugfs_lock_nomemsave(q);
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_debugfs_unregister_sched_hctx(hctx);
 	blk_mq_debugfs_unregister_sched(q);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock_nomemrestore(q);
 }
 
 void blk_mq_free_sched_tags(struct elevator_tags *et,
+7 -2
block/blk-stat.h
···
  * timer fires, @cpu_stat is flushed to @stat and @timer_fn is invoked.
  */
 struct blk_stat_callback {
-	/*
+	/**
 	 * @list: RCU list of callbacks for a &struct request_queue.
 	 */
 	struct list_head list;
···
 	struct blk_rq_stat *stat;
 
 	/**
-	 * @fn: Callback function.
+	 * @timer_fn: Callback function.
 	 */
 	void (*timer_fn)(struct blk_stat_callback *);
 
···
 	 */
 	void *data;
 
+	/**
+	 * @rcu: rcu list head
+	 */
 	struct rcu_head rcu;
 };
 
···
  * blk_stat_is_active() - Check if a block statistics callback is currently
  * gathering statistics.
  * @cb: The callback.
+ *
+ * Returns: %true iff the callback is active.
  */
 static inline bool blk_stat_is_active(struct blk_stat_callback *cb)
 {
+5 -4
block/blk-sysfs.c
···
 {
 	struct request_queue *q = disk->queue;
 
-	mutex_lock(&q->debugfs_mutex);
+	blk_debugfs_lock_nomemsave(q);
 	blk_trace_shutdown(q);
 	debugfs_remove_recursive(q->debugfs_dir);
 	q->debugfs_dir = NULL;
 	q->sched_debugfs_dir = NULL;
 	q->rqos_debugfs_dir = NULL;
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock_nomemrestore(q);
 }
 
 /**
···
 int blk_register_queue(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
+	unsigned int memflags;
 	int ret;
 
 	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
···
 	}
 	mutex_lock(&q->sysfs_lock);
 
-	mutex_lock(&q->debugfs_mutex);
+	memflags = blk_debugfs_lock(q);
 	q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
 	if (queue_is_mq(q))
 		blk_mq_debugfs_register(q);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 
 	ret = disk_register_independent_access_ranges(disk);
 	if (ret)
+6 -4
block/blk-wbt.c
···
 {
 	struct request_queue *q = disk->queue;
 	struct rq_wb *rwb;
+	unsigned int memflags;
 
 	if (!__wbt_enable_default(disk))
 		return;
···
 		return;
 	}
 
-	mutex_lock(&q->debugfs_mutex);
+	memflags = blk_debugfs_lock(q);
 	blk_mq_debugfs_register_rq_qos(q);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 }
 
 static u64 wbt_default_latency_nsec(struct request_queue *q)
···
 	blk_mq_unquiesce_queue(q);
 out:
 	blk_mq_unfreeze_queue(q, memflags);
-	mutex_lock(&q->debugfs_mutex);
+
+	memflags = blk_debugfs_lock(q);
 	blk_mq_debugfs_register_rq_qos(q);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 
 	return ret;
 }
+31
block/blk.h
···
 }
 #endif
 
+/*
+ * debugfs directory and file creation can trigger fs reclaim, which can enter
+ * back into the block layer request_queue. This can cause deadlock if the
+ * queue is frozen. Use NOIO context together with debugfs_mutex to prevent fs
+ * reclaim from triggering block I/O.
+ */
+static inline void blk_debugfs_lock_nomemsave(struct request_queue *q)
+{
+	mutex_lock(&q->debugfs_mutex);
+}
+
+static inline void blk_debugfs_unlock_nomemrestore(struct request_queue *q)
+{
+	mutex_unlock(&q->debugfs_mutex);
+}
+
+static inline unsigned int __must_check blk_debugfs_lock(struct request_queue *q)
+{
+	unsigned int memflags = memalloc_noio_save();
+
+	blk_debugfs_lock_nomemsave(q);
+	return memflags;
+}
+
+static inline void blk_debugfs_unlock(struct request_queue *q,
+		unsigned int memflags)
+{
+	blk_debugfs_unlock_nomemrestore(q);
+	memalloc_noio_restore(memflags);
+}
+
 #endif /* BLK_INTERNAL_H */
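The helpers encode a pairing rule that the call sites in this series follow: paths that create debugfs entries (and can therefore allocate and recurse into reclaim) take the NOIO-scoped blk_debugfs_lock()/blk_debugfs_unlock() pair, while teardown paths that only remove entries keep the plain mutex via the _nomemsave/_nomemrestore variants. A hedged sketch of the intended usage (the two functions below are illustrative, not from this series):

/* Creation path: debugfs_create_*() allocates, so scope it NOIO. */
static void example_register(struct request_queue *q, struct dentry **dir)
{
	unsigned int memflags = blk_debugfs_lock(q);

	*dir = debugfs_create_dir("example", q->debugfs_dir);
	blk_debugfs_unlock(q, memflags);
}

/* Teardown path: removal does not allocate, the plain mutex suffices. */
static void example_unregister(struct request_queue *q, struct dentry *dir)
{
	blk_debugfs_lock_nomemsave(q);
	debugfs_remove_recursive(dir);
	blk_debugfs_unlock_nomemrestore(q);
}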
+23 -11
block/ioctl.c
···
 EXPORT_SYMBOL(blkdev_compat_ptr_ioctl);
 #endif
 
-static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode)
+enum pr_direction {
+	PR_IN,	/* read from device */
+	PR_OUT,	/* write to device */
+};
+
+static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode,
+		enum pr_direction dir)
 {
 	/* no sense to make reservations for partitions */
 	if (bdev_is_partition(bdev))
···
 
 	if (capable(CAP_SYS_ADMIN))
 		return true;
+
 	/*
-	 * Only allow unprivileged reservations if the file descriptor is open
-	 * for writing.
+	 * Only allow unprivileged reservation _out_ commands if the file
+	 * descriptor is open for writing. Allow reservation _in_ commands if
+	 * the file descriptor is open for reading since they do not modify the
+	 * device.
 	 */
-	return mode & BLK_OPEN_WRITE;
+	if (dir == PR_IN)
+		return mode & BLK_OPEN_READ;
+	else
+		return mode & BLK_OPEN_WRITE;
 }
 
 static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode,
···
 	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 	struct pr_registration reg;
 
-	if (!blkdev_pr_allowed(bdev, mode))
+	if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
 		return -EPERM;
 	if (!ops || !ops->pr_register)
 		return -EOPNOTSUPP;
···
 	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 	struct pr_reservation rsv;
 
-	if (!blkdev_pr_allowed(bdev, mode))
+	if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
 		return -EPERM;
 	if (!ops || !ops->pr_reserve)
 		return -EOPNOTSUPP;
···
 	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 	struct pr_reservation rsv;
 
-	if (!blkdev_pr_allowed(bdev, mode))
+	if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
 		return -EPERM;
 	if (!ops || !ops->pr_release)
 		return -EOPNOTSUPP;
···
 	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 	struct pr_preempt p;
 
-	if (!blkdev_pr_allowed(bdev, mode))
+	if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
 		return -EPERM;
 	if (!ops || !ops->pr_preempt)
 		return -EOPNOTSUPP;
···
 	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 	struct pr_clear c;
 
-	if (!blkdev_pr_allowed(bdev, mode))
+	if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
 		return -EPERM;
 	if (!ops || !ops->pr_clear)
 		return -EOPNOTSUPP;
···
 	size_t keys_copy_len;
 	int ret;
 
-	if (!blkdev_pr_allowed(bdev, mode))
+	if (!blkdev_pr_allowed(bdev, mode, PR_IN))
 		return -EPERM;
 	if (!ops || !ops->pr_read_keys)
 		return -EOPNOTSUPP;
···
 	struct pr_read_reservation out = {};
 	int ret;
 
-	if (!blkdev_pr_allowed(bdev, mode))
+	if (!blkdev_pr_allowed(bdev, mode, PR_IN))
 		return -EPERM;
 	if (!ops || !ops->pr_read_reservation)
 		return -EOPNOTSUPP;
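From userspace, the visible effect is that the reservation-in ioctls now work on a read-only file descriptor. A hedged sketch (the ioctl and struct names follow the handlers above; the struct's fields are left zeroed here, see the <linux/pr.h> uapi header for the actual layout):

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/pr.h>

int example_read_reservation(const char *dev)
{
	struct pr_read_reservation rsv = { 0 };
	int fd, ret;

	/* Before this change an unprivileged caller needed a writable fd;
	 * O_RDONLY is now sufficient for the reservation-in commands. */
	fd = open(dev, O_RDONLY);
	if (fd < 0)
		return -1;

	ret = ioctl(fd, IOC_PR_READ_RESERVATION, &rsv);
	close(fd);
	return ret;
}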
-3
drivers/block/drbd/drbd_main.c
···
 	 * connect.
 	 */
 	.max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
-	.features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
-		BLK_FEAT_ROTATIONAL |
-		BLK_FEAT_STABLE_WRITES,
 };
 
 device = minor_to_device(minor);
+19 -1
drivers/block/drbd/drbd_nl.c
···
 		lim.max_segments = drbd_backing_dev_max_segments(device);
 	} else {
 		lim.max_segments = BLK_MAX_SEGMENTS;
+		lim.features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
+			BLK_FEAT_ROTATIONAL | BLK_FEAT_STABLE_WRITES;
 	}
 
 	lim.max_hw_sectors = new >> SECTOR_SHIFT;
···
 		lim.max_hw_discard_sectors = 0;
 	}
 
-	if (bdev)
+	if (bdev) {
 		blk_stack_limits(&lim, &b->limits, 0);
+		/*
+		 * blk_set_stacking_limits() cleared the features, and
+		 * blk_stack_limits() may or may not have inherited
+		 * BLK_FEAT_STABLE_WRITES from the backing device.
+		 *
+		 * DRBD always requires stable writes because:
+		 * 1. The same bio data is read for both local disk I/O and
+		 *    network transmission. If the page changes mid-flight,
+		 *    the local and remote copies could diverge.
+		 * 2. When data integrity is enabled, DRBD calculates a
+		 *    checksum before sending the data. If the page changes
+		 *    between checksum calculation and transmission, the
+		 *    receiver will detect a checksum mismatch.
+		 */
+		lim.features |= BLK_FEAT_STABLE_WRITES;
+	}
 
 	/*
 	 * If we can handle "zeroes" efficiently on the protocol, we want to do
+2 -2
drivers/md/md.c
···
 {
 	struct bio *discard_bio = NULL;
 
-	if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO,
-			&discard_bio) || !discard_bio)
+	__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, &discard_bio);
+	if (!discard_bio)
 		return;
 
 	bio_chain(discard_bio, bio);
+7 -21
drivers/nvme/target/io-cmd-bdev.c
···
 	return 0;
 }
 
-static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
-		struct nvme_dsm_range *range, struct bio **bio)
-{
-	struct nvmet_ns *ns = req->ns;
-	int ret;
-
-	ret = __blkdev_issue_discard(ns->bdev,
-			nvmet_lba_to_sect(ns, range->slba),
-			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
-			GFP_KERNEL, bio);
-	if (ret && ret != -EOPNOTSUPP) {
-		req->error_slba = le64_to_cpu(range->slba);
-		return errno_to_nvme_status(req, ret);
-	}
-	return NVME_SC_SUCCESS;
-}
-
 static void nvmet_bdev_execute_discard(struct nvmet_req *req)
 {
+	struct nvmet_ns *ns = req->ns;
 	struct nvme_dsm_range range;
 	struct bio *bio = NULL;
+	sector_t nr_sects;
 	int i;
-	u16 status;
+	u16 status = NVME_SC_SUCCESS;
 
 	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
 		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
···
 		if (status)
 			break;
 
-		status = nvmet_bdev_discard_range(req, &range, &bio);
-		if (status)
-			break;
+		nr_sects = le32_to_cpu(range.nlb) << (ns->blksize_shift - 9);
+		__blkdev_issue_discard(ns->bdev,
+				nvmet_lba_to_sect(ns, range.slba), nr_sects,
+				GFP_KERNEL, &bio);
 	}
 
 	if (bio) {
+20 -13
include/linux/blk_types.h
···
 	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
 	 */
 
-	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */
+	/*
+	 * Number of elements in `bi_io_vec` that were allocated for this bio.
+	 * Only used by the bio submitter to make `bio_add_page` fail once full
+	 * and to free the `bi_io_vec` allocation. Must not be used in drivers
+	 * and does not hold a useful value for cloned bios.
+	 */
+	unsigned short		bi_max_vecs;
 
 	atomic_t		__bi_cnt;	/* pin count */
···
  * meaning.
  */
 enum req_op {
-	/* read sectors from the device */
+	/** @REQ_OP_READ: read sectors from the device */
 	REQ_OP_READ		= (__force blk_opf_t)0,
-	/* write sectors to the device */
+	/** @REQ_OP_WRITE: write sectors to the device */
 	REQ_OP_WRITE		= (__force blk_opf_t)1,
-	/* flush the volatile write cache */
+	/** @REQ_OP_FLUSH: flush the volatile write cache */
 	REQ_OP_FLUSH		= (__force blk_opf_t)2,
-	/* discard sectors */
+	/** @REQ_OP_DISCARD: discard sectors */
 	REQ_OP_DISCARD		= (__force blk_opf_t)3,
-	/* securely erase sectors */
+	/** @REQ_OP_SECURE_ERASE: securely erase sectors */
 	REQ_OP_SECURE_ERASE	= (__force blk_opf_t)5,
-	/* write data at the current zone write pointer */
+	/** @REQ_OP_ZONE_APPEND: write data at the current zone write pointer */
 	REQ_OP_ZONE_APPEND	= (__force blk_opf_t)7,
-	/* write the zero filled sector many times */
+	/** @REQ_OP_WRITE_ZEROES: write the zero filled sector many times */
 	REQ_OP_WRITE_ZEROES	= (__force blk_opf_t)9,
-	/* Open a zone */
+	/** @REQ_OP_ZONE_OPEN: Open a zone */
 	REQ_OP_ZONE_OPEN	= (__force blk_opf_t)11,
-	/* Close a zone */
+	/** @REQ_OP_ZONE_CLOSE: Close a zone */
 	REQ_OP_ZONE_CLOSE	= (__force blk_opf_t)13,
-	/* Transition a zone to full */
+	/** @REQ_OP_ZONE_FINISH: Transition a zone to full */
 	REQ_OP_ZONE_FINISH	= (__force blk_opf_t)15,
-	/* reset a zone write pointer */
+	/** @REQ_OP_ZONE_RESET: reset a zone write pointer */
 	REQ_OP_ZONE_RESET	= (__force blk_opf_t)17,
-	/* reset all the zone present on the device */
+	/** @REQ_OP_ZONE_RESET_ALL: reset all the zone present on the device */
 	REQ_OP_ZONE_RESET_ALL	= (__force blk_opf_t)19,
 
 	/* Driver private requests */
+	/* private: */
 	REQ_OP_DRV_IN		= (__force blk_opf_t)34,
 	REQ_OP_DRV_OUT		= (__force blk_opf_t)35,
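For bio submitters, the practical upshot of the bi_max_vecs note above is to treat bio_add_page()'s return value as the only "full" signal instead of inspecting the field. A minimal hedged sketch (the function name is illustrative):

/*
 * Fill a bio page by page. bio_add_page() returns the number of bytes it
 * added, 0 once the bio's vector table is full, so the caller never needs
 * to look at bi_max_vecs directly.
 */
static void example_fill_bio(struct bio *bio, struct page **pages,
		unsigned int nr_pages)
{
	unsigned int i;

	for (i = 0; i < nr_pages; i++)
		if (bio_add_page(bio, pages[i], PAGE_SIZE, 0) != PAGE_SIZE)
			break;
}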
+1 -1
include/linux/blkdev.h
···
 
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask);
-int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+void __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop);
 int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp);
+19 -6
include/linux/bvec.h
···
 }
 
 struct bvec_iter {
-	sector_t		bi_sector;	/* device address in 512 byte
-						   sectors */
-	unsigned int		bi_size;	/* residual I/O count */
+	/*
+	 * Current device address in 512 byte sectors. Only updated by the bio
+	 * iter wrappers and not the bvec iterator helpers themselves.
+	 */
+	sector_t		bi_sector;
 
-	unsigned int		bi_idx;		/* current index into bvl_vec */
+	/*
+	 * Remaining size in bytes.
+	 */
+	unsigned int		bi_size;
 
-	unsigned int		bi_bvec_done;	/* number of bytes completed in
-						   current bvec */
+	/*
+	 * Current index into the bvec array. This indexes into `bi_io_vec` when
+	 * iterating a bvec array that is part of a `bio`.
+	 */
+	unsigned int		bi_idx;
+
+	/*
+	 * Current offset in the bvec entry pointed to by `bi_idx`.
+	 */
+	unsigned int		bi_bvec_done;
 } __packed __aligned(4);
 
 struct bvec_iter_all {
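The iterator is normally driven through the bio-level wrappers, which advance bi_idx and bi_bvec_done and shrink bi_size while the submitter just consumes segments. A minimal hedged sketch using the long-standing bio_for_each_segment() helper (the function name is illustrative):

#include <linux/bio.h>

/*
 * Walk a bio's segments. bio_for_each_segment() works on a copy of
 * bio->bi_iter, bumping bi_idx/bi_bvec_done and shrinking bi_size as it
 * advances, per the field descriptions above.
 */
static unsigned int example_segment_bytes(struct bio *bio)
{
	struct bvec_iter iter;
	struct bio_vec bv;
	unsigned int bytes = 0;

	bio_for_each_segment(bv, bio, iter)
		bytes += bv.bv_len;

	return bytes;
}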
+21 -17
kernel/trace/blktrace.c
···
 {
 	int ret;
 
-	mutex_lock(&q->debugfs_mutex);
+	blk_debugfs_lock_nomemsave(q);
 	ret = __blk_trace_remove(q);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock_nomemrestore(q);
 
 	return ret;
 }
···
 	struct blk_user_trace_setup2 buts2;
 	struct blk_user_trace_setup buts;
 	struct blk_trace *bt;
+	unsigned int memflags;
 	int ret;
 
 	ret = copy_from_user(&buts, arg, sizeof(buts));
···
 		.pid = buts.pid,
 	};
 
-	mutex_lock(&q->debugfs_mutex);
+	memflags = blk_debugfs_lock(q);
 	bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr,
 			bdev);
 	if (IS_ERR(bt)) {
-		mutex_unlock(&q->debugfs_mutex);
+		blk_debugfs_unlock(q, memflags);
 		return PTR_ERR(bt);
 	}
 	blk_trace_setup_finalize(q, name, 1, bt, &buts2);
 	strscpy(buts.name, buts2.name, BLKTRACE_BDEV_SIZE);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 
 	if (copy_to_user(arg, &buts, sizeof(buts))) {
 		blk_trace_remove(q);
···
 {
 	struct blk_user_trace_setup2 buts2;
 	struct blk_trace *bt;
+	unsigned int memflags;
 
 	if (copy_from_user(&buts2, arg, sizeof(buts2)))
 		return -EFAULT;
···
 	if (buts2.flags != 0)
 		return -EINVAL;
 
-	mutex_lock(&q->debugfs_mutex);
+	memflags = blk_debugfs_lock(q);
 	bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
 			bdev);
 	if (IS_ERR(bt)) {
-		mutex_unlock(&q->debugfs_mutex);
+		blk_debugfs_unlock(q, memflags);
 		return PTR_ERR(bt);
 	}
 	blk_trace_setup_finalize(q, name, 2, bt, &buts2);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 
 	if (copy_to_user(arg, &buts2, sizeof(buts2))) {
 		blk_trace_remove(q);
···
 	struct blk_user_trace_setup2 buts2;
 	struct compat_blk_user_trace_setup cbuts;
 	struct blk_trace *bt;
+	unsigned int memflags;
 
 	if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
 		return -EFAULT;
···
 		.pid = cbuts.pid,
 	};
 
-	mutex_lock(&q->debugfs_mutex);
+	memflags = blk_debugfs_lock(q);
 	bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
 			bdev);
 	if (IS_ERR(bt)) {
-		mutex_unlock(&q->debugfs_mutex);
+		blk_debugfs_unlock(q, memflags);
 		return PTR_ERR(bt);
 	}
 	blk_trace_setup_finalize(q, name, 1, bt, &buts2);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 
 	if (copy_to_user(arg, &buts2.name, ARRAY_SIZE(buts2.name))) {
 		blk_trace_remove(q);
···
 {
 	int ret;
 
-	mutex_lock(&q->debugfs_mutex);
+	blk_debugfs_lock_nomemsave(q);
 	ret = __blk_trace_startstop(q, start);
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock_nomemrestore(q);
 
 	return ret;
 }
···
 	struct blk_trace *bt;
 	ssize_t ret = -ENXIO;
 
-	mutex_lock(&q->debugfs_mutex);
+	blk_debugfs_lock_nomemsave(q);
 
 	bt = rcu_dereference_protected(q->blk_trace,
 			lockdep_is_held(&q->debugfs_mutex));
···
 		ret = sprintf(buf, "%llu\n", bt->end_lba);
 
 out_unlock_bdev:
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock_nomemrestore(q);
 	return ret;
 }
···
 	struct block_device *bdev = dev_to_bdev(dev);
 	struct request_queue *q = bdev_get_queue(bdev);
 	struct blk_trace *bt;
+	unsigned int memflags;
 	u64 value;
 	ssize_t ret = -EINVAL;
···
 		goto out;
 	}
 
-	mutex_lock(&q->debugfs_mutex);
+	memflags = blk_debugfs_lock(q);
 
 	bt = rcu_dereference_protected(q->blk_trace,
 			lockdep_is_held(&q->debugfs_mutex));
···
 	}
 
 out_unlock_bdev:
-	mutex_unlock(&q->debugfs_mutex);
+	blk_debugfs_unlock(q, memflags);
 out:
 	return ret ? ret : count;
 }