Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: rework bio splitting

The current setup with bio_may_exceed_limit and __bio_split_to_limits
is a bit of a mess.

Change it so that __bio_split_to_limits does all the work and is just
a variant of bio_split_to_limits that returns nr_segs. This is done
by inlining it and instead having the various bio_split_* helpers directly
submit the potentially split bios.

To support btrfs, the rw version has a lower level helper split out
that just returns the offset to split. This turns out to nicely clean
up the btrfs flow as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David Sterba <dsterba@suse.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Tested-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Link: https://lore.kernel.org/r/20240826173820.1690925-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Christoph Hellwig and committed by
Jens Axboe
b35243a4 f6f84be0

+128 -132
+56 -90
block/blk-merge.c
··· 105 105 return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT; 106 106 } 107 107 108 - static struct bio *bio_split_discard(struct bio *bio, 109 - const struct queue_limits *lim, 110 - unsigned *nsegs, struct bio_set *bs) 108 + static struct bio *bio_submit_split(struct bio *bio, int split_sectors) 109 + { 110 + if (unlikely(split_sectors < 0)) { 111 + bio->bi_status = errno_to_blk_status(split_sectors); 112 + bio_endio(bio); 113 + return NULL; 114 + } 115 + 116 + if (split_sectors) { 117 + struct bio *split; 118 + 119 + split = bio_split(bio, split_sectors, GFP_NOIO, 120 + &bio->bi_bdev->bd_disk->bio_split); 121 + split->bi_opf |= REQ_NOMERGE; 122 + blkcg_bio_issue_init(split); 123 + bio_chain(split, bio); 124 + trace_block_split(split, bio->bi_iter.bi_sector); 125 + WARN_ON_ONCE(bio_zone_write_plugging(bio)); 126 + submit_bio_noacct(bio); 127 + return split; 128 + } 129 + 130 + return bio; 131 + } 132 + 133 + struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim, 134 + unsigned *nsegs) 111 135 { 112 136 unsigned int max_discard_sectors, granularity; 113 137 sector_t tmp; ··· 145 121 min(lim->max_discard_sectors, bio_allowed_max_sectors(lim)); 146 122 max_discard_sectors -= max_discard_sectors % granularity; 147 123 if (unlikely(!max_discard_sectors)) 148 - return NULL; 124 + return bio; 149 125 150 126 if (bio_sectors(bio) <= max_discard_sectors) 151 - return NULL; 127 + return bio; 152 128 153 129 split_sectors = max_discard_sectors; 154 130 ··· 163 139 if (split_sectors > tmp) 164 140 split_sectors -= tmp; 165 141 166 - return bio_split(bio, split_sectors, GFP_NOIO, bs); 142 + return bio_submit_split(bio, split_sectors); 167 143 } 168 144 169 - static struct bio *bio_split_write_zeroes(struct bio *bio, 170 - const struct queue_limits *lim, 171 - unsigned *nsegs, struct bio_set *bs) 145 + struct bio *bio_split_write_zeroes(struct bio *bio, 146 + const struct queue_limits *lim, unsigned *nsegs) 172 147 { 173 148 *nsegs = 
0; 174 149 if (!lim->max_write_zeroes_sectors) 175 - return NULL; 150 + return bio; 176 151 if (bio_sectors(bio) <= lim->max_write_zeroes_sectors) 177 - return NULL; 178 - return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs); 152 + return bio; 153 + return bio_submit_split(bio, lim->max_write_zeroes_sectors); 179 154 } 180 155 181 156 static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim, ··· 297 274 } 298 275 299 276 /** 300 - * bio_split_rw - split a bio in two bios 277 + * bio_split_rw_at - check if and where to split a read/write bio 301 278 * @bio: [in] bio to be split 302 279 * @lim: [in] queue limits to split based on 303 280 * @segs: [out] number of segments in the bio with the first half of the sectors 304 - * @bs: [in] bio set to allocate the clone from 305 281 * @max_bytes: [in] maximum number of bytes per bio 306 282 * 307 - * Clone @bio, update the bi_iter of the clone to represent the first sectors 308 - * of @bio and update @bio->bi_iter to represent the remaining sectors. The 309 - * following is guaranteed for the cloned bio: 310 - * - That it has at most @max_bytes worth of data 311 - * - That it has at most queue_max_segments(@q) segments. 312 - * 313 - * Except for discard requests the cloned bio will point at the bi_io_vec of 314 - * the original bio. It is the responsibility of the caller to ensure that the 315 - * original bio is not freed before the cloned bio. The caller is also 316 - * responsible for ensuring that @bs is only destroyed after processing of the 317 - * split bio has finished. 283 + * Find out if @bio needs to be split to fit the queue limits in @lim and a 284 + * maximum size of @max_bytes. Returns a negative error number if @bio can't be 285 + * split, 0 if the bio doesn't have to be split, or a positive sector offset if 286 + * @bio needs to be split. 
318 287 */ 319 - struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, 320 - unsigned *segs, struct bio_set *bs, unsigned max_bytes) 288 + int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, 289 + unsigned *segs, unsigned max_bytes) 321 290 { 322 291 struct bio_vec bv, bvprv, *bvprvp = NULL; 323 292 struct bvec_iter iter; ··· 339 324 } 340 325 341 326 *segs = nsegs; 342 - return NULL; 327 + return 0; 343 328 split: 344 - if (bio->bi_opf & REQ_ATOMIC) { 345 - bio->bi_status = BLK_STS_INVAL; 346 - bio_endio(bio); 347 - return ERR_PTR(-EINVAL); 348 - } 329 + if (bio->bi_opf & REQ_ATOMIC) 330 + return -EINVAL; 331 + 349 332 /* 350 333 * We can't sanely support splitting for a REQ_NOWAIT bio. End it 351 334 * with EAGAIN if splitting is required and return an error pointer. 352 335 */ 353 - if (bio->bi_opf & REQ_NOWAIT) { 354 - bio->bi_status = BLK_STS_AGAIN; 355 - bio_endio(bio); 356 - return ERR_PTR(-EAGAIN); 357 - } 336 + if (bio->bi_opf & REQ_NOWAIT) 337 + return -EAGAIN; 358 338 359 339 *segs = nsegs; 360 340 ··· 366 356 * big IO can be trival, disable iopoll when split needed. 367 357 */ 368 358 bio_clear_polled(bio); 369 - return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs); 359 + return bytes >> SECTOR_SHIFT; 370 360 } 371 - EXPORT_SYMBOL_GPL(bio_split_rw); 361 + EXPORT_SYMBOL_GPL(bio_split_rw_at); 372 362 373 - /** 374 - * __bio_split_to_limits - split a bio to fit the queue limits 375 - * @bio: bio to be split 376 - * @lim: queue limits to split based on 377 - * @nr_segs: returns the number of segments in the returned bio 378 - * 379 - * Check if @bio needs splitting based on the queue limits, and if so split off 380 - * a bio fitting the limits from the beginning of @bio and return it. @bio is 381 - * shortened to the remainder and re-submitted. 382 - * 383 - * The split bio is allocated from @q->bio_split, which is provided by the 384 - * block layer. 
385 - */ 386 - struct bio *__bio_split_to_limits(struct bio *bio, 387 - const struct queue_limits *lim, 388 - unsigned int *nr_segs) 363 + struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, 364 + unsigned *nr_segs) 389 365 { 390 - struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split; 391 - struct bio *split; 392 - 393 - switch (bio_op(bio)) { 394 - case REQ_OP_DISCARD: 395 - case REQ_OP_SECURE_ERASE: 396 - split = bio_split_discard(bio, lim, nr_segs, bs); 397 - break; 398 - case REQ_OP_WRITE_ZEROES: 399 - split = bio_split_write_zeroes(bio, lim, nr_segs, bs); 400 - break; 401 - default: 402 - split = bio_split_rw(bio, lim, nr_segs, bs, 403 - get_max_io_size(bio, lim) << SECTOR_SHIFT); 404 - if (IS_ERR(split)) 405 - return NULL; 406 - break; 407 - } 408 - 409 - if (split) { 410 - /* there isn't chance to merge the split bio */ 411 - split->bi_opf |= REQ_NOMERGE; 412 - 413 - blkcg_bio_issue_init(split); 414 - bio_chain(split, bio); 415 - trace_block_split(split, bio->bi_iter.bi_sector); 416 - WARN_ON_ONCE(bio_zone_write_plugging(bio)); 417 - submit_bio_noacct(bio); 418 - return split; 419 - } 420 - return bio; 366 + return bio_submit_split(bio, 367 + bio_split_rw_at(bio, lim, nr_segs, 368 + get_max_io_size(bio, lim) << SECTOR_SHIFT)); 421 369 } 422 370 423 371 /** ··· 394 426 const struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits; 395 427 unsigned int nr_segs; 396 428 397 - if (bio_may_exceed_limits(bio, lim)) 398 - return __bio_split_to_limits(bio, lim, &nr_segs); 399 - return bio; 429 + return __bio_split_to_limits(bio, lim, &nr_segs); 400 430 } 401 431 EXPORT_SYMBOL(bio_split_to_limits); 402 432
+5 -6
block/blk-mq.c
··· 2939 2939 struct blk_plug *plug = current->plug; 2940 2940 const int is_sync = op_is_sync(bio->bi_opf); 2941 2941 struct blk_mq_hw_ctx *hctx; 2942 - unsigned int nr_segs = 1; 2942 + unsigned int nr_segs; 2943 2943 struct request *rq; 2944 2944 blk_status_t ret; 2945 2945 ··· 2981 2981 goto queue_exit; 2982 2982 } 2983 2983 2984 - if (unlikely(bio_may_exceed_limits(bio, &q->limits))) { 2985 - bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); 2986 - if (!bio) 2987 - goto queue_exit; 2988 - } 2984 + bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); 2985 + if (!bio) 2986 + goto queue_exit; 2987 + 2989 2988 if (!bio_integrity_prep(bio)) 2990 2989 goto queue_exit; 2991 2990
+47 -22
block/blk.h
··· 331 331 ssize_t part_timeout_store(struct device *, struct device_attribute *, 332 332 const char *, size_t); 333 333 334 - static inline bool bio_may_exceed_limits(struct bio *bio, 335 - const struct queue_limits *lim) 336 - { 337 - switch (bio_op(bio)) { 338 - case REQ_OP_DISCARD: 339 - case REQ_OP_SECURE_ERASE: 340 - case REQ_OP_WRITE_ZEROES: 341 - return true; /* non-trivial splitting decisions */ 342 - default: 343 - break; 344 - } 334 + struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim, 335 + unsigned *nsegs); 336 + struct bio *bio_split_write_zeroes(struct bio *bio, 337 + const struct queue_limits *lim, unsigned *nsegs); 338 + struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, 339 + unsigned *nr_segs); 345 340 346 - /* 347 - * All drivers must accept single-segments bios that are <= PAGE_SIZE. 348 - * This is a quick and dirty check that relies on the fact that 349 - * bi_io_vec[0] is always valid if a bio has data. The check might 350 - * lead to occasional false negatives when bios are cloned, but compared 351 - * to the performance impact of cloned bios themselves the loop below 352 - * doesn't matter anyway. 353 - */ 341 + /* 342 + * All drivers must accept single-segments bios that are smaller than PAGE_SIZE. 343 + * 344 + * This is a quick and dirty check that relies on the fact that bi_io_vec[0] is 345 + * always valid if a bio has data. The check might lead to occasional false 346 + * positives when bios are cloned, but compared to the performance impact of 347 + * cloned bios themselves the loop below doesn't matter anyway. 
348 + */ 349 + static inline bool bio_may_need_split(struct bio *bio, 350 + const struct queue_limits *lim) 351 + { 354 352 return lim->chunk_sectors || bio->bi_vcnt != 1 || 355 353 bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE; 356 354 } 357 355 358 - struct bio *__bio_split_to_limits(struct bio *bio, 359 - const struct queue_limits *lim, 360 - unsigned int *nr_segs); 356 + /** 357 + * __bio_split_to_limits - split a bio to fit the queue limits 358 + * @bio: bio to be split 359 + * @lim: queue limits to split based on 360 + * @nr_segs: returns the number of segments in the returned bio 361 + * 362 + * Check if @bio needs splitting based on the queue limits, and if so split off 363 + * a bio fitting the limits from the beginning of @bio and return it. @bio is 364 + * shortened to the remainder and re-submitted. 365 + * 366 + * The split bio is allocated from @q->bio_split, which is provided by the 367 + * block layer. 368 + */ 369 + static inline struct bio *__bio_split_to_limits(struct bio *bio, 370 + const struct queue_limits *lim, unsigned int *nr_segs) 371 + { 372 + switch (bio_op(bio)) { 373 + default: 374 + if (bio_may_need_split(bio, lim)) 375 + return bio_split_rw(bio, lim, nr_segs); 376 + *nr_segs = 1; 377 + return bio; 378 + case REQ_OP_DISCARD: 379 + case REQ_OP_SECURE_ERASE: 380 + return bio_split_discard(bio, lim, nr_segs); 381 + case REQ_OP_WRITE_ZEROES: 382 + return bio_split_write_zeroes(bio, lim, nr_segs); 383 + } 384 + } 385 + 361 386 int ll_back_merge_fn(struct request *req, struct bio *bio, 362 387 unsigned int nr_segs); 363 388 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+18 -12
fs/btrfs/bio.c
··· 73 73 74 74 static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info, 75 75 struct btrfs_bio *orig_bbio, 76 - u64 map_length, bool use_append) 76 + u64 map_length) 77 77 { 78 78 struct btrfs_bio *bbio; 79 79 struct bio *bio; 80 80 81 - if (use_append) { 82 - unsigned int nr_segs; 83 - 84 - bio = bio_split_rw(&orig_bbio->bio, &fs_info->limits, &nr_segs, 85 - &btrfs_clone_bioset, map_length); 86 - } else { 87 - bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, 88 - GFP_NOFS, &btrfs_clone_bioset); 89 - } 81 + bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, GFP_NOFS, 82 + &btrfs_clone_bioset); 90 83 bbio = btrfs_bio(bio); 91 84 btrfs_bio_init(bbio, fs_info, NULL, orig_bbio); 92 85 bbio->inode = orig_bbio->inode; ··· 657 664 return true; 658 665 } 659 666 667 + static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length) 668 + { 669 + unsigned int nr_segs; 670 + int sector_offset; 671 + 672 + map_length = min(map_length, bbio->fs_info->max_zone_append_size); 673 + sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits, 674 + &nr_segs, map_length); 675 + if (sector_offset) 676 + return sector_offset << SECTOR_SHIFT; 677 + return map_length; 678 + } 679 + 660 680 static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) 661 681 { 662 682 struct btrfs_inode *inode = bbio->inode; ··· 697 691 698 692 map_length = min(map_length, length); 699 693 if (use_append) 700 - map_length = min(map_length, fs_info->max_zone_append_size); 694 + map_length = btrfs_append_map_length(bbio, map_length); 701 695 702 696 if (map_length < length) { 703 - bbio = btrfs_split_bio(fs_info, bbio, map_length, use_append); 697 + bbio = btrfs_split_bio(fs_info, bbio, map_length); 704 698 bio = &bbio->bio; 705 699 } 706 700
+2 -2
include/linux/bio.h
··· 324 324 void bio_trim(struct bio *bio, sector_t offset, sector_t size); 325 325 extern struct bio *bio_split(struct bio *bio, int sectors, 326 326 gfp_t gfp, struct bio_set *bs); 327 - struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, 328 - unsigned *segs, struct bio_set *bs, unsigned max_bytes); 327 + int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, 328 + unsigned *segs, unsigned max_bytes); 329 329 330 330 /** 331 331 * bio_next_split - get next @sectors from a bio, splitting if necessary