Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: Generic bio chaining

This adds a generic mechanism for chaining bio completions. This is
going to be used for a bio_split() replacement, and it turns out to be
very useful in a fair amount of driver code - a fair number of drivers
were implementing this in their own roundabout ways, often painfully.

Note that this means it's no longer allowed to call bio_endio() more than once
on the same bio! This can cause problems for drivers that save/restore
bi_end_io. Arguably they shouldn't be saving/restoring bi_end_io at all
- in all but the simplest cases they'd be better off just cloning the
bio, and immutable biovecs are making bio cloning cheaper. But for now,
we add a bio_endio_nodec() for these cases.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>

+90 -11
+1 -1
drivers/md/bcache/io.c
··· 133 133 134 134 s->bio->bi_end_io = s->bi_end_io; 135 135 s->bio->bi_private = s->bi_private; 136 - bio_endio(s->bio, 0); 136 + bio_endio_nodec(s->bio, 0); 137 137 138 138 closure_debug_destroy(&s->cl); 139 139 mempool_free(s, s->p->bio_split_hook);
+6
drivers/md/dm-cache-target.c
··· 765 765 766 766 dm_unhook_bio(&pb->hook_info, bio); 767 767 768 + /* 769 + * Must bump bi_remaining to allow bio to complete with 770 + * restored bi_end_io. 771 + */ 772 + atomic_inc(&bio->bi_remaining); 773 + 768 774 if (err) { 769 775 bio_endio(bio, err); 770 776 return;
+1
drivers/md/dm-snap.c
··· 1415 1415 if (full_bio) { 1416 1416 full_bio->bi_end_io = pe->full_bio_end_io; 1417 1417 full_bio->bi_private = pe->full_bio_private; 1418 + atomic_inc(&full_bio->bi_remaining); 1418 1419 } 1419 1420 free_pending_exception(pe); 1420 1421
+6 -2
drivers/md/dm-thin.c
··· 611 611 612 612 static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) 613 613 { 614 - if (m->bio) 614 + if (m->bio) { 615 615 m->bio->bi_end_io = m->saved_bi_end_io; 616 + atomic_inc(&m->bio->bi_remaining); 617 + } 616 618 cell_error(m->tc->pool, m->cell); 617 619 list_del(&m->list); 618 620 mempool_free(m, m->tc->pool->mapping_pool); ··· 628 626 int r; 629 627 630 628 bio = m->bio; 631 - if (bio) 629 + if (bio) { 632 630 bio->bi_end_io = m->saved_bi_end_io; 631 + atomic_inc(&bio->bi_remaining); 632 + } 633 633 634 634 if (m->err) { 635 635 cell_error(pool, m->cell);
+1 -1
drivers/md/dm-verity.c
··· 385 385 bio->bi_end_io = io->orig_bi_end_io; 386 386 bio->bi_private = io->orig_bi_private; 387 387 388 - bio_endio(bio, error); 388 + bio_endio_nodec(bio, error); 389 389 } 390 390 391 391 static void verity_work(struct work_struct *w)
+1 -1
fs/bio-integrity.c
··· 502 502 503 503 /* Restore original bio completion handler */ 504 504 bio->bi_end_io = bip->bip_end_io; 505 - bio_endio(bio, error); 505 + bio_endio_nodec(bio, error); 506 506 } 507 507 508 508 /**
+70 -6
fs/bio.c
··· 273 273 { 274 274 memset(bio, 0, sizeof(*bio)); 275 275 bio->bi_flags = 1 << BIO_UPTODATE; 276 + atomic_set(&bio->bi_remaining, 1); 276 277 atomic_set(&bio->bi_cnt, 1); 277 278 } 278 279 EXPORT_SYMBOL(bio_init); ··· 296 295 297 296 memset(bio, 0, BIO_RESET_BYTES); 298 297 bio->bi_flags = flags|(1 << BIO_UPTODATE); 298 + atomic_set(&bio->bi_remaining, 1); 299 299 } 300 300 EXPORT_SYMBOL(bio_reset); 301 + 302 + static void bio_chain_endio(struct bio *bio, int error) 303 + { 304 + bio_endio(bio->bi_private, error); 305 + bio_put(bio); 306 + } 307 + 308 + /** 309 + * bio_chain - chain bio completions 310 + * 311 + * The caller won't have a bi_end_io called when @bio completes - instead, 312 + * @parent's bi_end_io won't be called until both @parent and @bio have 313 + * completed; the chained bio will also be freed when it completes. 314 + * 315 + * The caller must not set bi_private or bi_end_io in @bio. 316 + */ 317 + void bio_chain(struct bio *bio, struct bio *parent) 318 + { 319 + BUG_ON(bio->bi_private || bio->bi_end_io); 320 + 321 + bio->bi_private = parent; 322 + bio->bi_end_io = bio_chain_endio; 323 + atomic_inc(&parent->bi_remaining); 324 + } 325 + EXPORT_SYMBOL(bio_chain); 301 326 302 327 static void bio_alloc_rescue(struct work_struct *work) 303 328 { ··· 1746 1719 **/ 1747 1720 void bio_endio(struct bio *bio, int error) 1748 1721 { 1749 - if (error) 1750 - clear_bit(BIO_UPTODATE, &bio->bi_flags); 1751 - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1752 - error = -EIO; 1722 + while (bio) { 1723 + BUG_ON(atomic_read(&bio->bi_remaining) <= 0); 1753 1724 1754 - if (bio->bi_end_io) 1755 - bio->bi_end_io(bio, error); 1725 + if (error) 1726 + clear_bit(BIO_UPTODATE, &bio->bi_flags); 1727 + else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1728 + error = -EIO; 1729 + 1730 + if (!atomic_dec_and_test(&bio->bi_remaining)) 1731 + return; 1732 + 1733 + /* 1734 + * Need to have a real endio function for chained bios, 1735 + * otherwise various corner cases will 
break (like stacking 1736 + * block devices that save/restore bi_end_io) - however, we want 1737 + * to avoid unbounded recursion and blowing the stack. Tail call 1738 + * optimization would handle this, but compiling with frame 1739 + * pointers also disables gcc's sibling call optimization. 1740 + */ 1741 + if (bio->bi_end_io == bio_chain_endio) { 1742 + struct bio *parent = bio->bi_private; 1743 + bio_put(bio); 1744 + bio = parent; 1745 + } else { 1746 + if (bio->bi_end_io) 1747 + bio->bi_end_io(bio, error); 1748 + bio = NULL; 1749 + } 1750 + } 1756 1751 } 1757 1752 EXPORT_SYMBOL(bio_endio); 1753 + 1754 + /** 1755 + * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining 1756 + * @bio: bio 1757 + * @error: error, if any 1758 + * 1759 + * For code that has saved and restored bi_end_io; thing hard before using this 1760 + * function, probably you should've cloned the entire bio. 1761 + **/ 1762 + void bio_endio_nodec(struct bio *bio, int error) 1763 + { 1764 + atomic_inc(&bio->bi_remaining); 1765 + bio_endio(bio, error); 1766 + } 1767 + EXPORT_SYMBOL(bio_endio_nodec); 1758 1768 1759 1769 void bio_pair_release(struct bio_pair *bp) 1760 1770 {
+2
include/linux/bio.h
··· 356 356 } 357 357 358 358 extern void bio_endio(struct bio *, int); 359 + extern void bio_endio_nodec(struct bio *, int); 359 360 struct request_queue; 360 361 extern int bio_phys_segments(struct request_queue *, struct bio *); 361 362 ··· 365 364 366 365 extern void bio_init(struct bio *); 367 366 extern void bio_reset(struct bio *); 367 + void bio_chain(struct bio *, struct bio *); 368 368 369 369 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); 370 370 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
+2
include/linux/blk_types.h
··· 65 65 unsigned int bi_seg_front_size; 66 66 unsigned int bi_seg_back_size; 67 67 68 + atomic_t bi_remaining; 69 + 68 70 bio_end_io_t *bi_end_io; 69 71 70 72 void *bi_private;