Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: remove management of bi_remaining when restoring original bi_end_io

Commit c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for
non-chains") regressed all existing callers that followed this pattern:
1) saving a bio's original bi_end_io
2) wiring up an intermediate bi_end_io
3) restoring the original bi_end_io from intermediate bi_end_io
4) calling bio_endio() to execute the restored original bi_end_io

The regression was due to BIO_CHAIN only ever getting set if
bio_inc_remaining() is called. For the above pattern it isn't set until
step 3 above (step 2 would've needed to establish BIO_CHAIN). As such
the first bio_endio(), in step 2 above, never decremented __bi_remaining
before calling the intermediate bi_end_io -- leaving __bi_remaining with
the value 1 instead of 0. When bio_inc_remaining() occurred during step
3 it brought it to a value of 2. When the second bio_endio() was
called, in step 4 above, it should've called the original bi_end_io but
it didn't because there was an extra reference that wasn't dropped (due
to atomic operations being optimized away since BIO_CHAIN wasn't set
upfront).

Fix this issue by removing the __bi_remaining management complexity for
all callers that use the above pattern -- bio_chain() is the only
interface that _needs_ to be concerned with __bi_remaining. For the
above pattern callers just expect the bi_end_io they set to get called!
Remove bio_endio_nodec() and also remove all bio_inc_remaining() calls
that aren't associated with the bio_chain() interface.

Also, the bio_inc_remaining() interface has been moved local to bio.c.

Fixes: c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>

authored by

Mike Snitzer and committed by
Jens Axboe
326e1dbb b04a5636

+27 -66
+2 -2
block/bio-integrity.c
··· 361 361 362 362 /* Restore original bio completion handler */ 363 363 bio->bi_end_io = bip->bip_end_io; 364 - bio_endio_nodec(bio, error); 364 + bio_endio(bio, error); 365 365 } 366 366 367 367 /** ··· 388 388 */ 389 389 if (error) { 390 390 bio->bi_end_io = bip->bip_end_io; 391 - bio_endio_nodec(bio, error); 391 + bio_endio(bio, error); 392 392 393 393 return; 394 394 }
+14 -21
block/bio.c
··· 303 303 bio_put(bio); 304 304 } 305 305 306 + /* 307 + * Increment chain count for the bio. Make sure the CHAIN flag update 308 + * is visible before the raised count. 309 + */ 310 + static inline void bio_inc_remaining(struct bio *bio) 311 + { 312 + bio->bi_flags |= (1 << BIO_CHAIN); 313 + smp_mb__before_atomic(); 314 + atomic_inc(&bio->__bi_remaining); 315 + } 316 + 306 317 /** 307 318 * bio_chain - chain bio completions 308 319 * @bio: the target bio ··· 1767 1756 1768 1757 BUG_ON(atomic_read(&bio->__bi_remaining) <= 0); 1769 1758 1770 - if (atomic_dec_and_test(&bio->__bi_remaining)) 1759 + if (atomic_dec_and_test(&bio->__bi_remaining)) { 1760 + clear_bit(BIO_CHAIN, &bio->bi_flags); 1771 1761 return true; 1762 + } 1772 1763 1773 1764 return false; 1774 1765 } ··· 1820 1807 } 1821 1808 } 1822 1809 EXPORT_SYMBOL(bio_endio); 1823 - 1824 - /** 1825 - * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining 1826 - * @bio: bio 1827 - * @error: error, if any 1828 - * 1829 - * For code that has saved and restored bi_end_io; thing hard before using this 1830 - * function, probably you should've cloned the entire bio. 1831 - **/ 1832 - void bio_endio_nodec(struct bio *bio, int error) 1833 - { 1834 - /* 1835 - * If it's not flagged as a chain, we are not going to dec the count 1836 - */ 1837 - if (bio_flagged(bio, BIO_CHAIN)) 1838 - bio_inc_remaining(bio); 1839 - 1840 - bio_endio(bio, error); 1841 - } 1842 - EXPORT_SYMBOL(bio_endio_nodec); 1843 1810 1844 1811 /** 1845 1812 * bio_split - split a bio
+1 -1
drivers/md/bcache/io.c
··· 55 55 56 56 s->bio->bi_end_io = s->bi_end_io; 57 57 s->bio->bi_private = s->bi_private; 58 - bio_endio_nodec(s->bio, 0); 58 + bio_endio(s->bio, 0); 59 59 60 60 closure_debug_destroy(&s->cl); 61 61 mempool_free(s, s->p->bio_split_hook);
-6
drivers/md/dm-cache-target.c
··· 86 86 { 87 87 bio->bi_end_io = h->bi_end_io; 88 88 bio->bi_private = h->bi_private; 89 - 90 - /* 91 - * Must bump bi_remaining to allow bio to complete with 92 - * restored bi_end_io. 93 - */ 94 - bio_inc_remaining(bio); 95 89 } 96 90 97 91 /*----------------------------------------------------------------*/
-2
drivers/md/dm-raid1.c
··· 1254 1254 dm_bio_restore(bd, bio); 1255 1255 bio_record->details.bi_bdev = NULL; 1256 1256 1257 - bio_inc_remaining(bio); 1258 - 1259 1257 queue_bio(ms, bio, rw); 1260 1258 return DM_ENDIO_INCOMPLETE; 1261 1259 }
-1
drivers/md/dm-snap.c
··· 1478 1478 if (full_bio) { 1479 1479 full_bio->bi_end_io = pe->full_bio_end_io; 1480 1480 full_bio->bi_private = pe->full_bio_private; 1481 - bio_inc_remaining(full_bio); 1482 1481 } 1483 1482 increment_pending_exceptions_done_count(); 1484 1483
+3 -6
drivers/md/dm-thin.c
··· 793 793 794 794 static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) 795 795 { 796 - if (m->bio) { 796 + if (m->bio) 797 797 m->bio->bi_end_io = m->saved_bi_end_io; 798 - bio_inc_remaining(m->bio); 799 - } 798 + 800 799 cell_error(m->tc->pool, m->cell); 801 800 list_del(&m->list); 802 801 mempool_free(m, m->tc->pool->mapping_pool); ··· 809 810 int r; 810 811 811 812 bio = m->bio; 812 - if (bio) { 813 + if (bio) 813 814 bio->bi_end_io = m->saved_bi_end_io; 814 - bio_inc_remaining(bio); 815 - } 816 815 817 816 if (m->err) { 818 817 cell_error(pool, m->cell);
+1 -1
drivers/md/dm-verity.c
··· 459 459 bio->bi_end_io = io->orig_bi_end_io; 460 460 bio->bi_private = io->orig_bi_private; 461 461 462 - bio_endio_nodec(bio, error); 462 + bio_endio(bio, error); 463 463 } 464 464 465 465 static void verity_work(struct work_struct *w)
+1 -1
fs/btrfs/disk-io.c
··· 1745 1745 bio->bi_private = end_io_wq->private; 1746 1746 bio->bi_end_io = end_io_wq->end_io; 1747 1747 kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); 1748 - bio_endio_nodec(bio, error); 1748 + bio_endio(bio, error); 1749 1749 } 1750 1750 1751 1751 static int cleaner_kthread(void *arg)
+5 -11
fs/btrfs/volumes.c
··· 5585 5585 5586 5586 static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) 5587 5587 { 5588 - if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) 5589 - bio_endio_nodec(bio, err); 5590 - else 5591 - bio_endio(bio, err); 5588 + bio->bi_private = bbio->private; 5589 + bio->bi_end_io = bbio->end_io; 5590 + bio_endio(bio, err); 5591 + 5592 5592 btrfs_put_bbio(bbio); 5593 5593 } 5594 5594 ··· 5632 5632 bio = bbio->orig_bio; 5633 5633 } 5634 5634 5635 - bio->bi_private = bbio->private; 5636 - bio->bi_end_io = bbio->end_io; 5637 5635 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5638 5636 /* only send an error to the higher layers if it is 5639 5637 * beyond the tolerance of the btrfs bio ··· 5813 5815 /* Shoud be the original bio. */ 5814 5816 WARN_ON(bio != bbio->orig_bio); 5815 5817 5816 - bio->bi_private = bbio->private; 5817 - bio->bi_end_io = bbio->end_io; 5818 5818 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5819 5819 bio->bi_iter.bi_sector = logical >> 9; 5820 5820 ··· 5893 5897 if (dev_nr < total_devs - 1) { 5894 5898 bio = btrfs_bio_clone(first_bio, GFP_NOFS); 5895 5899 BUG_ON(!bio); /* -ENOMEM */ 5896 - } else { 5900 + } else 5897 5901 bio = first_bio; 5898 - bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; 5899 - } 5900 5902 5901 5903 submit_stripe_bio(root, bbio, bio, 5902 5904 bbio->stripes[dev_nr].physical, dev_nr, rw,
-2
fs/btrfs/volumes.h
··· 292 292 struct btrfs_bio; 293 293 typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); 294 294 295 - #define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) 296 - 297 295 struct btrfs_bio { 298 296 atomic_t refs; 299 297 atomic_t stripes_pending;
-12
include/linux/bio.h
··· 427 427 } 428 428 429 429 extern void bio_endio(struct bio *, int); 430 - extern void bio_endio_nodec(struct bio *, int); 431 430 struct request_queue; 432 431 extern int bio_phys_segments(struct request_queue *, struct bio *); 433 432 ··· 655 656 bl->head = bl->tail = NULL; 656 657 657 658 return bio; 658 - } 659 - 660 - /* 661 - * Increment chain count for the bio. Make sure the CHAIN flag update 662 - * is visible before the raised count. 663 - */ 664 - static inline void bio_inc_remaining(struct bio *bio) 665 - { 666 - bio->bi_flags |= (1 << BIO_CHAIN); 667 - smp_mb__before_atomic(); 668 - atomic_inc(&bio->__bi_remaining); 669 659 } 670 660 671 661 /*