Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

md/raid10: improve discard request for far layout

For the far layout, the discard region is not contiguous on the disks, so
one r10bio per far copy is needed to cover all regions. There must also be
a way to know whether all of these r10bios have finished. Similar to
raid10_sync_request, only the first r10bio's master_bio records the discard
bio; the master_bio of every other r10bio records the first r10bio. The
first r10bio can finish only after the other r10bios have finished, and it
then completes the discard bio.

Tested-by: Adrian Huang <ahuang12@lenovo.com>
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>

authored by

Xiao Ni and committed by
Song Liu
254c271d d30588b2

+61 -19
+60 -19
drivers/md/raid10.c
··· 1518 1518 raid10_write_request(mddev, bio, r10_bio); 1519 1519 } 1520 1520 1521 + static void raid_end_discard_bio(struct r10bio *r10bio) 1522 + { 1523 + struct r10conf *conf = r10bio->mddev->private; 1524 + struct r10bio *first_r10bio; 1525 + 1526 + while (atomic_dec_and_test(&r10bio->remaining)) { 1527 + 1528 + allow_barrier(conf); 1529 + 1530 + if (!test_bit(R10BIO_Discard, &r10bio->state)) { 1531 + first_r10bio = (struct r10bio *)r10bio->master_bio; 1532 + free_r10bio(r10bio); 1533 + r10bio = first_r10bio; 1534 + } else { 1535 + md_write_end(r10bio->mddev); 1536 + bio_endio(r10bio->master_bio); 1537 + free_r10bio(r10bio); 1538 + break; 1539 + } 1540 + } 1541 + } 1542 + 1521 1543 static void raid10_end_discard_request(struct bio *bio) 1522 1544 { 1523 1545 struct r10bio *r10_bio = bio->bi_private; ··· 1567 1545 rdev = conf->mirrors[dev].rdev; 1568 1546 } 1569 1547 1570 - if (atomic_dec_and_test(&r10_bio->remaining)) { 1571 - md_write_end(r10_bio->mddev); 1572 - raid_end_bio_io(r10_bio); 1573 - } 1574 - 1548 + raid_end_discard_bio(r10_bio); 1575 1549 rdev_dec_pending(rdev, conf->mddev); 1576 1550 } 1577 1551 ··· 1581 1563 { 1582 1564 struct r10conf *conf = mddev->private; 1583 1565 struct geom *geo = &conf->geo; 1584 - struct r10bio *r10_bio; 1566 + int far_copies = geo->far_copies; 1567 + bool first_copy = true; 1568 + struct r10bio *r10_bio, *first_r10bio; 1585 1569 struct bio *split; 1586 1570 int disk; 1587 1571 sector_t chunk; ··· 1657 1637 wait_barrier(conf); 1658 1638 } 1659 1639 1660 - r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO); 1661 - r10_bio->mddev = mddev; 1662 - r10_bio->state = 0; 1663 - r10_bio->sectors = 0; 1664 - memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks); 1665 - 1666 - wait_blocked_dev(mddev, r10_bio); 1667 - 1668 - r10_bio->master_bio = bio; 1669 - 1670 1640 bio_start = bio->bi_iter.bi_sector; 1671 1641 bio_end = bio_end_sector(bio); 1672 1642 ··· 1682 1672 last_stripe_index *= geo->far_copies; 1683 1673 
end_disk_offset = (bio_end & geo->chunk_mask) + 1684 1674 (last_stripe_index << geo->chunk_shift); 1675 + 1676 + retry_discard: 1677 + r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO); 1678 + r10_bio->mddev = mddev; 1679 + r10_bio->state = 0; 1680 + r10_bio->sectors = 0; 1681 + memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks); 1682 + wait_blocked_dev(mddev, r10_bio); 1683 + 1684 + /* 1685 + * For far layout it needs more than one r10bio to cover all regions. 1686 + * Inspired by raid10_sync_request, we can use the first r10bio->master_bio 1687 + * to record the discard bio. Other r10bio->master_bio record the first 1688 + * r10bio. The first r10bio only release after all other r10bios finish. 1689 + * The discard bio returns only first r10bio finishes 1690 + */ 1691 + if (first_copy) { 1692 + r10_bio->master_bio = bio; 1693 + set_bit(R10BIO_Discard, &r10_bio->state); 1694 + first_copy = false; 1695 + first_r10bio = r10_bio; 1696 + } else 1697 + r10_bio->master_bio = (struct bio *)first_r10bio; 1685 1698 1686 1699 rcu_read_lock(); 1687 1700 for (disk = 0; disk < geo->raid_disks; disk++) { ··· 1797 1764 } 1798 1765 } 1799 1766 1800 - if (atomic_dec_and_test(&r10_bio->remaining)) { 1801 - md_write_end(r10_bio->mddev); 1802 - raid_end_bio_io(r10_bio); 1767 + if (!geo->far_offset && --far_copies) { 1768 + first_stripe_index += geo->stride >> geo->chunk_shift; 1769 + start_disk_offset += geo->stride; 1770 + last_stripe_index += geo->stride >> geo->chunk_shift; 1771 + end_disk_offset += geo->stride; 1772 + atomic_inc(&first_r10bio->remaining); 1773 + raid_end_discard_bio(r10_bio); 1774 + wait_barrier(conf); 1775 + goto retry_discard; 1803 1776 } 1777 + 1778 + raid_end_discard_bio(r10_bio); 1804 1779 1805 1780 return 0; 1806 1781 out:
+1
drivers/md/raid10.h
··· 179 179 R10BIO_Previous, 180 180 /* failfast devices did receive failfast requests. */ 181 181 R10BIO_FailFast, 182 + R10BIO_Discard, 182 183 }; 183 184 #endif