Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "md/raid10: improve discard request for far layout"

This reverts commit d3ee2d8415a6256c1c41e1be36e80e640c3e6359.

Matthew Ruffell reported data corruption in raid10 due to the changes
in discard handling [1]. Revert these changes until we find a proper fix.

[1] https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1907262/
Cc: Matthew Ruffell <matthew.ruffell@canonical.com>
Cc: Xiao Ni <xni@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>

Song Liu 82fe9af7 e2782f56

+23 -64
+23 -63
drivers/md/raid10.c
··· 1534 1534 return bio; 1535 1535 } 1536 1536 1537 - static void raid_end_discard_bio(struct r10bio *r10bio) 1538 - { 1539 - struct r10conf *conf = r10bio->mddev->private; 1540 - struct r10bio *first_r10bio; 1541 - 1542 - while (atomic_dec_and_test(&r10bio->remaining)) { 1543 - 1544 - allow_barrier(conf); 1545 - 1546 - if (!test_bit(R10BIO_Discard, &r10bio->state)) { 1547 - first_r10bio = (struct r10bio *)r10bio->master_bio; 1548 - free_r10bio(r10bio); 1549 - r10bio = first_r10bio; 1550 - } else { 1551 - md_write_end(r10bio->mddev); 1552 - bio_endio(r10bio->master_bio); 1553 - free_r10bio(r10bio); 1554 - break; 1555 - } 1556 - } 1557 - } 1558 - 1559 1537 static void raid10_end_discard_request(struct bio *bio) 1560 1538 { 1561 1539 struct r10bio *r10_bio = bio->bi_private; ··· 1560 1582 rdev = conf->mirrors[dev].rdev; 1561 1583 } 1562 1584 1563 - raid_end_discard_bio(r10_bio); 1585 + if (atomic_dec_and_test(&r10_bio->remaining)) { 1586 + md_write_end(r10_bio->mddev); 1587 + raid_end_bio_io(r10_bio); 1588 + } 1589 + 1564 1590 rdev_dec_pending(rdev, conf->mddev); 1565 1591 } 1566 1592 ··· 1577 1595 { 1578 1596 struct r10conf *conf = mddev->private; 1579 1597 struct geom *geo = &conf->geo; 1580 - struct r10bio *r10_bio, *first_r10bio; 1581 - int far_copies = geo->far_copies; 1582 - bool first_copy = true; 1598 + struct r10bio *r10_bio; 1583 1599 1584 1600 int disk; 1585 1601 sector_t chunk; ··· 1616 1636 if (bio_sectors(bio) < stripe_size*2) 1617 1637 goto out; 1618 1638 1619 - /* For far and far offset layout, if bio is not aligned with stripe size, 1620 - * it splits the part that is not aligned with strip size. 1639 + /* For far offset layout, if bio is not aligned with stripe size, it splits 1640 + * the part that is not aligned with strip size. 
1621 1641 */ 1622 1642 div_u64_rem(bio_start, stripe_size, &remainder); 1623 - if ((far_copies > 1) && remainder) { 1643 + if (geo->far_offset && remainder) { 1624 1644 split_size = stripe_size - remainder; 1625 1645 bio = raid10_split_bio(conf, bio, split_size, false); 1626 1646 } 1627 1647 div_u64_rem(bio_end, stripe_size, &remainder); 1628 - if ((far_copies > 1) && remainder) { 1648 + if (geo->far_offset && remainder) { 1629 1649 split_size = bio_sectors(bio) - remainder; 1630 1650 bio = raid10_split_bio(conf, bio, split_size, true); 1631 1651 } 1652 + 1653 + r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO); 1654 + r10_bio->mddev = mddev; 1655 + r10_bio->state = 0; 1656 + r10_bio->sectors = 0; 1657 + memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks); 1658 + 1659 + wait_blocked_dev(mddev, r10_bio); 1660 + 1661 + r10_bio->master_bio = bio; 1632 1662 1633 1663 bio_start = bio->bi_iter.bi_sector; 1634 1664 bio_end = bio_end_sector(bio); ··· 1664 1674 last_stripe_index *= geo->far_copies; 1665 1675 end_disk_offset = (bio_end & geo->chunk_mask) + 1666 1676 (last_stripe_index << geo->chunk_shift); 1667 - 1668 - retry_discard: 1669 - r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO); 1670 - r10_bio->mddev = mddev; 1671 - r10_bio->state = 0; 1672 - r10_bio->sectors = 0; 1673 - memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks); 1674 - wait_blocked_dev(mddev, r10_bio); 1675 - 1676 - /* For far layout it needs more than one r10bio to cover all regions. 1677 - * Inspired by raid10_sync_request, we can use the first r10bio->master_bio 1678 - * to record the discard bio. Other r10bio->master_bio record the first 1679 - * r10bio. The first r10bio only release after all other r10bios finish. 
1680 - * The discard bio returns only first r10bio finishes 1681 - */ 1682 - if (first_copy) { 1683 - r10_bio->master_bio = bio; 1684 - set_bit(R10BIO_Discard, &r10_bio->state); 1685 - first_copy = false; 1686 - first_r10bio = r10_bio; 1687 - } else 1688 - r10_bio->master_bio = (struct bio *)first_r10bio; 1689 1677 1690 1678 rcu_read_lock(); 1691 1679 for (disk = 0; disk < geo->raid_disks; disk++) { ··· 1755 1787 } 1756 1788 } 1757 1789 1758 - if (!geo->far_offset && --far_copies) { 1759 - first_stripe_index += geo->stride >> geo->chunk_shift; 1760 - start_disk_offset += geo->stride; 1761 - last_stripe_index += geo->stride >> geo->chunk_shift; 1762 - end_disk_offset += geo->stride; 1763 - atomic_inc(&first_r10bio->remaining); 1764 - raid_end_discard_bio(r10_bio); 1765 - wait_barrier(conf); 1766 - goto retry_discard; 1790 + if (atomic_dec_and_test(&r10_bio->remaining)) { 1791 + md_write_end(r10_bio->mddev); 1792 + raid_end_bio_io(r10_bio); 1767 1793 } 1768 - 1769 - raid_end_discard_bio(r10_bio); 1770 1794 1771 1795 return 0; 1772 1796 out:
-1
drivers/md/raid10.h
··· 179 179 R10BIO_Previous, 180 180 /* failfast devices did receive failfast requests. */ 181 181 R10BIO_FailFast, 182 - R10BIO_Discard, 183 182 }; 184 183 #endif