Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: fix reserved cluster accounting at page invalidation time

Add new code to count canceled pending cluster reservations on bigalloc
file systems and to reduce the cluster reservation count on all file
systems using delayed allocation. This replaces old code in
ext4_da_page_release_reservations that was incorrect.

Signed-off-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

Authored by Eric Whitney and committed by Theodore Ts'o
f456767d 9fe67149

+95 -19
+1
fs/ext4/ext4.h
··· 2491 2491 extern int ext4_filemap_fault(struct vm_fault *vmf); 2492 2492 extern qsize_t *ext4_get_reserved_space(struct inode *inode); 2493 2493 extern int ext4_get_projid(struct inode *inode, kprojid_t *projid); 2494 + extern void ext4_da_release_space(struct inode *inode, int to_free); 2494 2495 extern void ext4_da_update_reserve_space(struct inode *inode, 2495 2496 int used, int quota_claim); 2496 2497 extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+90
fs/ext4/extents_status.c
··· 1780 1780 __remove_pending(inode, last); 1781 1781 } 1782 1782 } 1783 + 1784 + /* 1785 + * ext4_es_remove_blks - remove block range from extents status tree and 1786 + * reduce reservation count or cancel pending 1787 + * reservation as needed 1788 + * 1789 + * @inode - file containing range 1790 + * @lblk - first block in range 1791 + * @len - number of blocks to remove 1792 + * 1793 + */ 1794 + void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk, 1795 + ext4_lblk_t len) 1796 + { 1797 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1798 + unsigned int clu_size, reserved = 0; 1799 + ext4_lblk_t last_lclu, first, length, remainder, last; 1800 + bool delonly; 1801 + int err = 0; 1802 + struct pending_reservation *pr; 1803 + struct ext4_pending_tree *tree; 1804 + 1805 + /* 1806 + * Process cluster by cluster for bigalloc - there may be up to 1807 + * two clusters in a 4k page with a 1k block size and two blocks 1808 + * per cluster. Also necessary for systems with larger page sizes 1809 + * and potentially larger block sizes. 1810 + */ 1811 + clu_size = sbi->s_cluster_ratio; 1812 + last_lclu = EXT4_B2C(sbi, lblk + len - 1); 1813 + 1814 + write_lock(&EXT4_I(inode)->i_es_lock); 1815 + 1816 + for (first = lblk, remainder = len; 1817 + remainder > 0; 1818 + first += length, remainder -= length) { 1819 + 1820 + if (EXT4_B2C(sbi, first) == last_lclu) 1821 + length = remainder; 1822 + else 1823 + length = clu_size - EXT4_LBLK_COFF(sbi, first); 1824 + 1825 + /* 1826 + * The BH_Delay flag, which triggers calls to this function, 1827 + * and the contents of the extents status tree can be 1828 + * inconsistent due to writepages activity. So, note whether 1829 + * the blocks to be removed actually belong to an extent with 1830 + * delayed only status. 
1831 + */ 1832 + delonly = __es_scan_clu(inode, &ext4_es_is_delonly, first); 1833 + 1834 + /* 1835 + * because of the writepages effect, written and unwritten 1836 + * blocks could be removed here 1837 + */ 1838 + last = first + length - 1; 1839 + err = __es_remove_extent(inode, first, last); 1840 + if (err) 1841 + ext4_warning(inode->i_sb, 1842 + "%s: couldn't remove page (err = %d)", 1843 + __func__, err); 1844 + 1845 + /* non-bigalloc case: simply count the cluster for release */ 1846 + if (sbi->s_cluster_ratio == 1 && delonly) { 1847 + reserved++; 1848 + continue; 1849 + } 1850 + 1851 + /* 1852 + * bigalloc case: if all delayed allocated only blocks have 1853 + * just been removed from a cluster, either cancel a pending 1854 + * reservation if it exists or count a cluster for release 1855 + */ 1856 + if (delonly && 1857 + !__es_scan_clu(inode, &ext4_es_is_delonly, first)) { 1858 + pr = __get_pending(inode, EXT4_B2C(sbi, first)); 1859 + if (pr != NULL) { 1860 + tree = &EXT4_I(inode)->i_pending_tree; 1861 + rb_erase(&pr->rb_node, &tree->root); 1862 + kmem_cache_free(ext4_pending_cachep, pr); 1863 + } else { 1864 + reserved++; 1865 + } 1866 + } 1867 + } 1868 + 1869 + write_unlock(&EXT4_I(inode)->i_es_lock); 1870 + 1871 + ext4_da_release_space(inode, reserved); 1872 + }
+4 -19
fs/ext4/inode.c
··· 1595 1595 return 0; /* success */ 1596 1596 } 1597 1597 1598 - static void ext4_da_release_space(struct inode *inode, int to_free) 1598 + void ext4_da_release_space(struct inode *inode, int to_free) 1599 1599 { 1600 1600 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1601 1601 struct ext4_inode_info *ei = EXT4_I(inode); ··· 1634 1634 unsigned int offset, 1635 1635 unsigned int length) 1636 1636 { 1637 - int to_release = 0, contiguous_blks = 0; 1637 + int contiguous_blks = 0; 1638 1638 struct buffer_head *head, *bh; 1639 1639 unsigned int curr_off = 0; 1640 1640 struct inode *inode = page->mapping->host; 1641 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1642 1641 unsigned int stop = offset + length; 1643 - int num_clusters; 1644 1642 ext4_fsblk_t lblk; 1645 1643 1646 1644 BUG_ON(stop > PAGE_SIZE || stop < length); ··· 1652 1654 break; 1653 1655 1654 1656 if ((offset <= curr_off) && (buffer_delay(bh))) { 1655 - to_release++; 1656 1657 contiguous_blks++; 1657 1658 clear_buffer_delay(bh); 1658 1659 } else if (contiguous_blks) { ··· 1659 1662 (PAGE_SHIFT - inode->i_blkbits); 1660 1663 lblk += (curr_off >> inode->i_blkbits) - 1661 1664 contiguous_blks; 1662 - ext4_es_remove_extent(inode, lblk, contiguous_blks); 1665 + ext4_es_remove_blks(inode, lblk, contiguous_blks); 1663 1666 contiguous_blks = 0; 1664 1667 } 1665 1668 curr_off = next_off; ··· 1668 1671 if (contiguous_blks) { 1669 1672 lblk = page->index << (PAGE_SHIFT - inode->i_blkbits); 1670 1673 lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; 1671 - ext4_es_remove_extent(inode, lblk, contiguous_blks); 1674 + ext4_es_remove_blks(inode, lblk, contiguous_blks); 1672 1675 } 1673 1676 1674 - /* If we have released all the blocks belonging to a cluster, then we 1675 - * need to release the reserved space for that cluster. 
*/ 1676 - num_clusters = EXT4_NUM_B2C(sbi, to_release); 1677 - while (num_clusters > 0) { 1678 - lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) + 1679 - ((num_clusters - 1) << sbi->s_cluster_bits); 1680 - if (sbi->s_cluster_ratio == 1 || 1681 - !ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk)) 1682 - ext4_da_release_space(inode, 1); 1683 - 1684 - num_clusters--; 1685 - } 1686 1677 } 1687 1678 1688 1679 /*