ext4: journal credits reservation fixes for extent file writepage

This patch modifies the writepage/write_begin credit calculation for
extent files to use the credit calculation helper function.

The current calculation of how many index/leaf blocks should be
accounted for is too conservative: it always assumes the worst case,
where the tree depth is 5, and, in the case of multiple chunk
allocations, it always assumes that no blocks are dirtied in common
across the allocations. This patch uses the actual depth of the inode's
extent tree, plus some extras, to calculate the index blocks, and is
also less conservative in the case of multiple allocation accounting.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

authored by Mingming Cao and committed by Theodore Ts'o ee12b630 a02908f1

+49 -62
+3 -1
fs/ext4/ext4_extents.h
··· 216 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 217 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 218 extern int ext4_extent_tree_init(handle_t *, struct inode *); 219 - extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); 220 extern int ext4_ext_try_to_merge(struct inode *inode, 221 struct ext4_ext_path *path, 222 struct ext4_extent *);
··· 216 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 217 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 218 extern int ext4_extent_tree_init(handle_t *, struct inode *); 219 + extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 220 + int num, 221 + struct ext4_ext_path *path); 222 extern int ext4_ext_try_to_merge(struct inode *inode, 223 struct ext4_ext_path *path, 224 struct ext4_extent *);
+44 -60
fs/ext4/extents.c
··· 1747 } 1748 1749 /* 1750 - * ext4_ext_calc_credits_for_insert: 1751 - * This routine returns max. credits that the extent tree can consume. 1752 - * It should be OK for low-performance paths like ->writepage() 1753 - * To allow many writing processes to fit into a single transaction, 1754 - * the caller should calculate credits under i_data_sem and 1755 - * pass the actual path. 1756 */ 1757 - int ext4_ext_calc_credits_for_insert(struct inode *inode, 1758 struct ext4_ext_path *path) 1759 { 1760 - int depth, needed; 1761 - 1762 if (path) { 1763 /* probably there is space in leaf? */ 1764 - depth = ext_depth(inode); 1765 if (le16_to_cpu(path[depth].p_hdr->eh_entries) 1766 - < le16_to_cpu(path[depth].p_hdr->eh_max)) 1767 - return 1; 1768 } 1769 1770 - /* 1771 - * given 32-bit logical block (4294967296 blocks), max. tree 1772 - * can be 4 levels in depth -- 4 * 340^4 == 53453440000. 1773 - * Let's also add one more level for imbalance. 1774 - */ 1775 - depth = 5; 1776 1777 - /* allocation of new data block(s) */ 1778 - needed = 2; 1779 1780 - /* 1781 - * tree can be full, so it would need to grow in depth: 1782 - * we need one credit to modify old root, credits for 1783 - * new root will be added in split accounting 1784 - */ 1785 - needed += 1; 1786 1787 - /* 1788 - * Index split can happen, we would need: 1789 - * allocate intermediate indexes (bitmap + group) 1790 - * + change two blocks at each level, but root (already included) 1791 - */ 1792 - needed += (depth * 2) + (depth * 2); 1793 - 1794 - /* any allocation modifies superblock */ 1795 - needed += 1; 1796 - 1797 - return needed; 1798 } 1799 1800 static int ext4_remove_blocks(handle_t *handle, struct inode *inode, ··· 1928 correct_index = 1; 1929 credits += (ext_depth(inode)) + 1; 1930 } 1931 - #ifdef CONFIG_QUOTA 1932 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 1933 - #endif 1934 1935 err = ext4_ext_journal_restart(handle, credits); 1936 if (err) ··· 2861 inode->i_mtime = inode->i_ctime = 
ext4_current_time(inode); 2862 ext4_mark_inode_dirty(handle, inode); 2863 ext4_journal_stop(handle); 2864 - } 2865 - 2866 - /* 2867 - * ext4_ext_writepage_trans_blocks: 2868 - * calculate max number of blocks we could modify 2869 - * in order to allocate new block for an inode 2870 - */ 2871 - int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) 2872 - { 2873 - int needed; 2874 - 2875 - needed = ext4_ext_calc_credits_for_insert(inode, NULL); 2876 - 2877 - /* caller wants to allocate num blocks, but note it includes sb */ 2878 - needed = needed * num - (num - 1); 2879 - 2880 - #ifdef CONFIG_QUOTA 2881 - needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 2882 - #endif 2883 - 2884 - return needed; 2885 } 2886 2887 static void ext4_falloc_update_inode(struct inode *inode,
··· 1747 } 1748 1749 /* 1750 + * ext4_ext_calc_credits_for_single_extent: 1751 + * This routine returns max. credits that needed to insert an extent 1752 + * to the extent tree. 1753 + * When pass the actual path, the caller should calculate credits 1754 + * under i_data_sem. 1755 */ 1756 + int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, 1757 struct ext4_ext_path *path) 1758 { 1759 if (path) { 1760 + int depth = ext_depth(inode); 1761 + int ret; 1762 + 1763 /* probably there is space in leaf? */ 1764 if (le16_to_cpu(path[depth].p_hdr->eh_entries) 1765 + < le16_to_cpu(path[depth].p_hdr->eh_max)) { 1766 + 1767 + /* 1768 + * There are some space in the leaf tree, no 1769 + * need to account for leaf block credit 1770 + * 1771 + * bitmaps and block group descriptor blocks 1772 + * and other metadat blocks still need to be 1773 + * accounted. 1774 + */ 1775 + /* 1 one bitmap, 1 block group descriptor */ 1776 + ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); 1777 + } 1778 } 1779 1780 + return ext4_meta_trans_blocks(inode, num, 1); 1781 + } 1782 1783 + /* 1784 + * How many index/leaf blocks need to change/allocate to modify nrblocks? 1785 + * 1786 + * if nrblocks are fit in a single extent (chunk flag is 1), then 1787 + * in the worse case, each tree level index/leaf need to be changed 1788 + * if the tree split due to insert a new extent, then the old tree 1789 + * index/leaf need to be updated too 1790 + * 1791 + * If the nrblocks are discontiguous, they could cause 1792 + * the whole tree split more than once, but this is really rare. 
1793 + */ 1794 + int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk) 1795 + { 1796 + int index; 1797 + int depth = ext_depth(inode); 1798 1799 + if (chunk) 1800 + index = depth * 2; 1801 + else 1802 + index = depth * 3; 1803 1804 + return index; 1805 } 1806 1807 static int ext4_remove_blocks(handle_t *handle, struct inode *inode, ··· 1921 correct_index = 1; 1922 credits += (ext_depth(inode)) + 1; 1923 } 1924 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 1925 1926 err = ext4_ext_journal_restart(handle, credits); 1927 if (err) ··· 2856 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 2857 ext4_mark_inode_dirty(handle, inode); 2858 ext4_journal_stop(handle); 2859 } 2860 2861 static void ext4_falloc_update_inode(struct inode *inode,
+2 -1
fs/ext4/migrate.c
··· 53 * credit. But below we try to not accumalate too much 54 * of them by restarting the journal. 55 */ 56 - needed = ext4_ext_calc_credits_for_insert(inode, path); 57 58 /* 59 * Make sure the credit we accumalated is not really high
··· 53 * credit. But below we try to not accumalate too much 54 * of them by restarting the journal. 55 */ 56 + needed = ext4_ext_calc_credits_for_single_extent(inode, 57 + lb->last_block - lb->first_block + 1, path); 58 59 /* 60 * Make sure the credit we accumalated is not really high