Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: Calculate metadata requirements more accurately
ext4: Fix accounting of reserved metadata blocks

+77 -48
+2
fs/ext4/ext4.h
··· 699 699 unsigned int i_reserved_meta_blocks; 700 700 unsigned int i_allocated_meta_blocks; 701 701 unsigned short i_delalloc_reserved_flag; 702 + sector_t i_da_metadata_calc_last_lblock; 703 + int i_da_metadata_calc_len; 702 704 703 705 /* on-disk additional length */ 704 706 __u16 i_extra_isize;
+2 -1
fs/ext4/ext4_extents.h
··· 225 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 226 226 } 227 227 228 - extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228 + extern int ext4_ext_calc_metadata_amount(struct inode *inode, 229 + sector_t lblocks); 229 230 extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230 231 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231 232 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
+32 -17
fs/ext4/extents.c
··· 296 296 * to allocate @blocks 297 297 * Worse case is one block per extent 298 298 */ 299 - int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299 + int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) 300 300 { 301 - int lcap, icap, rcap, leafs, idxs, num; 302 - int newextents = blocks; 301 + struct ext4_inode_info *ei = EXT4_I(inode); 302 + int idxs, num = 0; 303 303 304 - rcap = ext4_ext_space_root_idx(inode, 0); 305 - lcap = ext4_ext_space_block(inode, 0); 306 - icap = ext4_ext_space_block_idx(inode, 0); 307 - 308 - /* number of new leaf blocks needed */ 309 - num = leafs = (newextents + lcap - 1) / lcap; 304 + idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 305 + / sizeof(struct ext4_extent_idx)); 310 306 311 307 /* 312 - * Worse case, we need separate index block(s) 313 - * to link all new leaf blocks 308 + * If the new delayed allocation block is contiguous with the 309 + * previous da block, it can share index blocks with the 310 + * previous block, so we only need to allocate a new index 311 + * block every idxs leaf blocks. At idxs**2 blocks, we need 312 + * an additional index block, and at idxs**3 blocks, yet 313 + * another index block. 314 314 */ 315 - idxs = (leafs + icap - 1) / icap; 316 - do { 317 - num += idxs; 318 - idxs = (idxs + icap - 1) / icap; 319 - } while (idxs > rcap); 315 + if (ei->i_da_metadata_calc_len && 316 + ei->i_da_metadata_calc_last_lblock+1 == lblock) { 317 + if ((ei->i_da_metadata_calc_len % idxs) == 0) 318 + num++; 319 + if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) 320 + num++; 321 + if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { 322 + num++; 323 + ei->i_da_metadata_calc_len = 0; 324 + } else 325 + ei->i_da_metadata_calc_len++; 326 + ei->i_da_metadata_calc_last_lblock++; 327 + return num; 328 + } 320 329 321 - return num; 330 + /* 331 + * In the worst case we need a new set of index blocks at 332 + * every level of the inode's extent tree. 
333 + */ 334 + ei->i_da_metadata_calc_len = 1; 335 + ei->i_da_metadata_calc_last_lblock = lblock; 336 + return ext_depth(inode) + 1; 322 337 } 323 338 324 339 static int
+40 -30
fs/ext4/inode.c
··· 1009 1009 return &EXT4_I(inode)->i_reserved_quota; 1010 1010 } 1011 1011 #endif 1012 + 1012 1013 /* 1013 1014 * Calculate the number of metadata blocks need to reserve 1014 - * to allocate @blocks for non extent file based file 1015 + * to allocate a new block at @lblock for non extent file based file 1015 1016 */ 1016 - static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017 + static int ext4_indirect_calc_metadata_amount(struct inode *inode, 1018 + sector_t lblock) 1017 1019 { 1018 - int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1019 - int ind_blks, dind_blks, tind_blks; 1020 + struct ext4_inode_info *ei = EXT4_I(inode); 1021 + int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1; 1022 + int blk_bits; 1020 1023 1021 - /* number of new indirect blocks needed */ 1022 - ind_blks = (blocks + icap - 1) / icap; 1024 + if (lblock < EXT4_NDIR_BLOCKS) 1025 + return 0; 1023 1026 1024 - dind_blks = (ind_blks + icap - 1) / icap; 1027 + lblock -= EXT4_NDIR_BLOCKS; 1025 1028 1026 - tind_blks = 1; 1027 - 1028 - return ind_blks + dind_blks + tind_blks; 1029 + if (ei->i_da_metadata_calc_len && 1030 + (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { 1031 + ei->i_da_metadata_calc_len++; 1032 + return 0; 1033 + } 1034 + ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; 1035 + ei->i_da_metadata_calc_len = 1; 1036 + blk_bits = roundup_pow_of_two(lblock + 1); 1037 + return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; 1029 1038 } 1030 1039 1031 1040 /* 1032 1041 * Calculate the number of metadata blocks need to reserve 1033 - * to allocate given number of blocks 1042 + * to allocate a block located at @lblock 1034 1043 */ 1035 - static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044 + static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) 1036 1045 { 1037 - if (!blocks) 1038 - return 0; 1039 - 1040 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1041 - return 
ext4_ext_calc_metadata_amount(inode, blocks); 1047 + return ext4_ext_calc_metadata_amount(inode, lblock); 1042 1048 1043 - return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 + return ext4_indirect_calc_metadata_amount(inode, lblock); 1044 1050 } 1045 1051 1046 1052 /* ··· 1082 1076 * only when we have written all of the delayed 1083 1077 * allocation blocks. 1084 1078 */ 1085 - mdb_free = ei->i_allocated_meta_blocks; 1079 + mdb_free = ei->i_reserved_meta_blocks; 1080 + ei->i_reserved_meta_blocks = 0; 1081 + ei->i_da_metadata_calc_len = 0; 1086 1082 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1087 - ei->i_allocated_meta_blocks = 0; 1088 1083 } 1089 1084 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1090 1085 ··· 1809 1802 return ret ? ret : copied; 1810 1803 } 1811 1804 1812 - static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1805 + /* 1806 + * Reserve a single block located at lblock 1807 + */ 1808 + static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) 1813 1809 { 1814 1810 int retries = 0; 1815 1811 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1816 1812 struct ext4_inode_info *ei = EXT4_I(inode); 1817 - unsigned long md_needed, md_reserved, total = 0; 1813 + unsigned long md_needed, md_reserved; 1818 1814 1819 1815 /* 1820 1816 * recalculate the amount of metadata blocks to reserve ··· 1827 1817 repeat: 1828 1818 spin_lock(&ei->i_block_reservation_lock); 1829 1819 md_reserved = ei->i_reserved_meta_blocks; 1830 - md_needed = ext4_calc_metadata_amount(inode, nrblocks); 1831 - total = md_needed + nrblocks; 1820 + md_needed = ext4_calc_metadata_amount(inode, lblock); 1832 1821 spin_unlock(&ei->i_block_reservation_lock); 1833 1822 1834 1823 /* ··· 1835 1826 * later. Real quota accounting is done at pages writeout 1836 1827 * time. 
1837 1828 */ 1838 - if (vfs_dq_reserve_block(inode, total)) { 1829 + if (vfs_dq_reserve_block(inode, md_needed + 1)) { 1839 1830 /* 1840 1831 * We tend to badly over-estimate the amount of 1841 1832 * metadata blocks which are needed, so if we have ··· 1847 1838 return -EDQUOT; 1848 1839 } 1849 1840 1850 - if (ext4_claim_free_blocks(sbi, total)) { 1851 - vfs_dq_release_reservation_block(inode, total); 1841 + if (ext4_claim_free_blocks(sbi, md_needed + 1)) { 1842 + vfs_dq_release_reservation_block(inode, md_needed + 1); 1852 1843 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1853 1844 retry: 1854 1845 if (md_reserved) ··· 1859 1850 return -ENOSPC; 1860 1851 } 1861 1852 spin_lock(&ei->i_block_reservation_lock); 1862 - ei->i_reserved_data_blocks += nrblocks; 1853 + ei->i_reserved_data_blocks++; 1863 1854 ei->i_reserved_meta_blocks += md_needed; 1864 1855 spin_unlock(&ei->i_block_reservation_lock); 1865 1856 ··· 1898 1889 * only when we have written all of the delayed 1899 1890 * allocation blocks. 1900 1891 */ 1901 - to_free += ei->i_allocated_meta_blocks; 1902 - ei->i_allocated_meta_blocks = 0; 1892 + to_free += ei->i_reserved_meta_blocks; 1893 + ei->i_reserved_meta_blocks = 0; 1894 + ei->i_da_metadata_calc_len = 0; 1903 1895 } 1904 1896 1905 1897 /* update fs dirty blocks counter */ ··· 2514 2504 * XXX: __block_prepare_write() unmaps passed block, 2515 2505 * is it OK? 2516 2506 */ 2517 - ret = ext4_da_reserve_space(inode, 1); 2507 + ret = ext4_da_reserve_space(inode, iblock); 2518 2508 if (ret) 2519 2509 /* not enough space to reserve */ 2520 2510 return ret;
+1
fs/ext4/super.c
··· 702 702 ei->i_reserved_data_blocks = 0; 703 703 ei->i_reserved_meta_blocks = 0; 704 704 ei->i_allocated_meta_blocks = 0; 705 + ei->i_da_metadata_calc_len = 0; 705 706 ei->i_delalloc_reserved_flag = 0; 706 707 spin_lock_init(&(ei->i_block_reservation_lock)); 707 708 #ifdef CONFIG_QUOTA