Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: Drop EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE flag
ext4: Fix quota accounting error with fallocate
ext4: Handle -EDQUOT error on write

+77 -35
+4 -5
fs/ext4/ext4.h
··· 361 361 so set the magic i_delalloc_reserve_flag after taking the 362 362 inode allocation semaphore for */ 363 363 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 364 - /* Call ext4_da_update_reserve_space() after successfully 365 - allocating the blocks */ 366 - #define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 367 364 /* caller is from the direct IO path, request to creation of an 368 365 unitialized extents if not allocated, split the uninitialized 369 366 extent if blocks has been preallocated already*/ 370 - #define EXT4_GET_BLOCKS_DIO 0x0010 371 - #define EXT4_GET_BLOCKS_CONVERT 0x0020 367 + #define EXT4_GET_BLOCKS_DIO 0x0008 368 + #define EXT4_GET_BLOCKS_CONVERT 0x0010 372 369 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ 373 370 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) 374 371 /* Convert extent to initialized after direct IO complete */ ··· 1440 1443 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1441 1444 extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1442 1445 extern int flush_aio_dio_completed_IO(struct inode *inode); 1446 + extern void ext4_da_update_reserve_space(struct inode *inode, 1447 + int used, int quota_claim); 1443 1448 /* ioctl.c */ 1444 1449 extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1445 1450 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
+21
fs/ext4/extents.c
··· 3132 3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, 3133 3133 newblock + max_blocks, 3134 3134 allocated - max_blocks); 3135 + allocated = max_blocks; 3135 3136 } 3137 + 3138 + /* 3139 + * If we have done fallocate with the offset that is already 3140 + * delayed allocated, we would have block reservation 3141 + * and quota reservation done in the delayed write path. 3142 + * But fallocate would have already updated quota and block 3143 + * count for this offset. So cancel these reservation 3144 + */ 3145 + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3146 + ext4_da_update_reserve_space(inode, allocated, 0); 3147 + 3136 3148 map_out: 3137 3149 set_buffer_mapped(bh_result); 3138 3150 out1: ··· 3380 3368 /* previous routine could use block we allocated */ 3381 3369 newblock = ext_pblock(&newex); 3382 3370 allocated = ext4_ext_get_actual_len(&newex); 3371 + if (allocated > max_blocks) 3372 + allocated = max_blocks; 3383 3373 set_buffer_new(bh_result); 3374 + 3375 + /* 3376 + * Update reserved blocks/metadata blocks after successful 3377 + * block allocation which had been deferred till now. 3378 + */ 3379 + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3380 + ext4_da_update_reserve_space(inode, allocated, 1); 3384 3381 3385 3382 /* 3386 3383 * Cache the extent and update transaction to commit on fdatasync only
+52 -30
fs/ext4/inode.c
··· 1053 1053 * Called with i_data_sem down, which is important since we can call 1054 1054 * ext4_discard_preallocations() from here. 1055 1055 */ 1056 - static void ext4_da_update_reserve_space(struct inode *inode, int used) 1056 + void ext4_da_update_reserve_space(struct inode *inode, 1057 + int used, int quota_claim) 1057 1058 { 1058 1059 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1059 1060 struct ext4_inode_info *ei = EXT4_I(inode); 1060 - int mdb_free = 0; 1061 + int mdb_free = 0, allocated_meta_blocks = 0; 1061 1062 1062 1063 spin_lock(&ei->i_block_reservation_lock); 1063 1064 if (unlikely(used > ei->i_reserved_data_blocks)) { ··· 1074 1073 ei->i_reserved_data_blocks -= used; 1075 1074 used += ei->i_allocated_meta_blocks; 1076 1075 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 1076 + allocated_meta_blocks = ei->i_allocated_meta_blocks; 1077 1077 ei->i_allocated_meta_blocks = 0; 1078 1078 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); 1079 1079 ··· 1092 1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1093 1091 1094 1092 /* Update quota subsystem */ 1095 - vfs_dq_claim_block(inode, used); 1096 - if (mdb_free) 1097 - vfs_dq_release_reservation_block(inode, mdb_free); 1093 + if (quota_claim) { 1094 + vfs_dq_claim_block(inode, used); 1095 + if (mdb_free) 1096 + vfs_dq_release_reservation_block(inode, mdb_free); 1097 + } else { 1098 + /* 1099 + * We did fallocate with an offset that is already delayed 1100 + * allocated. So on delayed allocated writeback we should 1101 + * not update the quota for allocated blocks. But then 1102 + * converting an fallocate region to initialized region would 1103 + * have caused a metadata allocation. So claim quota for 1104 + * that 1105 + */ 1106 + if (allocated_meta_blocks) 1107 + vfs_dq_claim_block(inode, allocated_meta_blocks); 1108 + vfs_dq_release_reservation_block(inode, mdb_free + used); 1109 + } 1098 1110 1099 1111 /* 1100 1112 * If we have done all the pending block allocations and if ··· 1308 1292 */ 1309 1293 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1310 1294 } 1311 - } 1312 1295 1296 + /* 1297 + * Update reserved blocks/metadata blocks after successful 1298 + * block allocation which had been deferred till now. We don't 1299 + * support fallocate for non extent files. So we can update 1300 + * reserve space here. 1301 + */ 1302 + if ((retval > 0) && 1303 + (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 1304 + ext4_da_update_reserve_space(inode, retval, 1); 1305 + } 1313 1306 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1314 1307 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1315 - 1316 - /* 1317 - * Update reserved blocks/metadata blocks after successful 1318 - * block allocation which had been deferred till now. 1319 - */ 1320 - if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) 1321 - ext4_da_update_reserve_space(inode, retval); 1322 1308 1323 1309 up_write((&EXT4_I(inode)->i_data_sem)); 1324 1310 if (retval > 0 && buffer_mapped(bh)) { ··· 1853 1835 * later. Real quota accounting is done at pages writeout 1854 1836 * time. 1855 1837 */ 1856 - if (vfs_dq_reserve_block(inode, md_needed + 1)) { 1857 - /* 1858 - * We tend to badly over-estimate the amount of 1859 - * metadata blocks which are needed, so if we have 1860 - * reserved any metadata blocks, try to force out the 1861 - * inode and see if we have any better luck. 1862 - */ 1863 - if (md_reserved && retries++ <= 3) 1864 - goto retry; 1838 + if (vfs_dq_reserve_block(inode, md_needed + 1)) 1865 1839 return -EDQUOT; 1866 - } 1867 1840 1868 1841 if (ext4_claim_free_blocks(sbi, md_needed + 1)) { 1869 1842 vfs_dq_release_reservation_block(inode, md_needed + 1); 1870 1843 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1871 - retry: 1872 - if (md_reserved) 1873 - write_inode_now(inode, (retries == 3)); 1874 1844 yield(); 1875 1845 goto repeat; 1876 1846 } ··· 2219 2213 * variables are updated after the blocks have been allocated. 2220 2214 */ 2221 2215 new.b_state = 0; 2222 - get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | 2223 - EXT4_GET_BLOCKS_DELALLOC_RESERVE); 2216 + get_blocks_flags = EXT4_GET_BLOCKS_CREATE; 2224 2217 if (mpd->b_state & (1 << BH_Delay)) 2225 - get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; 2218 + get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 2219 + 2226 2220 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, 2227 2221 &new, get_blocks_flags); 2228 2222 if (blks < 0) { ··· 3038 3032 loff_t pos, unsigned len, unsigned flags, 3039 3033 struct page **pagep, void **fsdata) 3040 3034 { 3041 - int ret, retries = 0; 3035 + int ret, retries = 0, quota_retries = 0; 3042 3036 struct page *page; 3043 3037 pgoff_t index; 3044 3038 unsigned from, to; ··· 3097 3091 3098 3092 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3099 3093 goto retry; 3094 + 3095 + if ((ret == -EDQUOT) && 3096 + EXT4_I(inode)->i_reserved_meta_blocks && 3097 + (quota_retries++ < 3)) { 3098 + /* 3099 + * Since we often over-estimate the number of meta 3100 + * data blocks required, we may sometimes get a 3101 + * spurios out of quota error even though there would 3102 + * be enough space once we write the data blocks and 3103 + * find out how many meta data blocks were _really_ 3104 + * required. So try forcing the inode write to see if 3105 + * that helps. 3106 + */ 3107 + write_inode_now(inode, (quota_retries == 3)); 3108 + goto retry; 3109 + } 3100 3110 out: 3101 3111 return ret; 3102 3112 }