Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: Drop EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE flag
ext4: Fix quota accounting error with fallocate
ext4: Handle -EDQUOT error on write

+77 -35
+4 -5
fs/ext4/ext4.h
··· 361 so set the magic i_delalloc_reserve_flag after taking the 362 inode allocation semaphore for */ 363 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 364 - /* Call ext4_da_update_reserve_space() after successfully 365 - allocating the blocks */ 366 - #define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 367 /* caller is from the direct IO path, request to creation of an 368 uninitialized extents if not allocated, split the uninitialized 369 extent if blocks has been preallocated already*/ 370 - #define EXT4_GET_BLOCKS_DIO 0x0010 371 - #define EXT4_GET_BLOCKS_CONVERT 0x0020 372 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ 373 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) 374 /* Convert extent to initialized after direct IO complete */ ··· 1440 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1441 extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1442 extern int flush_aio_dio_completed_IO(struct inode *inode); 1443 /* ioctl.c */ 1444 extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1445 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
··· 361 so set the magic i_delalloc_reserve_flag after taking the 362 inode allocation semaphore for */ 363 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 364 /* caller is from the direct IO path, request to creation of an 365 uninitialized extents if not allocated, split the uninitialized 366 extent if blocks has been preallocated already*/ 367 + #define EXT4_GET_BLOCKS_DIO 0x0008 368 + #define EXT4_GET_BLOCKS_CONVERT 0x0010 369 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ 370 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) 371 /* Convert extent to initialized after direct IO complete */ ··· 1443 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1444 extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1445 extern int flush_aio_dio_completed_IO(struct inode *inode); 1446 + extern void ext4_da_update_reserve_space(struct inode *inode, 1447 + int used, int quota_claim); 1448 /* ioctl.c */ 1449 extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1450 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
+21
fs/ext4/extents.c
··· 3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, 3133 newblock + max_blocks, 3134 allocated - max_blocks); 3135 } 3136 map_out: 3137 set_buffer_mapped(bh_result); 3138 out1: ··· 3380 /* previous routine could use block we allocated */ 3381 newblock = ext_pblock(&newex); 3382 allocated = ext4_ext_get_actual_len(&newex); 3383 set_buffer_new(bh_result); 3384 3385 /* 3386 * Cache the extent and update transaction to commit on fdatasync only
··· 3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, 3133 newblock + max_blocks, 3134 allocated - max_blocks); 3135 + allocated = max_blocks; 3136 } 3137 + 3138 + /* 3139 + * If we have done fallocate with the offset that is already 3140 + * delayed allocated, we would have block reservation 3141 + * and quota reservation done in the delayed write path. 3142 + * But fallocate would have already updated quota and block 3143 + * count for this offset. So cancel these reservation 3144 + */ 3145 + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3146 + ext4_da_update_reserve_space(inode, allocated, 0); 3147 + 3148 map_out: 3149 set_buffer_mapped(bh_result); 3150 out1: ··· 3368 /* previous routine could use block we allocated */ 3369 newblock = ext_pblock(&newex); 3370 allocated = ext4_ext_get_actual_len(&newex); 3371 + if (allocated > max_blocks) 3372 + allocated = max_blocks; 3373 set_buffer_new(bh_result); 3374 + 3375 + /* 3376 + * Update reserved blocks/metadata blocks after successful 3377 + * block allocation which had been deferred till now. 3378 + */ 3379 + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3380 + ext4_da_update_reserve_space(inode, allocated, 1); 3381 3382 /* 3383 * Cache the extent and update transaction to commit on fdatasync only
+52 -30
fs/ext4/inode.c
··· 1053 * Called with i_data_sem down, which is important since we can call 1054 * ext4_discard_preallocations() from here. 1055 */ 1056 - static void ext4_da_update_reserve_space(struct inode *inode, int used) 1057 { 1058 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1059 struct ext4_inode_info *ei = EXT4_I(inode); 1060 - int mdb_free = 0; 1061 1062 spin_lock(&ei->i_block_reservation_lock); 1063 if (unlikely(used > ei->i_reserved_data_blocks)) { ··· 1074 ei->i_reserved_data_blocks -= used; 1075 used += ei->i_allocated_meta_blocks; 1076 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 1077 ei->i_allocated_meta_blocks = 0; 1078 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); 1079 ··· 1092 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1093 1094 /* Update quota subsystem */ 1095 - vfs_dq_claim_block(inode, used); 1096 - if (mdb_free) 1097 - vfs_dq_release_reservation_block(inode, mdb_free); 1098 1099 /* 1100 * If we have done all the pending block allocations and if ··· 1308 */ 1309 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1310 } 1311 - } 1312 1313 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1314 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1315 - 1316 - /* 1317 - * Update reserved blocks/metadata blocks after successful 1318 - * block allocation which had been deferred till now. 1319 - */ 1320 - if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) 1321 - ext4_da_update_reserve_space(inode, retval); 1322 1323 up_write((&EXT4_I(inode)->i_data_sem)); 1324 if (retval > 0 && buffer_mapped(bh)) { ··· 1853 * later. Real quota accounting is done at pages writeout 1854 * time. 1855 */ 1856 - if (vfs_dq_reserve_block(inode, md_needed + 1)) { 1857 - /* 1858 - * We tend to badly over-estimate the amount of 1859 - * metadata blocks which are needed, so if we have 1860 - * reserved any metadata blocks, try to force out the 1861 - * inode and see if we have any better luck. 
1862 - */ 1863 - if (md_reserved && retries++ <= 3) 1864 - goto retry; 1865 return -EDQUOT; 1866 - } 1867 1868 if (ext4_claim_free_blocks(sbi, md_needed + 1)) { 1869 vfs_dq_release_reservation_block(inode, md_needed + 1); 1870 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1871 - retry: 1872 - if (md_reserved) 1873 - write_inode_now(inode, (retries == 3)); 1874 yield(); 1875 goto repeat; 1876 } ··· 2219 * variables are updated after the blocks have been allocated. 2220 */ 2221 new.b_state = 0; 2222 - get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | 2223 - EXT4_GET_BLOCKS_DELALLOC_RESERVE); 2224 if (mpd->b_state & (1 << BH_Delay)) 2225 - get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; 2226 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, 2227 &new, get_blocks_flags); 2228 if (blks < 0) { ··· 3038 loff_t pos, unsigned len, unsigned flags, 3039 struct page **pagep, void **fsdata) 3040 { 3041 - int ret, retries = 0; 3042 struct page *page; 3043 pgoff_t index; 3044 unsigned from, to; ··· 3097 3098 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3099 goto retry; 3100 out: 3101 return ret; 3102 }
··· 1053 * Called with i_data_sem down, which is important since we can call 1054 * ext4_discard_preallocations() from here. 1055 */ 1056 + void ext4_da_update_reserve_space(struct inode *inode, 1057 + int used, int quota_claim) 1058 { 1059 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1060 struct ext4_inode_info *ei = EXT4_I(inode); 1061 + int mdb_free = 0, allocated_meta_blocks = 0; 1062 1063 spin_lock(&ei->i_block_reservation_lock); 1064 if (unlikely(used > ei->i_reserved_data_blocks)) { ··· 1073 ei->i_reserved_data_blocks -= used; 1074 used += ei->i_allocated_meta_blocks; 1075 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 1076 + allocated_meta_blocks = ei->i_allocated_meta_blocks; 1077 ei->i_allocated_meta_blocks = 0; 1078 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); 1079 ··· 1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1091 1092 /* Update quota subsystem */ 1093 + if (quota_claim) { 1094 + vfs_dq_claim_block(inode, used); 1095 + if (mdb_free) 1096 + vfs_dq_release_reservation_block(inode, mdb_free); 1097 + } else { 1098 + /* 1099 + * We did fallocate with an offset that is already delayed 1100 + * allocated. So on delayed allocated writeback we should 1101 + * not update the quota for allocated blocks. But then 1102 + * converting an fallocate region to initialized region would 1103 + * have caused a metadata allocation. So claim quota for 1104 + * that 1105 + */ 1106 + if (allocated_meta_blocks) 1107 + vfs_dq_claim_block(inode, allocated_meta_blocks); 1108 + vfs_dq_release_reservation_block(inode, mdb_free + used); 1109 + } 1110 1111 /* 1112 * If we have done all the pending block allocations and if ··· 1292 */ 1293 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1294 } 1295 1296 + /* 1297 + * Update reserved blocks/metadata blocks after successful 1298 + * block allocation which had been deferred till now. We don't 1299 + * support fallocate for non extent files. So we can update 1300 + * reserve space here. 
+ */ 1302 + if ((retval > 0) && 1303 + (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 1304 + ext4_da_update_reserve_space(inode, retval, 1); 1305 + } 1306 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1307 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1308 1309 up_write((&EXT4_I(inode)->i_data_sem)); 1310 if (retval > 0 && buffer_mapped(bh)) { ··· 1835 * later. Real quota accounting is done at pages writeout 1836 * time. 1837 */ 1838 + if (vfs_dq_reserve_block(inode, md_needed + 1)) 1839 return -EDQUOT; 1840 1841 if (ext4_claim_free_blocks(sbi, md_needed + 1)) { 1842 vfs_dq_release_reservation_block(inode, md_needed + 1); 1843 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1844 yield(); 1845 goto repeat; 1846 } ··· 2213 * variables are updated after the blocks have been allocated. 2214 */ 2215 new.b_state = 0; 2216 + get_blocks_flags = EXT4_GET_BLOCKS_CREATE; 2217 if (mpd->b_state & (1 << BH_Delay)) 2218 + get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 2219 + 2220 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, 2221 &new, get_blocks_flags); 2222 if (blks < 0) { ··· 3032 loff_t pos, unsigned len, unsigned flags, 3033 struct page **pagep, void **fsdata) 3034 { 3035 + int ret, retries = 0, quota_retries = 0; 3036 struct page *page; 3037 pgoff_t index; 3038 unsigned from, to; ··· 3091 3092 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3093 goto retry; 3094 + 3095 + if ((ret == -EDQUOT) && 3096 + EXT4_I(inode)->i_reserved_meta_blocks && 3097 + (quota_retries++ < 3)) { 3098 + /* 3099 + * Since we often over-estimate the number of meta 3100 + * data blocks required, we may sometimes get a 3101 + * spurious out of quota error even though there would 3102 + * be enough space once we write the data blocks and 3103 + * find out how many meta data blocks were _really_ 3104 + * required. So try forcing the inode write to see if 3105 + * that helps. 
3106 + */ 3107 + write_inode_now(inode, (quota_retries == 3)); 3108 + goto retry; 3109 + } 3110 out: 3111 return ret; 3112 }