Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: update extent status tree after an extent is zeroed out

When we try to split an extent, the extent could be zeroed out and marked
as initialized. But we don't know this in ext4_map_blocks because it
only returns the length of the allocated extent. Meanwhile, we will mark this
extent as uninitialized in the extent status tree because we only check m_flags.

This commit updates the extent status tree when we try to split an unwritten
extent. We don't need to worry about the status of this extent because
we always mark it as initialized.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dmitry Monakhov <dmonakhov@openvz.org>

authored by

Zheng Liu and committed by
Theodore Ts'o
adb23551 cdee7843

+61 -4
+31 -4
fs/ext4/extents.c
··· 2925 2925 { 2926 2926 ext4_fsblk_t newblock; 2927 2927 ext4_lblk_t ee_block; 2928 - struct ext4_extent *ex, newex, orig_ex; 2928 + struct ext4_extent *ex, newex, orig_ex, zero_ex; 2929 2929 struct ext4_extent *ex2 = NULL; 2930 2930 unsigned int ee_len, depth; 2931 2931 int err = 0; ··· 2996 2996 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 2997 2997 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 2998 2998 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { 2999 - if (split_flag & EXT4_EXT_DATA_VALID1) 2999 + if (split_flag & EXT4_EXT_DATA_VALID1) { 3000 3000 err = ext4_ext_zeroout(inode, ex2); 3001 - else 3001 + zero_ex.ee_block = ex2->ee_block; 3002 + zero_ex.ee_len = ext4_ext_get_actual_len(ex2); 3003 + ext4_ext_store_pblock(&zero_ex, 3004 + ext4_ext_pblock(ex2)); 3005 + } else { 3002 3006 err = ext4_ext_zeroout(inode, ex); 3003 - } else 3007 + zero_ex.ee_block = ex->ee_block; 3008 + zero_ex.ee_len = ext4_ext_get_actual_len(ex); 3009 + ext4_ext_store_pblock(&zero_ex, 3010 + ext4_ext_pblock(ex)); 3011 + } 3012 + } else { 3004 3013 err = ext4_ext_zeroout(inode, &orig_ex); 3014 + zero_ex.ee_block = orig_ex.ee_block; 3015 + zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex); 3016 + ext4_ext_store_pblock(&zero_ex, 3017 + ext4_ext_pblock(&orig_ex)); 3018 + } 3005 3019 3006 3020 if (err) 3007 3021 goto fix_extent_len; ··· 3023 3009 ex->ee_len = cpu_to_le16(ee_len); 3024 3010 ext4_ext_try_to_merge(handle, inode, path, ex); 3025 3011 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 3012 + if (err) 3013 + goto fix_extent_len; 3014 + 3015 + /* update extent status tree */ 3016 + err = ext4_es_zeroout(inode, &zero_ex); 3017 + 3026 3018 goto out; 3027 3019 } else if (err) 3028 3020 goto fix_extent_len; ··· 3170 3150 ee_block = le32_to_cpu(ex->ee_block); 3171 3151 ee_len = ext4_ext_get_actual_len(ex); 3172 3152 allocated = ee_len - (map->m_lblk - ee_block); 3153 + zero_ex.ee_len = 0; 3173 3154 3174 3155 
trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); 3175 3156 ··· 3268 3247 err = ext4_ext_zeroout(inode, ex); 3269 3248 if (err) 3270 3249 goto out; 3250 + zero_ex.ee_block = ex->ee_block; 3251 + zero_ex.ee_len = ext4_ext_get_actual_len(ex); 3252 + ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); 3271 3253 3272 3254 err = ext4_ext_get_access(handle, inode, path + depth); 3273 3255 if (err) ··· 3329 3305 err = allocated; 3330 3306 3331 3307 out: 3308 + /* If we have gotten a failure, don't zero out status tree */ 3309 + if (!err) 3310 + err = ext4_es_zeroout(inode, &zero_ex); 3332 3311 return err ? err : allocated; 3333 3312 } 3334 3313
+17
fs/ext4/extents_status.c
··· 854 854 return err; 855 855 } 856 856 857 + int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) 858 + { 859 + ext4_lblk_t ee_block; 860 + ext4_fsblk_t ee_pblock; 861 + unsigned int ee_len; 862 + 863 + ee_block = le32_to_cpu(ex->ee_block); 864 + ee_len = ext4_ext_get_actual_len(ex); 865 + ee_pblock = ext4_ext_pblock(ex); 866 + 867 + if (ee_len == 0) 868 + return 0; 869 + 870 + return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, 871 + EXTENT_STATUS_WRITTEN); 872 + } 873 + 857 874 static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) 858 875 { 859 876 struct ext4_sb_info *sbi = container_of(shrink,
+3
fs/ext4/extents_status.h
··· 39 39 EXTENT_STATUS_DELAYED | \ 40 40 EXTENT_STATUS_HOLE) 41 41 42 + struct ext4_extent; 43 + 42 44 struct extent_status { 43 45 struct rb_node rb_node; 44 46 ext4_lblk_t es_lblk; /* first logical block extent covers */ ··· 66 64 struct extent_status *es); 67 65 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 68 66 struct extent_status *es); 67 + extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex); 69 68 70 69 static inline int ext4_es_is_written(struct extent_status *es) 71 70 {
+10
fs/ext4/inode.c
··· 722 722 } 723 723 #endif 724 724 725 + /* 726 + * If the extent has been zeroed out, we don't need to update 727 + * extent status tree. 728 + */ 729 + if ((flags & EXT4_GET_BLOCKS_PRE_IO) && 730 + ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 731 + if (ext4_es_is_written(&es)) 732 + goto has_zeroout; 733 + } 725 734 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 726 735 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 727 736 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && ··· 743 734 retval = ret; 744 735 } 745 736 737 + has_zeroout: 746 738 up_write((&EXT4_I(inode)->i_data_sem)); 747 739 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 748 740 int ret = check_block_validity(inode, map);