Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

If the NO_HOLES feature is enabled, holes no longer have file extent items
in the btree to represent them. This made the clone operation ignore the
gaps that exist between consecutive file extent items and therefore not
create the corresponding holes at the destination. When the NO_HOLES
feature is not in use, the holes were created at the destination as expected.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <clm@fb.com>

Authored by Filipe Manana and committed by Chris Mason
f82a9901 96493031

+83 -25
+83 -25
fs/btrfs/ioctl.c
··· 3012 3012 return ret; 3013 3013 } 3014 3014 3015 + static int clone_finish_inode_update(struct btrfs_trans_handle *trans, 3016 + struct inode *inode, 3017 + u64 endoff, 3018 + const u64 destoff, 3019 + const u64 olen) 3020 + { 3021 + struct btrfs_root *root = BTRFS_I(inode)->root; 3022 + int ret; 3023 + 3024 + inode_inc_iversion(inode); 3025 + inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3026 + /* 3027 + * We round up to the block size at eof when determining which 3028 + * extents to clone above, but shouldn't round up the file size. 3029 + */ 3030 + if (endoff > destoff + olen) 3031 + endoff = destoff + olen; 3032 + if (endoff > inode->i_size) 3033 + btrfs_i_size_write(inode, endoff); 3034 + 3035 + ret = btrfs_update_inode(trans, root, inode); 3036 + if (ret) { 3037 + btrfs_abort_transaction(trans, root, ret); 3038 + btrfs_end_transaction(trans, root); 3039 + goto out; 3040 + } 3041 + ret = btrfs_end_transaction(trans, root); 3042 + out: 3043 + return ret; 3044 + } 3045 + 3015 3046 /** 3016 3047 * btrfs_clone() - clone a range from inode file to another 3017 3048 * ··· 3055 3024 * @destoff: Offset within @inode to start clone 3056 3025 */ 3057 3026 static int btrfs_clone(struct inode *src, struct inode *inode, 3058 - u64 off, u64 olen, u64 olen_aligned, u64 destoff) 3027 + const u64 off, const u64 olen, const u64 olen_aligned, 3028 + const u64 destoff) 3059 3029 { 3060 3030 struct btrfs_root *root = BTRFS_I(inode)->root; 3061 3031 struct btrfs_path *path = NULL; ··· 3068 3036 int slot; 3069 3037 int ret; 3070 3038 int no_quota; 3071 - u64 len = olen_aligned; 3039 + const u64 len = olen_aligned; 3072 3040 u64 last_disko = 0; 3041 + u64 last_dest_end = destoff; 3073 3042 3074 3043 ret = -ENOMEM; 3075 3044 buf = vmalloc(btrfs_level_size(root, 0)); ··· 3138 3105 u64 disko = 0, diskl = 0; 3139 3106 u64 datao = 0, datal = 0; 3140 3107 u8 comp; 3141 - u64 endoff; 3108 + u64 drop_start; 3142 3109 3143 3110 extent = btrfs_item_ptr(leaf, slot, 3144 3111 struct 
btrfs_file_extent_item); ··· 3187 3154 new_key.offset = destoff; 3188 3155 3189 3156 /* 3157 + * Deal with a hole that doesn't have an extent item 3158 + * that represents it (NO_HOLES feature enabled). 3159 + * This hole is either in the middle of the cloning 3160 + * range or at the beginning (fully overlaps it or 3161 + * partially overlaps it). 3162 + */ 3163 + if (new_key.offset != last_dest_end) 3164 + drop_start = last_dest_end; 3165 + else 3166 + drop_start = new_key.offset; 3167 + 3168 + /* 3190 3169 * 1 - adjusting old extent (we may have to split it) 3191 3170 * 1 - add new extent 3192 3171 * 1 - inode update ··· 3227 3182 } 3228 3183 3229 3184 ret = btrfs_drop_extents(trans, root, inode, 3230 - new_key.offset, 3185 + drop_start, 3231 3186 new_key.offset + datal, 3232 3187 1); 3233 3188 if (ret) { ··· 3328 3283 aligned_end = ALIGN(new_key.offset + datal, 3329 3284 root->sectorsize); 3330 3285 ret = btrfs_drop_extents(trans, root, inode, 3331 - new_key.offset, 3286 + drop_start, 3332 3287 aligned_end, 3333 3288 1); 3334 3289 if (ret) { ··· 3366 3321 btrfs_mark_buffer_dirty(leaf); 3367 3322 btrfs_release_path(path); 3368 3323 3369 - inode_inc_iversion(inode); 3370 - inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3371 - 3372 - /* 3373 - * we round up to the block size at eof when 3374 - * determining which extents to clone above, 3375 - * but shouldn't round up the file size 3376 - */ 3377 - endoff = new_key.offset + datal; 3378 - if (endoff > destoff+olen) 3379 - endoff = destoff+olen; 3380 - if (endoff > inode->i_size) 3381 - btrfs_i_size_write(inode, endoff); 3382 - 3383 - ret = btrfs_update_inode(trans, root, inode); 3384 - if (ret) { 3385 - btrfs_abort_transaction(trans, root, ret); 3386 - btrfs_end_transaction(trans, root); 3324 + last_dest_end = new_key.offset + datal; 3325 + ret = clone_finish_inode_update(trans, inode, 3326 + last_dest_end, 3327 + destoff, olen); 3328 + if (ret) 3387 3329 goto out; 3388 - } 3389 - ret = 
btrfs_end_transaction(trans, root); 3390 3330 if (new_key.offset + datal >= destoff + len) 3391 3331 break; 3392 3332 } ··· 3379 3349 key.offset++; 3380 3350 } 3381 3351 ret = 0; 3352 + 3353 + if (last_dest_end < destoff + len) { 3354 + /* 3355 + * We have an implicit hole (NO_HOLES feature is enabled) that 3356 + * fully or partially overlaps our cloning range at its end. 3357 + */ 3358 + btrfs_release_path(path); 3359 + 3360 + /* 3361 + * 1 - remove extent(s) 3362 + * 1 - inode update 3363 + */ 3364 + trans = btrfs_start_transaction(root, 2); 3365 + if (IS_ERR(trans)) { 3366 + ret = PTR_ERR(trans); 3367 + goto out; 3368 + } 3369 + ret = btrfs_drop_extents(trans, root, inode, 3370 + last_dest_end, destoff + len, 1); 3371 + if (ret) { 3372 + if (ret != -EOPNOTSUPP) 3373 + btrfs_abort_transaction(trans, root, ret); 3374 + btrfs_end_transaction(trans, root); 3375 + goto out; 3376 + } 3377 + ret = clone_finish_inode_update(trans, inode, destoff + len, 3378 + destoff, olen); 3379 + } 3382 3380 3383 3381 out: 3384 3382 btrfs_free_path(path);