Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Btrfs: use generic_remap_file_range_prep() for cloning and deduplication

Since cloning and deduplication are no longer Btrfs-specific operations, we
now have generic code to handle parameter validation, compare file ranges
used for deduplication, clear capabilities when cloning, etc. This change
makes Btrfs use it, eliminating a lot of code in Btrfs and also fixing a
few bugs, such as:

1) When cloning, the destination file's capabilities were not dropped
(the fstest generic/513 tests this);

2) We were not checking if the destination file is immutable;

3) Not checking if either the source or destination files are swap
files (swap file support is coming soon for Btrfs);

4) System limits were not checked (resource limits and O_LARGEFILE).
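The kind of parameter validation the generic helper centralizes can be sketched in userspace C. This is a hypothetical analog, not the kernel's code: the function name, the `REMAP_EINVAL` constant and the exact set of checks are illustrative, mirroring the overflow, block-alignment and same-inode overlap rules described above.

```c
#include <stdint.h>

#define REMAP_EINVAL (-22)	/* stand-in for the kernel's -EINVAL */

/*
 * Hypothetical sketch of remap-range validation: reject overflow,
 * ranges past i_size, unaligned offsets/lengths (except a length that
 * ends exactly at EOF), and overlapping ranges within one file.
 */
static int remap_check_ranges(uint64_t off_in, uint64_t off_out,
			      uint64_t len, uint64_t isize,
			      uint64_t blocksize, int same_inode)
{
	/* Overflow, or source range extending past i_size. */
	if (off_in + len < off_in || off_in + len > isize)
		return REMAP_EINVAL;

	/* Offsets and length must be block aligned... */
	if (off_in % blocksize || off_out % blocksize || len % blocksize) {
		/* ...unless the source range ends exactly at EOF. */
		if (off_in + len != isize)
			return REMAP_EINVAL;
	}

	/* Within one file, source and destination must not overlap. */
	if (same_inode && off_out + len > off_in && off_out < off_in + len)
		return REMAP_EINVAL;

	return 0;
}
```

A real filesystem also checks immutability, swap files and resource limits at this point, which is exactly what Btrfs was missing before delegating to the generic helper.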

Note that the generic helper generic_remap_file_range_prep() does start
and wait for writeback, by calling filemap_write_and_wait_range(). However,
that is not enough for Btrfs, for two reasons:

1) With compression, we need to start writeback twice in order to get the
pages marked for writeback and ordered extents created;

2) filemap_write_and_wait_range() (and all its other variants) only waits
for the IO to complete, but we need to wait for the ordered extents to
finish, so that when we do the actual reflinking operations the file
extent items are in the fs tree. This is also important due to the fact
that the generic helper, for the deduplication case, compares the
contents of the pages in the requested range, which might require
reading extents from disk in the very unlikely case that pages get
invalidated after writeback finishes (so the file extent items must be
up to date in the fs tree).
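The distinction between "IO done" and "ordered extent finished" can be modeled with a tiny single-threaded sketch. This is not kernel code; the struct and function names are invented for illustration. The point is that what filemap_write_and_wait_range() guarantees (pages on disk) is weaker than what the reflink path needs (file extent items in the fs tree):

```c
#include <stdbool.h>

/* Toy model of a Btrfs ordered extent's lifecycle. */
struct ordered_extent {
	bool io_done;		/* pages written to disk */
	bool finished;		/* ordered-IO completion ran */
	bool in_fs_tree;	/* file extent item present in the fs tree */
};

/* What a filemap_write_and_wait_range()-style wait observes. */
static void writeback_complete(struct ordered_extent *oe)
{
	oe->io_done = true;
}

/* What a btrfs_wait_ordered_range()-style wait additionally guarantees:
 * only here does the file extent item land in the fs tree. */
static void finish_ordered_io(struct ordered_extent *oe)
{
	oe->finished = true;
	oe->in_fs_tree = true;
}
```

After writeback_complete() alone, the extent item is still missing, which is why Btrfs must wait for ordered extents itself before calling the generic helper.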

Since these reasons are specific to Btrfs, we have to do it in the Btrfs
code before calling generic_remap_file_range_prep(). This also results
in a simpler way of dealing with existing delalloc in the source/target
ranges, especially for the deduplication case, where we used to lock all
the pages first and then, if we found any delalloc or ordered extent in
the range, unlock the pages, trigger writeback, wait for the ordered
extents to complete, lock all the pages again and re-check whether
deduplication could proceed. Now we have a simpler approach: lock the
inodes, then trigger writeback, and then wait for ordered extents to
complete.
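Locking two inodes safely requires a consistent order; btrfs_double_inode_lock() orders the pair by address (the diff shows it swapping the inodes when `inode1 < inode2`). A userspace pthreads sketch of that idea, with invented names (the kernel additionally uses inode_lock_nested() with lockdep subclasses, which this analog omits):

```c
#include <pthread.h>

struct fake_inode {
	pthread_mutex_t lock;
};

/*
 * Lock two inodes in a globally consistent (address) order so that two
 * concurrent remap operations on the same pair cannot deadlock, one
 * holding A waiting for B while the other holds B waiting for A.
 */
static void double_inode_lock(struct fake_inode *a, struct fake_inode *b)
{
	if (a > b) {			/* normalize to address order */
		struct fake_inode *tmp = a;
		a = b;
		b = tmp;
	}
	pthread_mutex_lock(&a->lock);
	pthread_mutex_lock(&b->lock);
}

static void double_inode_unlock(struct fake_inode *a, struct fake_inode *b)
{
	/* Unlock order does not matter for correctness. */
	pthread_mutex_unlock(&a->lock);
	pthread_mutex_unlock(&b->lock);
}
```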

So make Btrfs use generic_remap_file_range_prep() (XFS and OCFS2 use it)
to eliminate duplicated code, fix a few bugs and benefit from future bug
fixes done there - for example the recent clone and dedupe bugs involving
reflinking a partial EOF block got a counterpart fix in the generic
helper, since it affected all filesystems supporting these operations,
so we no longer need special checks in Btrfs for them.
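The partial-EOF-block handling mentioned above boils down to one rounding rule, visible in the patch as `len = ALIGN(src->i_size, bs) - off`: when a range ends exactly at i_size, the length is pushed out to the next block boundary. A minimal userspace sketch, where `ALIGN` is a stand-in for the kernel macro (assumes a power-of-two block size) and the function name is invented:

```c
#include <stdint.h>

/* Userspace stand-in for the kernel's ALIGN(); a must be a power of two. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

/*
 * If the source range ends exactly at EOF, extend the length to the
 * next block boundary; otherwise the caller's length stands.
 */
static uint64_t remap_len_at_eof(uint64_t off, uint64_t len,
				 uint64_t isize, uint64_t bs)
{
	if (off + len == isize)
		return ALIGN(isize, bs) - off;
	return len;
}
```

Extending past i_size is only legal here because nothing meaningful lives in the tail of the EOF block; cloning that block into the *middle* of a file would corrupt data, which is precisely the case the generic helper now rejects for all filesystems.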

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

Authored by Filipe Manana, committed by David Sterba
34a28e3d 61ed3a14
+134 -492
fs/btrfs/ioctl.c | +134 -492

Removed helpers, all made redundant by the generic helper:

* extent_same_get_page() and gather_extent_pages() - gathered and locked the
  page cache pages of both ranges for the dedupe comparison;
* lock_extent_range(), btrfs_double_extent_lock() and
  btrfs_double_extent_unlock() - locked/unlocked the ranges in the inodes'
  io trees, retrying while delalloc or ordered extents were present;
* struct cmp_pages, btrfs_cmp_data_prepare(), btrfs_cmp_data() and
  btrfs_cmp_data_free() - the page-by-page memcmp() of the two ranges, now
  done by the generic helper;
* extent_same_check_offsets() - offset/length/alignment validation for
  deduplication.

btrfs_extent_same_range() shrinks to:

```c
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
				   struct inode *dst, u64 dst_loff)
{
	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
	int ret;
	u64 len = olen;

	if (loff + len == src->i_size)
		len = ALIGN(src->i_size, bs) - loff;
	/*
	 * For same inode case we don't want our length pushed out past i_size
	 * as comparing that data range makes no sense.
	 *
	 * This effectively means we require aligned extents for the single
	 * inode case, whereas the other cases allow an unaligned length so
	 * long as it ends at i_size.
	 */
	if (dst == src && len != olen)
		return -EINVAL;

	/*
	 * Lock destination range to serialize with concurrent readpages().
	 */
	lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
	ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
	unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);

	return ret;
}
```

btrfs_extent_same() no longer takes the inode locks (the caller now holds
them), no longer allocates the page arrays for the comparison, and keeps
only the NODATASUM check, the swap-file check and the chunked loop over
btrfs_extent_same_range(), returning errors directly instead of jumping to
unlock labels.

btrfs_clone_files() similarly drops its read-only root, cross-mount,
directory, zero-length, same-inode overlap and alignment checks (handled by
the generic helper or by the new prep function; in particular, the VFS's
generic_remap_file_range_prep() protects us from cloning the EOF block into
the middle of a file, which would corrupt data if the file size is not
block aligned), and now only locks the destination range in the io tree
around btrfs_clone() to serialize with concurrent readpages().

The new prep function:

```c
static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				       struct file *file_out, loff_t pos_out,
				       loff_t *len, unsigned int remap_flags)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	u64 bs = BTRFS_I(inode_out)->root->fs_info->sb->s_blocksize;
	bool same_inode = inode_out == inode_in;
	u64 wb_len;
	int ret;

	if (!(remap_flags & REMAP_FILE_DEDUP)) {
		struct btrfs_root *root_out = BTRFS_I(inode_out)->root;

		if (btrfs_root_readonly(root_out))
			return -EROFS;

		if (file_in->f_path.mnt != file_out->f_path.mnt ||
		    inode_in->i_sb != inode_out->i_sb)
			return -EXDEV;
	}

	if (same_inode)
		inode_lock(inode_in);
	else
		btrfs_double_inode_lock(inode_in, inode_out);

	/*
	 * Now that the inodes are locked, we need to start writeback ourselves
	 * and can not rely on the writeback from the VFS's generic helper
	 * generic_remap_file_range_prep() because:
	 *
	 * 1) For compression we must call filemap_fdatawrite_range() twice
	 *    (btrfs_fdatawrite_range() does it for us), and the generic
	 *    helper only calls it once;
	 *
	 * 2) filemap_fdatawrite_range(), called by the generic helper, only
	 *    waits for the writeback to complete, i.e. for IO to be done, and
	 *    not for the ordered extents to complete. We need to wait for them
	 *    to complete so that new file extent items are in the fs tree.
	 */
	if (*len == 0 && !(remap_flags & REMAP_FILE_DEDUP))
		wb_len = ALIGN(inode_in->i_size, bs) - ALIGN_DOWN(pos_in, bs);
	else
		wb_len = ALIGN(*len, bs);

	/*
	 * Since we don't lock ranges, wait for ongoing lockless dio writes (as
	 * any in progress could create its ordered extents after we wait for
	 * existing ordered extents below).
	 */
	inode_dio_wait(inode_in);
	if (!same_inode)
		inode_dio_wait(inode_out);

	ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
				       wb_len);
	if (ret < 0)
		goto out_unlock;
	ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs),
				       wb_len);
	if (ret < 0)
		goto out_unlock;

	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
					    len, remap_flags);
	if (ret < 0 || *len == 0)
		goto out_unlock;

	return 0;

 out_unlock:
	if (same_inode)
		inode_unlock(inode_in);
	else
		btrfs_double_inode_unlock(inode_in, inode_out);

	return ret;
}
```

And btrfs_remap_file_range() now calls the prep function, dispatches to the
dedupe or clone path, and unlocks the inodes afterwards:

```c
static loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
				     struct file *dst_file, loff_t destoff,
				     loff_t len, unsigned int remap_flags)
{
	struct inode *src_inode = file_inode(src_file);
	struct inode *dst_inode = file_inode(dst_file);
	bool same_inode = dst_inode == src_inode;
	int ret;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff,
					  &len, remap_flags);
	if (ret < 0 || len == 0)
		return ret;

	if (remap_flags & REMAP_FILE_DEDUP)
		ret = btrfs_extent_same(src_inode, off, len, dst_inode,
					destoff);
	else
		ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);

	if (same_inode)
		inode_unlock(src_inode);
	else
		btrfs_double_inode_unlock(src_inode, dst_inode);

	return ret < 0 ? ret : len;
}
```

The WARN_ON_ONCE(bs < PAGE_SIZE) check in the dedupe path is also gone,
since btrfs_cmp_data() no longer exists.