Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'work.copy_file_range' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs copy_file_range updates from Al Viro:
"Several series around copy_file_range/CLONE"

* 'work.copy_file_range' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
btrfs: use new dedupe data function pointer
vfs: hoist the btrfs deduplication ioctl to the vfs
vfs: wire up compat ioctl for CLONE/CLONE_RANGE
cifs: avoid unused variable and label
nfsd: implement the NFSv4.2 CLONE operation
nfsd: Pass filehandle to nfs4_preprocess_stateid_op()
vfs: pull btrfs clone API to vfs layer
locks: new locks_mandatory_area calling convention
vfs: Add vfs_copy_file_range() support for pagecache copies
btrfs: add .copy_file_range file operation
x86: add sys_copy_file_range to syscall tables
vfs: add copy_file_range syscall and vfs helper

+734 -344
+1
arch/x86/entry/syscalls/syscall_32.tbl
··· 383 383 374 i386 userfaultfd sys_userfaultfd 384 384 375 i386 membarrier sys_membarrier 385 385 376 i386 mlock2 sys_mlock2 386 + 377 i386 copy_file_range sys_copy_file_range
+1
arch/x86/entry/syscalls/syscall_64.tbl
··· 332 332 323 common userfaultfd sys_userfaultfd 333 333 324 common membarrier sys_membarrier 334 334 325 common mlock2 sys_mlock2 335 + 326 common copy_file_range sys_copy_file_range 335 336 336 337 # 337 338 # x32-specific system call numbers start at 512 to avoid cache impact
+7 -1
fs/btrfs/ctree.h
··· 4024 4024 struct btrfs_ioctl_space_info *space); 4025 4025 void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 4026 4026 struct btrfs_ioctl_balance_args *bargs); 4027 - 4027 + ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, 4028 + struct file *dst_file, u64 dst_loff); 4028 4029 4029 4030 /* file.c */ 4030 4031 int btrfs_auto_defrag_init(void); ··· 4056 4055 loff_t pos, size_t write_bytes, 4057 4056 struct extent_state **cached); 4058 4057 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); 4058 + ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in, 4059 + struct file *file_out, loff_t pos_out, 4060 + size_t len, unsigned int flags); 4061 + int btrfs_clone_file_range(struct file *file_in, loff_t pos_in, 4062 + struct file *file_out, loff_t pos_out, u64 len); 4059 4063 4060 4064 /* tree-defrag.c */ 4061 4065 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+3
fs/btrfs/file.c
··· 2934 2934 #ifdef CONFIG_COMPAT 2935 2935 .compat_ioctl = btrfs_ioctl, 2936 2936 #endif 2937 + .copy_file_range = btrfs_copy_file_range, 2938 + .clone_file_range = btrfs_clone_file_range, 2939 + .dedupe_file_range = btrfs_dedupe_file_range, 2937 2940 }; 2938 2941 2939 2942 void btrfs_auto_defrag_exit(void)
+36 -150
fs/btrfs/ioctl.c
··· 2962 2962 flush_dcache_page(dst_page); 2963 2963 2964 2964 if (memcmp(addr, dst_addr, cmp_len)) 2965 - ret = BTRFS_SAME_DATA_DIFFERS; 2965 + ret = -EBADE; 2966 2966 2967 2967 kunmap_atomic(addr); 2968 2968 kunmap_atomic(dst_addr); ··· 3098 3098 3099 3099 #define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) 3100 3100 3101 - static long btrfs_ioctl_file_extent_same(struct file *file, 3102 - struct btrfs_ioctl_same_args __user *argp) 3101 + ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, 3102 + struct file *dst_file, u64 dst_loff) 3103 3103 { 3104 - struct btrfs_ioctl_same_args *same = NULL; 3105 - struct btrfs_ioctl_same_extent_info *info; 3106 - struct inode *src = file_inode(file); 3107 - u64 off; 3108 - u64 len; 3109 - int i; 3110 - int ret; 3111 - unsigned long size; 3104 + struct inode *src = file_inode(src_file); 3105 + struct inode *dst = file_inode(dst_file); 3112 3106 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 3113 - bool is_admin = capable(CAP_SYS_ADMIN); 3114 - u16 count; 3107 + ssize_t res; 3115 3108 3116 - if (!(file->f_mode & FMODE_READ)) 3117 - return -EINVAL; 3118 - 3119 - ret = mnt_want_write_file(file); 3120 - if (ret) 3121 - return ret; 3122 - 3123 - if (get_user(count, &argp->dest_count)) { 3124 - ret = -EFAULT; 3125 - goto out; 3126 - } 3127 - 3128 - size = offsetof(struct btrfs_ioctl_same_args __user, info[count]); 3129 - 3130 - same = memdup_user(argp, size); 3131 - 3132 - if (IS_ERR(same)) { 3133 - ret = PTR_ERR(same); 3134 - same = NULL; 3135 - goto out; 3136 - } 3137 - 3138 - off = same->logical_offset; 3139 - len = same->length; 3140 - 3141 - /* 3142 - * Limit the total length we will dedupe for each operation. 3143 - * This is intended to bound the total time spent in this 3144 - * ioctl to something sane. 
3145 - */ 3146 - if (len > BTRFS_MAX_DEDUPE_LEN) 3147 - len = BTRFS_MAX_DEDUPE_LEN; 3109 + if (olen > BTRFS_MAX_DEDUPE_LEN) 3110 + olen = BTRFS_MAX_DEDUPE_LEN; 3148 3111 3149 3112 if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { 3150 3113 /* ··· 3115 3152 * result, btrfs_cmp_data() won't correctly handle 3116 3153 * this situation without an update. 3117 3154 */ 3118 - ret = -EINVAL; 3119 - goto out; 3155 + return -EINVAL; 3120 3156 } 3121 3157 3122 - ret = -EISDIR; 3123 - if (S_ISDIR(src->i_mode)) 3124 - goto out; 3125 - 3126 - ret = -EACCES; 3127 - if (!S_ISREG(src->i_mode)) 3128 - goto out; 3129 - 3130 - /* pre-format output fields to sane values */ 3131 - for (i = 0; i < count; i++) { 3132 - same->info[i].bytes_deduped = 0ULL; 3133 - same->info[i].status = 0; 3134 - } 3135 - 3136 - for (i = 0, info = same->info; i < count; i++, info++) { 3137 - struct inode *dst; 3138 - struct fd dst_file = fdget(info->fd); 3139 - if (!dst_file.file) { 3140 - info->status = -EBADF; 3141 - continue; 3142 - } 3143 - dst = file_inode(dst_file.file); 3144 - 3145 - if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) { 3146 - info->status = -EINVAL; 3147 - } else if (file->f_path.mnt != dst_file.file->f_path.mnt) { 3148 - info->status = -EXDEV; 3149 - } else if (S_ISDIR(dst->i_mode)) { 3150 - info->status = -EISDIR; 3151 - } else if (!S_ISREG(dst->i_mode)) { 3152 - info->status = -EACCES; 3153 - } else { 3154 - info->status = btrfs_extent_same(src, off, len, dst, 3155 - info->logical_offset); 3156 - if (info->status == 0) 3157 - info->bytes_deduped += len; 3158 - } 3159 - fdput(dst_file); 3160 - } 3161 - 3162 - ret = copy_to_user(argp, same, size); 3163 - if (ret) 3164 - ret = -EFAULT; 3165 - 3166 - out: 3167 - mnt_drop_write_file(file); 3168 - kfree(same); 3169 - return ret; 3158 + res = btrfs_extent_same(src, loff, olen, dst, dst_loff); 3159 + if (res) 3160 + return res; 3161 + return olen; 3170 3162 } 3171 3163 3172 3164 static int clone_finish_inode_update(struct 
btrfs_trans_handle *trans, ··· 3697 3779 return ret; 3698 3780 } 3699 3781 3700 - static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 3701 - u64 off, u64 olen, u64 destoff) 3782 + static noinline int btrfs_clone_files(struct file *file, struct file *file_src, 3783 + u64 off, u64 olen, u64 destoff) 3702 3784 { 3703 3785 struct inode *inode = file_inode(file); 3786 + struct inode *src = file_inode(file_src); 3704 3787 struct btrfs_root *root = BTRFS_I(inode)->root; 3705 - struct fd src_file; 3706 - struct inode *src; 3707 3788 int ret; 3708 3789 u64 len = olen; 3709 3790 u64 bs = root->fs_info->sb->s_blocksize; 3710 - int same_inode = 0; 3791 + int same_inode = src == inode; 3711 3792 3712 3793 /* 3713 3794 * TODO: ··· 3719 3802 * be either compressed or non-compressed. 3720 3803 */ 3721 3804 3722 - /* the destination must be opened for writing */ 3723 - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) 3724 - return -EINVAL; 3725 - 3726 3805 if (btrfs_root_readonly(root)) 3727 3806 return -EROFS; 3728 3807 3729 - ret = mnt_want_write_file(file); 3730 - if (ret) 3731 - return ret; 3732 - 3733 - src_file = fdget(srcfd); 3734 - if (!src_file.file) { 3735 - ret = -EBADF; 3736 - goto out_drop_write; 3737 - } 3738 - 3739 - ret = -EXDEV; 3740 - if (src_file.file->f_path.mnt != file->f_path.mnt) 3741 - goto out_fput; 3742 - 3743 - src = file_inode(src_file.file); 3744 - 3745 - ret = -EINVAL; 3746 - if (src == inode) 3747 - same_inode = 1; 3748 - 3749 - /* the src must be open for reading */ 3750 - if (!(src_file.file->f_mode & FMODE_READ)) 3751 - goto out_fput; 3808 + if (file_src->f_path.mnt != file->f_path.mnt || 3809 + src->i_sb != inode->i_sb) 3810 + return -EXDEV; 3752 3811 3753 3812 /* don't make the dst file partly checksummed */ 3754 3813 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 3755 3814 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 3756 - goto out_fput; 3815 + return -EINVAL; 3757 3816 3758 - ret = -EISDIR; 
3759 3817 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 3760 - goto out_fput; 3761 - 3762 - ret = -EXDEV; 3763 - if (src->i_sb != inode->i_sb) 3764 - goto out_fput; 3818 + return -EISDIR; 3765 3819 3766 3820 if (!same_inode) { 3767 3821 btrfs_double_inode_lock(src, inode); ··· 3809 3921 btrfs_double_inode_unlock(src, inode); 3810 3922 else 3811 3923 mutex_unlock(&src->i_mutex); 3812 - out_fput: 3813 - fdput(src_file); 3814 - out_drop_write: 3815 - mnt_drop_write_file(file); 3816 3924 return ret; 3817 3925 } 3818 3926 3819 - static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) 3927 + ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in, 3928 + struct file *file_out, loff_t pos_out, 3929 + size_t len, unsigned int flags) 3820 3930 { 3821 - struct btrfs_ioctl_clone_range_args args; 3931 + ssize_t ret; 3822 3932 3823 - if (copy_from_user(&args, argp, sizeof(args))) 3824 - return -EFAULT; 3825 - return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, 3826 - args.src_length, args.dest_offset); 3933 + ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out); 3934 + if (ret == 0) 3935 + ret = len; 3936 + return ret; 3937 + } 3938 + 3939 + int btrfs_clone_file_range(struct file *src_file, loff_t off, 3940 + struct file *dst_file, loff_t destoff, u64 len) 3941 + { 3942 + return btrfs_clone_files(dst_file, src_file, off, len, destoff); 3827 3943 } 3828 3944 3829 3945 /* ··· 5377 5485 return btrfs_ioctl_dev_info(root, argp); 5378 5486 case BTRFS_IOC_BALANCE: 5379 5487 return btrfs_ioctl_balance(file, NULL); 5380 - case BTRFS_IOC_CLONE: 5381 - return btrfs_ioctl_clone(file, arg, 0, 0, 0); 5382 - case BTRFS_IOC_CLONE_RANGE: 5383 - return btrfs_ioctl_clone_range(file, argp); 5384 5488 case BTRFS_IOC_TRANS_START: 5385 5489 return btrfs_ioctl_trans_start(file); 5386 5490 case BTRFS_IOC_TRANS_END: ··· 5454 5566 return btrfs_ioctl_get_fslabel(file, argp); 5455 5567 case BTRFS_IOC_SET_FSLABEL: 5456 5568 return 
btrfs_ioctl_set_fslabel(file, argp); 5457 - case BTRFS_IOC_FILE_EXTENT_SAME: 5458 - return btrfs_ioctl_file_extent_same(file, argp); 5459 5569 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 5460 5570 return btrfs_ioctl_get_supported_features(file, argp); 5461 5571 case BTRFS_IOC_GET_FEATURES:
+61
fs/cifs/cifsfs.c
··· 913 913 #endif 914 914 }; 915 915 916 + static int cifs_clone_file_range(struct file *src_file, loff_t off, 917 + struct file *dst_file, loff_t destoff, u64 len) 918 + { 919 + struct inode *src_inode = file_inode(src_file); 920 + struct inode *target_inode = file_inode(dst_file); 921 + struct cifsFileInfo *smb_file_src = src_file->private_data; 922 + struct cifsFileInfo *smb_file_target = dst_file->private_data; 923 + struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink); 924 + unsigned int xid; 925 + int rc; 926 + 927 + cifs_dbg(FYI, "clone range\n"); 928 + 929 + xid = get_xid(); 930 + 931 + if (!src_file->private_data || !dst_file->private_data) { 932 + rc = -EBADF; 933 + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); 934 + goto out; 935 + } 936 + 937 + /* 938 + * Note: cifs case is easier than btrfs since server responsible for 939 + * checks for proper open modes and file type and if it wants 940 + * server could even support copy of range where source = target 941 + */ 942 + lock_two_nondirectories(target_inode, src_inode); 943 + 944 + if (len == 0) 945 + len = src_inode->i_size - off; 946 + 947 + cifs_dbg(FYI, "about to flush pages\n"); 948 + /* should we flush first and last page first */ 949 + truncate_inode_pages_range(&target_inode->i_data, destoff, 950 + PAGE_CACHE_ALIGN(destoff + len)-1); 951 + 952 + if (target_tcon->ses->server->ops->duplicate_extents) 953 + rc = target_tcon->ses->server->ops->duplicate_extents(xid, 954 + smb_file_src, smb_file_target, off, len, destoff); 955 + else 956 + rc = -EOPNOTSUPP; 957 + 958 + /* force revalidate of size and timestamps of target file now 959 + that target is updated on the server */ 960 + CIFS_I(target_inode)->time = 0; 961 + /* although unlocking in the reverse order from locking is not 962 + strictly necessary here it is a little cleaner to be consistent */ 963 + unlock_two_nondirectories(src_inode, target_inode); 964 + out: 965 + free_xid(xid); 966 + return rc; 967 + } 968 + 
916 969 const struct file_operations cifs_file_ops = { 917 970 .read_iter = cifs_loose_read_iter, 918 971 .write_iter = cifs_file_write_iter, ··· 978 925 .splice_read = generic_file_splice_read, 979 926 .llseek = cifs_llseek, 980 927 .unlocked_ioctl = cifs_ioctl, 928 + .clone_file_range = cifs_clone_file_range, 981 929 .setlease = cifs_setlease, 982 930 .fallocate = cifs_fallocate, 983 931 }; ··· 995 941 .splice_read = generic_file_splice_read, 996 942 .llseek = cifs_llseek, 997 943 .unlocked_ioctl = cifs_ioctl, 944 + .clone_file_range = cifs_clone_file_range, 945 + .clone_file_range = cifs_clone_file_range, 998 946 .setlease = cifs_setlease, 999 947 .fallocate = cifs_fallocate, 1000 948 }; ··· 1013 957 .mmap = cifs_file_mmap, 1014 958 .splice_read = generic_file_splice_read, 1015 959 .unlocked_ioctl = cifs_ioctl, 960 + .clone_file_range = cifs_clone_file_range, 1016 961 .llseek = cifs_llseek, 1017 962 .setlease = cifs_setlease, 1018 963 .fallocate = cifs_fallocate, ··· 1030 973 .splice_read = generic_file_splice_read, 1031 974 .llseek = cifs_llseek, 1032 975 .unlocked_ioctl = cifs_ioctl, 976 + .clone_file_range = cifs_clone_file_range, 1033 977 .setlease = cifs_setlease, 1034 978 .fallocate = cifs_fallocate, 1035 979 }; ··· 1046 988 .splice_read = generic_file_splice_read, 1047 989 .llseek = cifs_llseek, 1048 990 .unlocked_ioctl = cifs_ioctl, 991 + .clone_file_range = cifs_clone_file_range, 1049 992 .setlease = cifs_setlease, 1050 993 .fallocate = cifs_fallocate, 1051 994 }; ··· 1062 1003 .mmap = cifs_file_mmap, 1063 1004 .splice_read = generic_file_splice_read, 1064 1005 .unlocked_ioctl = cifs_ioctl, 1006 + .clone_file_range = cifs_clone_file_range, 1065 1007 .llseek = cifs_llseek, 1066 1008 .setlease = cifs_setlease, 1067 1009 .fallocate = cifs_fallocate, ··· 1073 1013 .release = cifs_closedir, 1074 1014 .read = generic_read_dir, 1075 1015 .unlocked_ioctl = cifs_ioctl, 1016 + .clone_file_range = cifs_clone_file_range, 1076 1017 .llseek = generic_file_llseek, 
1077 1018 }; 1078 1019
-1
fs/cifs/cifsfs.h
··· 130 130 extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); 131 131 extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 132 132 extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); 133 - 134 133 #ifdef CONFIG_CIFS_NFSD_EXPORT 135 134 extern const struct export_operations cifs_export_ops; 136 135 #endif /* CONFIG_CIFS_NFSD_EXPORT */
+60 -68
fs/cifs/ioctl.c
··· 34 34 #include "cifs_ioctl.h" 35 35 #include <linux/btrfs.h> 36 36 37 + static int cifs_file_clone_range(unsigned int xid, struct file *src_file, 38 + struct file *dst_file) 39 + { 40 + struct inode *src_inode = file_inode(src_file); 41 + struct inode *target_inode = file_inode(dst_file); 42 + struct cifsFileInfo *smb_file_src; 43 + struct cifsFileInfo *smb_file_target; 44 + struct cifs_tcon *src_tcon; 45 + struct cifs_tcon *target_tcon; 46 + int rc; 47 + 48 + cifs_dbg(FYI, "ioctl clone range\n"); 49 + 50 + if (!src_file->private_data || !dst_file->private_data) { 51 + rc = -EBADF; 52 + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); 53 + goto out; 54 + } 55 + 56 + rc = -EXDEV; 57 + smb_file_target = dst_file->private_data; 58 + smb_file_src = src_file->private_data; 59 + src_tcon = tlink_tcon(smb_file_src->tlink); 60 + target_tcon = tlink_tcon(smb_file_target->tlink); 61 + 62 + if (src_tcon->ses != target_tcon->ses) { 63 + cifs_dbg(VFS, "source and target of copy not on same server\n"); 64 + goto out; 65 + } 66 + 67 + /* 68 + * Note: cifs case is easier than btrfs since server responsible for 69 + * checks for proper open modes and file type and if it wants 70 + * server could even support copy of range where source = target 71 + */ 72 + lock_two_nondirectories(target_inode, src_inode); 73 + 74 + cifs_dbg(FYI, "about to flush pages\n"); 75 + /* should we flush first and last page first */ 76 + truncate_inode_pages(&target_inode->i_data, 0); 77 + 78 + if (target_tcon->ses->server->ops->clone_range) 79 + rc = target_tcon->ses->server->ops->clone_range(xid, 80 + smb_file_src, smb_file_target, 0, src_inode->i_size, 0); 81 + else 82 + rc = -EOPNOTSUPP; 83 + 84 + /* force revalidate of size and timestamps of target file now 85 + that target is updated on the server */ 86 + CIFS_I(target_inode)->time = 0; 87 + /* although unlocking in the reverse order from locking is not 88 + strictly necessary here it is a little cleaner to be consistent */ 89 + 
unlock_two_nondirectories(src_inode, target_inode); 90 + out: 91 + return rc; 92 + } 93 + 37 94 static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, 38 - unsigned long srcfd, u64 off, u64 len, u64 destoff, 39 - bool dup_extents) 95 + unsigned long srcfd) 40 96 { 41 97 int rc; 42 - struct cifsFileInfo *smb_file_target = dst_file->private_data; 43 - struct inode *target_inode = file_inode(dst_file); 44 - struct cifs_tcon *target_tcon; 45 98 struct fd src_file; 46 - struct cifsFileInfo *smb_file_src; 47 99 struct inode *src_inode; 48 - struct cifs_tcon *src_tcon; 49 100 50 101 cifs_dbg(FYI, "ioctl clone range\n"); 51 102 /* the destination must be opened for writing */ ··· 124 73 goto out_fput; 125 74 } 126 75 127 - if ((!src_file.file->private_data) || (!dst_file->private_data)) { 128 - rc = -EBADF; 129 - cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); 130 - goto out_fput; 131 - } 132 - 133 - rc = -EXDEV; 134 - smb_file_target = dst_file->private_data; 135 - smb_file_src = src_file.file->private_data; 136 - src_tcon = tlink_tcon(smb_file_src->tlink); 137 - target_tcon = tlink_tcon(smb_file_target->tlink); 138 - 139 - /* check source and target on same server (or volume if dup_extents) */ 140 - if (dup_extents && (src_tcon != target_tcon)) { 141 - cifs_dbg(VFS, "source and target of copy not on same share\n"); 142 - goto out_fput; 143 - } 144 - 145 - if (!dup_extents && (src_tcon->ses != target_tcon->ses)) { 146 - cifs_dbg(VFS, "source and target of copy not on same server\n"); 147 - goto out_fput; 148 - } 149 - 150 76 src_inode = file_inode(src_file.file); 151 77 rc = -EINVAL; 152 78 if (S_ISDIR(src_inode->i_mode)) 153 79 goto out_fput; 154 80 155 - /* 156 - * Note: cifs case is easier than btrfs since server responsible for 157 - * checks for proper open modes and file type and if it wants 158 - * server could even support copy of range where source = target 159 - */ 160 - lock_two_nondirectories(target_inode, src_inode); 81 + rc = 
cifs_file_clone_range(xid, src_file.file, dst_file); 161 82 162 - /* determine range to clone */ 163 - rc = -EINVAL; 164 - if (off + len > src_inode->i_size || off + len < off) 165 - goto out_unlock; 166 - if (len == 0) 167 - len = src_inode->i_size - off; 168 - 169 - cifs_dbg(FYI, "about to flush pages\n"); 170 - /* should we flush first and last page first */ 171 - truncate_inode_pages_range(&target_inode->i_data, destoff, 172 - PAGE_CACHE_ALIGN(destoff + len)-1); 173 - 174 - if (dup_extents && target_tcon->ses->server->ops->duplicate_extents) 175 - rc = target_tcon->ses->server->ops->duplicate_extents(xid, 176 - smb_file_src, smb_file_target, off, len, destoff); 177 - else if (!dup_extents && target_tcon->ses->server->ops->clone_range) 178 - rc = target_tcon->ses->server->ops->clone_range(xid, 179 - smb_file_src, smb_file_target, off, len, destoff); 180 - else 181 - rc = -EOPNOTSUPP; 182 - 183 - /* force revalidate of size and timestamps of target file now 184 - that target is updated on the server */ 185 - CIFS_I(target_inode)->time = 0; 186 - out_unlock: 187 - /* although unlocking in the reverse order from locking is not 188 - strictly necessary here it is a little cleaner to be consistent */ 189 - unlock_two_nondirectories(src_inode, target_inode); 190 83 out_fput: 191 84 fdput(src_file); 192 85 out_drop_write: ··· 251 256 } 252 257 break; 253 258 case CIFS_IOC_COPYCHUNK_FILE: 254 - rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false); 255 - break; 256 - case BTRFS_IOC_CLONE: 257 - rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true); 259 + rc = cifs_ioctl_clone(xid, filep, arg); 258 260 break; 259 261 case CIFS_IOC_SET_INTEGRITY: 260 262 if (pSMBFile == NULL)
+5
fs/compat_ioctl.c
··· 1601 1601 goto out_fput; 1602 1602 #endif 1603 1603 1604 + case FICLONE: 1605 + case FICLONERANGE: 1606 + case FIDEDUPERANGE: 1607 + goto do_ioctl; 1608 + 1604 1609 case FIBMAP: 1605 1610 case FIGETBSZ: 1606 1611 case FIONREAD:
+67
fs/ioctl.c
··· 215 215 return error; 216 216 } 217 217 218 + static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, 219 + u64 off, u64 olen, u64 destoff) 220 + { 221 + struct fd src_file = fdget(srcfd); 222 + int ret; 223 + 224 + if (!src_file.file) 225 + return -EBADF; 226 + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); 227 + fdput(src_file); 228 + return ret; 229 + } 230 + 231 + static long ioctl_file_clone_range(struct file *file, void __user *argp) 232 + { 233 + struct file_clone_range args; 234 + 235 + if (copy_from_user(&args, argp, sizeof(args))) 236 + return -EFAULT; 237 + return ioctl_file_clone(file, args.src_fd, args.src_offset, 238 + args.src_length, args.dest_offset); 239 + } 240 + 218 241 #ifdef CONFIG_BLOCK 219 242 220 243 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) ··· 568 545 return thaw_super(sb); 569 546 } 570 547 548 + static long ioctl_file_dedupe_range(struct file *file, void __user *arg) 549 + { 550 + struct file_dedupe_range __user *argp = arg; 551 + struct file_dedupe_range *same = NULL; 552 + int ret; 553 + unsigned long size; 554 + u16 count; 555 + 556 + if (get_user(count, &argp->dest_count)) { 557 + ret = -EFAULT; 558 + goto out; 559 + } 560 + 561 + size = offsetof(struct file_dedupe_range __user, info[count]); 562 + 563 + same = memdup_user(argp, size); 564 + if (IS_ERR(same)) { 565 + ret = PTR_ERR(same); 566 + same = NULL; 567 + goto out; 568 + } 569 + 570 + ret = vfs_dedupe_file_range(file, same); 571 + if (ret) 572 + goto out; 573 + 574 + ret = copy_to_user(argp, same, size); 575 + if (ret) 576 + ret = -EFAULT; 577 + 578 + out: 579 + kfree(same); 580 + return ret; 581 + } 582 + 571 583 /* 572 584 * When you add any new common ioctls to the switches above and below 573 585 * please update compat_sys_ioctl() too. 
··· 657 599 658 600 case FIGETBSZ: 659 601 return put_user(inode->i_sb->s_blocksize, argp); 602 + 603 + case FICLONE: 604 + return ioctl_file_clone(filp, arg, 0, 0, 0); 605 + 606 + case FICLONERANGE: 607 + return ioctl_file_clone_range(filp, argp); 608 + 609 + case FIDEDUPERANGE: 610 + return ioctl_file_dedupe_range(filp, argp); 660 611 661 612 default: 662 613 if (S_ISREG(inode->i_mode))
+9 -13
fs/locks.c
··· 1258 1258 1259 1259 /** 1260 1260 * locks_mandatory_area - Check for a conflicting lock 1261 - * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ 1262 - * for shared 1263 - * @inode: the file to check 1261 + * @inode: the file to check 1264 1262 * @filp: how the file was opened (if it was) 1265 - * @offset: start of area to check 1266 - * @count: length of area to check 1263 + * @start: first byte in the file to check 1264 + * @end: last byte in the file to check 1265 + * @type: %F_WRLCK for a write lock, else %F_RDLCK 1267 1266 * 1268 1267 * Searches the inode's list of locks to find any POSIX locks which conflict. 1269 - * This function is called from rw_verify_area() and 1270 - * locks_verify_truncate(). 1271 1268 */ 1272 - int locks_mandatory_area(int read_write, struct inode *inode, 1273 - struct file *filp, loff_t offset, 1274 - size_t count) 1269 + int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, 1270 + loff_t end, unsigned char type) 1275 1271 { 1276 1272 struct file_lock fl; 1277 1273 int error; ··· 1279 1283 fl.fl_flags = FL_POSIX | FL_ACCESS; 1280 1284 if (filp && !(filp->f_flags & O_NONBLOCK)) 1281 1285 sleep = true; 1282 - fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; 1283 - fl.fl_start = offset; 1284 - fl.fl_end = offset + count - 1; 1286 + fl.fl_type = type; 1287 + fl.fl_start = start; 1288 + fl.fl_end = end; 1285 1289 1286 1290 for (;;) { 1287 1291 if (filp) {
+10 -77
fs/nfs/nfs4file.c
··· 195 195 return nfs42_proc_allocate(filep, offset, len); 196 196 } 197 197 198 - static noinline long 199 - nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, 200 - u64 src_off, u64 dst_off, u64 count) 198 + static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, 199 + struct file *dst_file, loff_t dst_off, u64 count) 201 200 { 202 201 struct inode *dst_inode = file_inode(dst_file); 203 202 struct nfs_server *server = NFS_SERVER(dst_inode); 204 - struct fd src_file; 205 - struct inode *src_inode; 203 + struct inode *src_inode = file_inode(src_file); 206 204 unsigned int bs = server->clone_blksize; 207 205 bool same_inode = false; 208 206 int ret; 209 - 210 - /* dst file must be opened for writing */ 211 - if (!(dst_file->f_mode & FMODE_WRITE)) 212 - return -EINVAL; 213 - 214 - ret = mnt_want_write_file(dst_file); 215 - if (ret) 216 - return ret; 217 - 218 - src_file = fdget(srcfd); 219 - if (!src_file.file) { 220 - ret = -EBADF; 221 - goto out_drop_write; 222 - } 223 - 224 - src_inode = file_inode(src_file.file); 225 - 226 - if (src_inode == dst_inode) 227 - same_inode = true; 228 - 229 - /* src file must be opened for reading */ 230 - if (!(src_file.file->f_mode & FMODE_READ)) 231 - goto out_fput; 232 - 233 - /* src and dst must be regular files */ 234 - ret = -EISDIR; 235 - if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode)) 236 - goto out_fput; 237 - 238 - ret = -EXDEV; 239 - if (src_file.file->f_path.mnt != dst_file->f_path.mnt || 240 - src_inode->i_sb != dst_inode->i_sb) 241 - goto out_fput; 242 207 243 208 /* check alignment w.r.t. 
clone_blksize */ 244 209 ret = -EINVAL; 245 210 if (bs) { 246 211 if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs)) 247 - goto out_fput; 212 + goto out; 248 213 if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count)) 249 - goto out_fput; 214 + goto out; 250 215 } 251 216 252 - /* verify if ranges are overlapped within the same file */ 253 - if (same_inode) { 254 - if (dst_off + count > src_off && dst_off < src_off + count) 255 - goto out_fput; 256 - } 217 + if (src_inode == dst_inode) 218 + same_inode = true; 257 219 258 220 /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ 259 221 if (same_inode) { ··· 237 275 if (ret) 238 276 goto out_unlock; 239 277 240 - ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count); 278 + ret = nfs42_proc_clone(src_file, dst_file, src_off, dst_off, count); 241 279 242 280 /* truncate inode page cache of the dst range so that future reads can fetch 243 281 * new data from server */ ··· 254 292 mutex_unlock(&dst_inode->i_mutex); 255 293 mutex_unlock(&src_inode->i_mutex); 256 294 } 257 - out_fput: 258 - fdput(src_file); 259 - out_drop_write: 260 - mnt_drop_write_file(dst_file); 295 + out: 261 296 return ret; 262 - } 263 - 264 - static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) 265 - { 266 - struct btrfs_ioctl_clone_range_args args; 267 - 268 - if (copy_from_user(&args, argp, sizeof(args))) 269 - return -EFAULT; 270 - 271 - return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset, 272 - args.dest_offset, args.src_length); 273 - } 274 - 275 - long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 276 - { 277 - void __user *argp = (void __user *)arg; 278 - 279 - switch (cmd) { 280 - case BTRFS_IOC_CLONE: 281 - return nfs42_ioctl_clone(file, arg, 0, 0, 0); 282 - case BTRFS_IOC_CLONE_RANGE: 283 - return nfs42_ioctl_clone_range(file, argp); 284 - } 285 - 286 - return -ENOTTY; 287 297 } 288 298 #endif /* CONFIG_NFS_V4_2 */ 289 299 
··· 276 342 #ifdef CONFIG_NFS_V4_2 277 343 .llseek = nfs4_file_llseek, 278 344 .fallocate = nfs42_fallocate, 279 - .unlocked_ioctl = nfs4_ioctl, 280 - .compat_ioctl = nfs4_ioctl, 345 + .clone_file_range = nfs42_clone_file_range, 281 346 #else 282 347 .llseek = nfs_file_llseek, 283 348 #endif
+56 -7
fs/nfsd/nfs4proc.c
··· 774 774 clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); 775 775 776 776 /* check stateid */ 777 - status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid, 778 - RD_STATE, &read->rd_filp, &read->rd_tmp_file); 777 + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 778 + &read->rd_stateid, RD_STATE, 779 + &read->rd_filp, &read->rd_tmp_file); 779 780 if (status) { 780 781 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); 781 782 goto out; ··· 922 921 923 922 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { 924 923 status = nfs4_preprocess_stateid_op(rqstp, cstate, 925 - &setattr->sa_stateid, WR_STATE, NULL, NULL); 924 + &cstate->current_fh, &setattr->sa_stateid, 925 + WR_STATE, NULL, NULL); 926 926 if (status) { 927 927 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); 928 928 return status; ··· 987 985 if (write->wr_offset >= OFFSET_MAX) 988 986 return nfserr_inval; 989 987 990 - status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE, 991 - &filp, NULL); 988 + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 989 + stateid, WR_STATE, &filp, NULL); 992 990 if (status) { 993 991 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); 994 992 return status; ··· 1012 1010 } 1013 1011 1014 1012 static __be32 1013 + nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1014 + struct nfsd4_clone *clone) 1015 + { 1016 + struct file *src, *dst; 1017 + __be32 status; 1018 + 1019 + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, 1020 + &clone->cl_src_stateid, RD_STATE, 1021 + &src, NULL); 1022 + if (status) { 1023 + dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); 1024 + goto out; 1025 + } 1026 + 1027 + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 1028 + &clone->cl_dst_stateid, WR_STATE, 1029 + &dst, NULL); 1030 + if (status) { 1031 + dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); 1032 + goto 
out_put_src; 1033 + } 1034 + 1035 + /* fix up for NFS-specific error code */ 1036 + if (!S_ISREG(file_inode(src)->i_mode) || 1037 + !S_ISREG(file_inode(dst)->i_mode)) { 1038 + status = nfserr_wrong_type; 1039 + goto out_put_dst; 1040 + } 1041 + 1042 + status = nfsd4_clone_file_range(src, clone->cl_src_pos, 1043 + dst, clone->cl_dst_pos, clone->cl_count); 1044 + 1045 + out_put_dst: 1046 + fput(dst); 1047 + out_put_src: 1048 + fput(src); 1049 + out: 1050 + return status; 1051 + } 1052 + 1053 + static __be32 1015 1054 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1016 1055 struct nfsd4_fallocate *fallocate, int flags) 1017 1056 { 1018 1057 __be32 status = nfserr_notsupp; 1019 1058 struct file *file; 1020 1059 1021 - status = nfs4_preprocess_stateid_op(rqstp, cstate, 1060 + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 1022 1061 &fallocate->falloc_stateid, 1023 1062 WR_STATE, &file, NULL); 1024 1063 if (status != nfs_ok) { ··· 1098 1055 __be32 status; 1099 1056 struct file *file; 1100 1057 1101 - status = nfs4_preprocess_stateid_op(rqstp, cstate, 1058 + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, 1102 1059 &seek->seek_stateid, 1103 1060 RD_STATE, &file, NULL); 1104 1061 if (status) { ··· 2320 2277 .op_func = (nfsd4op_func)nfsd4_deallocate, 2321 2278 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 2322 2279 .op_name = "OP_DEALLOCATE", 2280 + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 2281 + }, 2282 + [OP_CLONE] = { 2283 + .op_func = (nfsd4op_func)nfsd4_clone, 2284 + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 2285 + .op_name = "OP_CLONE", 2323 2286 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 2324 2287 }, 2325 2288 [OP_SEEK] = {
+2 -3
fs/nfsd/nfs4state.c
··· 4797 4797 */ 4798 4798 __be32 4799 4799 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, 4800 - struct nfsd4_compound_state *cstate, stateid_t *stateid, 4801 - int flags, struct file **filpp, bool *tmp_file) 4800 + struct nfsd4_compound_state *cstate, struct svc_fh *fhp, 4801 + stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file) 4802 4802 { 4803 - struct svc_fh *fhp = &cstate->current_fh; 4804 4803 struct inode *ino = d_inode(fhp->fh_dentry); 4805 4804 struct net *net = SVC_NET(rqstp); 4806 4805 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+21
fs/nfsd/nfs4xdr.c
··· 1675 1675 } 1676 1676 1677 1677 static __be32 1678 + nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) 1679 + { 1680 + DECODE_HEAD; 1681 + 1682 + status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid); 1683 + if (status) 1684 + return status; 1685 + status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid); 1686 + if (status) 1687 + return status; 1688 + 1689 + READ_BUF(8 + 8 + 8); 1690 + p = xdr_decode_hyper(p, &clone->cl_src_pos); 1691 + p = xdr_decode_hyper(p, &clone->cl_dst_pos); 1692 + p = xdr_decode_hyper(p, &clone->cl_count); 1693 + DECODE_TAIL; 1694 + } 1695 + 1696 + static __be32 1678 1697 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) 1679 1698 { 1680 1699 DECODE_HEAD; ··· 1804 1785 [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, 1805 1786 [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, 1806 1787 [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, 1788 + [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, 1807 1789 }; 1808 1790 1809 1791 static inline bool ··· 4312 4292 [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, 4313 4293 [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, 4314 4294 [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, 4295 + [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, 4315 4296 }; 4316 4297 4317 4298 /*
+2 -2
fs/nfsd/state.h
··· 578 578 struct nfsd_net; 579 579 580 580 extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, 581 - struct nfsd4_compound_state *cstate, stateid_t *stateid, 582 - int flags, struct file **filp, bool *tmp_file); 581 + struct nfsd4_compound_state *cstate, struct svc_fh *fhp, 582 + stateid_t *stateid, int flags, struct file **filp, bool *tmp_file); 583 583 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, 584 584 stateid_t *stateid, unsigned char typemask, 585 585 struct nfs4_stid **s, struct nfsd_net *nn);
+8
fs/nfsd/vfs.c
··· 36 36 #endif /* CONFIG_NFSD_V3 */ 37 37 38 38 #ifdef CONFIG_NFSD_V4 39 + #include "../internal.h" 39 40 #include "acl.h" 40 41 #include "idmap.h" 41 42 #endif /* CONFIG_NFSD_V4 */ ··· 498 497 return nfserr_notsupp; 499 498 } 500 499 #endif 500 + 501 + __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, 502 + u64 dst_pos, u64 count) 503 + { 504 + return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, 505 + count)); 506 + } 501 507 502 508 __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, 503 509 struct file *file, loff_t offset, loff_t len,
+2
fs/nfsd/vfs.h
··· 56 56 struct xdr_netobj *); 57 57 __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, 58 58 struct file *, loff_t, loff_t, int); 59 + __be32 nfsd4_clone_file_range(struct file *, u64, struct file *, 60 + u64, u64); 59 61 #endif /* CONFIG_NFSD_V4 */ 60 62 __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, 61 63 char *name, int len, struct iattr *attrs,
+10
fs/nfsd/xdr4.h
··· 491 491 u64 falloc_length; 492 492 }; 493 493 494 + struct nfsd4_clone { 495 + /* request */ 496 + stateid_t cl_src_stateid; 497 + stateid_t cl_dst_stateid; 498 + u64 cl_src_pos; 499 + u64 cl_dst_pos; 500 + u64 cl_count; 501 + }; 502 + 494 503 struct nfsd4_seek { 495 504 /* request */ 496 505 stateid_t seek_stateid; ··· 564 555 /* NFSv4.2 */ 565 556 struct nfsd4_fallocate allocate; 566 557 struct nfsd4_fallocate deallocate; 558 + struct nfsd4_clone clone; 567 559 struct nfsd4_seek seek; 568 560 } u; 569 561 struct nfs4_replay * replay;
+299 -3
fs/read_write.c
··· 16 16 #include <linux/pagemap.h> 17 17 #include <linux/splice.h> 18 18 #include <linux/compat.h> 19 + #include <linux/mount.h> 19 20 #include "internal.h" 20 21 21 22 #include <asm/uaccess.h> ··· 396 395 } 397 396 398 397 if (unlikely(inode->i_flctx && mandatory_lock(inode))) { 399 - retval = locks_mandatory_area( 400 - read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 401 - inode, file, pos, count); 398 + retval = locks_mandatory_area(inode, file, pos, pos + count - 1, 399 + read_write == READ ? F_RDLCK : F_WRLCK); 402 400 if (retval < 0) 403 401 return retval; 404 402 } ··· 1327 1327 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1328 1328 } 1329 1329 #endif 1330 + 1331 + /* 1332 + * copy_file_range() differs from regular file read and write in that it 1333 + * specifically allows return partial success. When it does so is up to 1334 + * the copy_file_range method. 1335 + */ 1336 + ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, 1337 + struct file *file_out, loff_t pos_out, 1338 + size_t len, unsigned int flags) 1339 + { 1340 + struct inode *inode_in = file_inode(file_in); 1341 + struct inode *inode_out = file_inode(file_out); 1342 + ssize_t ret; 1343 + 1344 + if (flags != 0) 1345 + return -EINVAL; 1346 + 1347 + /* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT */ 1348 + ret = rw_verify_area(READ, file_in, &pos_in, len); 1349 + if (ret >= 0) 1350 + ret = rw_verify_area(WRITE, file_out, &pos_out, len); 1351 + if (ret < 0) 1352 + return ret; 1353 + 1354 + if (!(file_in->f_mode & FMODE_READ) || 1355 + !(file_out->f_mode & FMODE_WRITE) || 1356 + (file_out->f_flags & O_APPEND)) 1357 + return -EBADF; 1358 + 1359 + /* this could be relaxed once a method supports cross-fs copies */ 1360 + if (inode_in->i_sb != inode_out->i_sb) 1361 + return -EXDEV; 1362 + 1363 + if (len == 0) 1364 + return 0; 1365 + 1366 + ret = mnt_want_write_file(file_out); 1367 + if (ret) 1368 + return ret; 1369 + 1370 + ret = -EOPNOTSUPP; 1371 + if 
(file_out->f_op->copy_file_range) 1372 + ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, 1373 + pos_out, len, flags); 1374 + if (ret == -EOPNOTSUPP) 1375 + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, 1376 + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); 1377 + 1378 + if (ret > 0) { 1379 + fsnotify_access(file_in); 1380 + add_rchar(current, ret); 1381 + fsnotify_modify(file_out); 1382 + add_wchar(current, ret); 1383 + } 1384 + inc_syscr(current); 1385 + inc_syscw(current); 1386 + 1387 + mnt_drop_write_file(file_out); 1388 + 1389 + return ret; 1390 + } 1391 + EXPORT_SYMBOL(vfs_copy_file_range); 1392 + 1393 + SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, 1394 + int, fd_out, loff_t __user *, off_out, 1395 + size_t, len, unsigned int, flags) 1396 + { 1397 + loff_t pos_in; 1398 + loff_t pos_out; 1399 + struct fd f_in; 1400 + struct fd f_out; 1401 + ssize_t ret = -EBADF; 1402 + 1403 + f_in = fdget(fd_in); 1404 + if (!f_in.file) 1405 + goto out2; 1406 + 1407 + f_out = fdget(fd_out); 1408 + if (!f_out.file) 1409 + goto out1; 1410 + 1411 + ret = -EFAULT; 1412 + if (off_in) { 1413 + if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) 1414 + goto out; 1415 + } else { 1416 + pos_in = f_in.file->f_pos; 1417 + } 1418 + 1419 + if (off_out) { 1420 + if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) 1421 + goto out; 1422 + } else { 1423 + pos_out = f_out.file->f_pos; 1424 + } 1425 + 1426 + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, 1427 + flags); 1428 + if (ret > 0) { 1429 + pos_in += ret; 1430 + pos_out += ret; 1431 + 1432 + if (off_in) { 1433 + if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) 1434 + ret = -EFAULT; 1435 + } else { 1436 + f_in.file->f_pos = pos_in; 1437 + } 1438 + 1439 + if (off_out) { 1440 + if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) 1441 + ret = -EFAULT; 1442 + } else { 1443 + f_out.file->f_pos = pos_out; 1444 + } 1445 + } 1446 + 1447 + out: 1448 + fdput(f_out); 
1449 + out1: 1450 + fdput(f_in); 1451 + out2: 1452 + return ret; 1453 + } 1454 + 1455 + static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) 1456 + { 1457 + struct inode *inode = file_inode(file); 1458 + 1459 + if (unlikely(pos < 0)) 1460 + return -EINVAL; 1461 + 1462 + if (unlikely((loff_t) (pos + len) < 0)) 1463 + return -EINVAL; 1464 + 1465 + if (unlikely(inode->i_flctx && mandatory_lock(inode))) { 1466 + loff_t end = len ? pos + len - 1 : OFFSET_MAX; 1467 + int retval; 1468 + 1469 + retval = locks_mandatory_area(inode, file, pos, end, 1470 + write ? F_WRLCK : F_RDLCK); 1471 + if (retval < 0) 1472 + return retval; 1473 + } 1474 + 1475 + return security_file_permission(file, write ? MAY_WRITE : MAY_READ); 1476 + } 1477 + 1478 + int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1479 + struct file *file_out, loff_t pos_out, u64 len) 1480 + { 1481 + struct inode *inode_in = file_inode(file_in); 1482 + struct inode *inode_out = file_inode(file_out); 1483 + int ret; 1484 + 1485 + if (inode_in->i_sb != inode_out->i_sb || 1486 + file_in->f_path.mnt != file_out->f_path.mnt) 1487 + return -EXDEV; 1488 + 1489 + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1490 + return -EISDIR; 1491 + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1492 + return -EINVAL; 1493 + 1494 + if (!(file_in->f_mode & FMODE_READ) || 1495 + !(file_out->f_mode & FMODE_WRITE) || 1496 + (file_out->f_flags & O_APPEND) || 1497 + !file_in->f_op->clone_file_range) 1498 + return -EBADF; 1499 + 1500 + ret = clone_verify_area(file_in, pos_in, len, false); 1501 + if (ret) 1502 + return ret; 1503 + 1504 + ret = clone_verify_area(file_out, pos_out, len, true); 1505 + if (ret) 1506 + return ret; 1507 + 1508 + if (pos_in + len > i_size_read(inode_in)) 1509 + return -EINVAL; 1510 + 1511 + ret = mnt_want_write_file(file_out); 1512 + if (ret) 1513 + return ret; 1514 + 1515 + ret = file_in->f_op->clone_file_range(file_in, pos_in, 1516 + file_out, 
pos_out, len); 1517 + if (!ret) { 1518 + fsnotify_access(file_in); 1519 + fsnotify_modify(file_out); 1520 + } 1521 + 1522 + mnt_drop_write_file(file_out); 1523 + return ret; 1524 + } 1525 + EXPORT_SYMBOL(vfs_clone_file_range); 1526 + 1527 + int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) 1528 + { 1529 + struct file_dedupe_range_info *info; 1530 + struct inode *src = file_inode(file); 1531 + u64 off; 1532 + u64 len; 1533 + int i; 1534 + int ret; 1535 + bool is_admin = capable(CAP_SYS_ADMIN); 1536 + u16 count = same->dest_count; 1537 + struct file *dst_file; 1538 + loff_t dst_off; 1539 + ssize_t deduped; 1540 + 1541 + if (!(file->f_mode & FMODE_READ)) 1542 + return -EINVAL; 1543 + 1544 + if (same->reserved1 || same->reserved2) 1545 + return -EINVAL; 1546 + 1547 + off = same->src_offset; 1548 + len = same->src_length; 1549 + 1550 + ret = -EISDIR; 1551 + if (S_ISDIR(src->i_mode)) 1552 + goto out; 1553 + 1554 + ret = -EINVAL; 1555 + if (!S_ISREG(src->i_mode)) 1556 + goto out; 1557 + 1558 + ret = clone_verify_area(file, off, len, false); 1559 + if (ret < 0) 1560 + goto out; 1561 + ret = 0; 1562 + 1563 + /* pre-format output fields to sane values */ 1564 + for (i = 0; i < count; i++) { 1565 + same->info[i].bytes_deduped = 0ULL; 1566 + same->info[i].status = FILE_DEDUPE_RANGE_SAME; 1567 + } 1568 + 1569 + for (i = 0, info = same->info; i < count; i++, info++) { 1570 + struct inode *dst; 1571 + struct fd dst_fd = fdget(info->dest_fd); 1572 + 1573 + dst_file = dst_fd.file; 1574 + if (!dst_file) { 1575 + info->status = -EBADF; 1576 + goto next_loop; 1577 + } 1578 + dst = file_inode(dst_file); 1579 + 1580 + ret = mnt_want_write_file(dst_file); 1581 + if (ret) { 1582 + info->status = ret; 1583 + goto next_loop; 1584 + } 1585 + 1586 + dst_off = info->dest_offset; 1587 + ret = clone_verify_area(dst_file, dst_off, len, true); 1588 + if (ret < 0) { 1589 + info->status = ret; 1590 + goto next_file; 1591 + } 1592 + ret = 0; 1593 + 1594 + if 
(info->reserved) { 1595 + info->status = -EINVAL; 1596 + } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { 1597 + info->status = -EINVAL; 1598 + } else if (file->f_path.mnt != dst_file->f_path.mnt) { 1599 + info->status = -EXDEV; 1600 + } else if (S_ISDIR(dst->i_mode)) { 1601 + info->status = -EISDIR; 1602 + } else if (dst_file->f_op->dedupe_file_range == NULL) { 1603 + info->status = -EINVAL; 1604 + } else { 1605 + deduped = dst_file->f_op->dedupe_file_range(file, off, 1606 + len, dst_file, 1607 + info->dest_offset); 1608 + if (deduped == -EBADE) 1609 + info->status = FILE_DEDUPE_RANGE_DIFFERS; 1610 + else if (deduped < 0) 1611 + info->status = deduped; 1612 + else 1613 + info->bytes_deduped += deduped; 1614 + } 1615 + 1616 + next_file: 1617 + mnt_drop_write_file(dst_file); 1618 + next_loop: 1619 + fdput(dst_fd); 1620 + } 1621 + 1622 + out: 1623 + return ret; 1624 + } 1625 + EXPORT_SYMBOL(vfs_dedupe_file_range);
+26 -16
include/linux/fs.h
··· 1630 1630 #ifndef CONFIG_MMU 1631 1631 unsigned (*mmap_capabilities)(struct file *); 1632 1632 #endif 1633 + ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, 1634 + loff_t, size_t, unsigned int); 1635 + int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, 1636 + u64); 1637 + ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *, 1638 + u64); 1633 1639 }; 1634 1640 1635 1641 struct inode_operations { ··· 1686 1680 unsigned long, loff_t *); 1687 1681 extern ssize_t vfs_writev(struct file *, const struct iovec __user *, 1688 1682 unsigned long, loff_t *); 1683 + extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, 1684 + loff_t, size_t, unsigned int); 1685 + extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1686 + struct file *file_out, loff_t pos_out, u64 len); 1687 + extern int vfs_dedupe_file_range(struct file *file, 1688 + struct file_dedupe_range *same); 1689 1689 1690 1690 struct super_operations { 1691 1691 struct inode *(*alloc_inode)(struct super_block *sb); ··· 2039 2027 2040 2028 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) 2041 2029 2042 - #define FLOCK_VERIFY_READ 1 2043 - #define FLOCK_VERIFY_WRITE 2 2044 - 2045 2030 #ifdef CONFIG_MANDATORY_FILE_LOCKING 2046 2031 extern int locks_mandatory_locked(struct file *); 2047 - extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); 2032 + extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char); 2048 2033 2049 2034 /* 2050 2035 * Candidates for mandatory locking have the setgid bit set ··· 2071 2062 } 2072 2063 2073 2064 static inline int locks_verify_truncate(struct inode *inode, 2074 - struct file *filp, 2065 + struct file *f, 2075 2066 loff_t size) 2076 2067 { 2077 - if (inode->i_flctx && mandatory_lock(inode)) 2078 - return locks_mandatory_area( 2079 - FLOCK_VERIFY_WRITE, inode, filp, 2080 - size < inode->i_size ? 
size : inode->i_size, 2081 - (size < inode->i_size ? inode->i_size - size 2082 - : size - inode->i_size) 2083 - ); 2084 - return 0; 2068 + if (!inode->i_flctx || !mandatory_lock(inode)) 2069 + return 0; 2070 + 2071 + if (size < inode->i_size) { 2072 + return locks_mandatory_area(inode, f, size, inode->i_size - 1, 2073 + F_WRLCK); 2074 + } else { 2075 + return locks_mandatory_area(inode, f, inode->i_size, size - 1, 2076 + F_WRLCK); 2077 + } 2085 2078 } 2086 2079 2087 2080 #else /* !CONFIG_MANDATORY_FILE_LOCKING */ ··· 2093 2082 return 0; 2094 2083 } 2095 2084 2096 - static inline int locks_mandatory_area(int rw, struct inode *inode, 2097 - struct file *filp, loff_t offset, 2098 - size_t count) 2085 + static inline int locks_mandatory_area(struct inode *inode, struct file *filp, 2086 + loff_t start, loff_t end, unsigned char type) 2099 2087 { 2100 2088 return 0; 2101 2089 }
+2 -2
include/linux/nfs4.h
··· 139 139 Needs to be updated if more operations are defined in future.*/ 140 140 141 141 #define FIRST_NFS4_OP OP_ACCESS 142 - #define LAST_NFS4_OP OP_WRITE_SAME 143 142 #define LAST_NFS40_OP OP_RELEASE_LOCKOWNER 144 143 #define LAST_NFS41_OP OP_RECLAIM_COMPLETE 145 - #define LAST_NFS42_OP OP_WRITE_SAME 144 + #define LAST_NFS42_OP OP_CLONE 145 + #define LAST_NFS4_OP LAST_NFS42_OP 146 146 147 147 enum nfsstat4 { 148 148 NFS4_OK = 0,
+3
include/linux/syscalls.h
··· 886 886 const char __user *const __user *envp, int flags); 887 887 888 888 asmlinkage long sys_membarrier(int cmd, int flags); 889 + asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in, 890 + int fd_out, loff_t __user *off_out, 891 + size_t len, unsigned int flags); 889 892 890 893 asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); 891 894
+3 -1
include/uapi/asm-generic/unistd.h
··· 715 715 __SYSCALL(__NR_membarrier, sys_membarrier) 716 716 #define __NR_mlock2 284 717 717 __SYSCALL(__NR_mlock2, sys_mlock2) 718 + #define __NR_copy_file_range 285 719 + __SYSCALL(__NR_copy_file_range, sys_copy_file_range) 718 720 719 721 #undef __NR_syscalls 720 - #define __NR_syscalls 285 722 + #define __NR_syscalls 286 721 723 722 724 /* 723 725 * All syscalls below here should go away really,
+39
include/uapi/linux/fs.h
··· 39 39 #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ 40 40 #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ 41 41 42 + struct file_clone_range { 43 + __s64 src_fd; 44 + __u64 src_offset; 45 + __u64 src_length; 46 + __u64 dest_offset; 47 + }; 48 + 42 49 struct fstrim_range { 43 50 __u64 start; 44 51 __u64 len; 45 52 __u64 minlen; 53 + }; 54 + 55 + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ 56 + #define FILE_DEDUPE_RANGE_SAME 0 57 + #define FILE_DEDUPE_RANGE_DIFFERS 1 58 + 59 + /* from struct btrfs_ioctl_file_extent_same_info */ 60 + struct file_dedupe_range_info { 61 + __s64 dest_fd; /* in - destination file */ 62 + __u64 dest_offset; /* in - start of extent in destination */ 63 + __u64 bytes_deduped; /* out - total # of bytes we were able 64 + * to dedupe from this file. */ 65 + /* status of this dedupe operation: 66 + * < 0 for error 67 + * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds 68 + * == FILE_DEDUPE_RANGE_DIFFERS if data differs 69 + */ 70 + __s32 status; /* out - see above description */ 71 + __u32 reserved; /* must be zero */ 72 + }; 73 + 74 + /* from struct btrfs_ioctl_file_extent_same_args */ 75 + struct file_dedupe_range { 76 + __u64 src_offset; /* in - start of extent in source */ 77 + __u64 src_length; /* in - length of extent */ 78 + __u16 dest_count; /* in - total elements in info array */ 79 + __u16 reserved1; /* must be zero */ 80 + __u32 reserved2; /* must be zero */ 81 + struct file_dedupe_range_info info[0]; 46 82 }; 47 83 48 84 /* And dynamically-tunable limits and defaults: */ ··· 195 159 #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ 196 160 #define FITHAW _IOWR('X', 120, int) /* Thaw */ 197 161 #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ 162 + #define FICLONE _IOW(0x94, 9, int) 163 + #define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) 164 + #define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range) 198 165 199 166 #define FS_IOC_GETFLAGS 
_IOR('f', 1, long) 200 167 #define FS_IOC_SETFLAGS _IOW('f', 2, long)
+1
kernel/sys_ni.c
··· 174 174 cond_syscall(sys_setfsgid); 175 175 cond_syscall(sys_capget); 176 176 cond_syscall(sys_capset); 177 + cond_syscall(sys_copy_file_range); 177 178 178 179 /* arch-specific weak syscall entries */ 179 180 cond_syscall(sys_pciconfig_read);