Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'vfs.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs into for-6.8/io_uring

Merge vfs.file from the VFS tree to avoid merge conflicts, since
receive_fd() now takes 3 arguments rather than just 2.

* 'vfs.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
file: remove __receive_fd()
file: stop exposing receive_fd_user()
fs: replace f_rcuhead with f_task_work
file: remove pointless wrapper
file: s/close_fd_get_file()/file_close_fd()/g
Improve __fget_files_rcu() code generation (and thus __fget_light())
file: massage cleanup of files that failed to open

+95 -92
+1 -1
drivers/android/binder.c
··· 1921 1921 if (!twcb) 1922 1922 return; 1923 1923 init_task_work(&twcb->twork, binder_do_fd_close); 1924 - twcb->file = close_fd_get_file(fd); 1924 + twcb->file = file_close_fd(fd); 1925 1925 if (twcb->file) { 1926 1926 // pin it until binder_do_fd_close(); see comments there 1927 1927 get_file(twcb->file);
+1 -1
drivers/vdpa/vdpa_user/vduse_dev.c
··· 1157 1157 fput(f); 1158 1158 break; 1159 1159 } 1160 - ret = receive_fd(f, perm_to_file_flags(entry.perm)); 1160 + ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm)); 1161 1161 fput(f); 1162 1162 break; 1163 1163 }
+54 -45
fs/file.c
··· 629 629 EXPORT_SYMBOL(fd_install); 630 630 631 631 /** 632 - * pick_file - return file associatd with fd 632 + * file_close_fd_locked - return file associated with fd 633 633 * @files: file struct to retrieve file from 634 634 * @fd: file descriptor to retrieve file for 635 + * 636 + * Doesn't take a separate reference count. 635 637 * 636 638 * Context: files_lock must be held. 637 639 * 638 640 * Returns: The file associated with @fd (NULL if @fd is not open) 639 641 */ 640 - static struct file *pick_file(struct files_struct *files, unsigned fd) 642 + struct file *file_close_fd_locked(struct files_struct *files, unsigned fd) 641 643 { 642 644 struct fdtable *fdt = files_fdtable(files); 643 645 struct file *file; 646 + 647 + lockdep_assert_held(&files->file_lock); 644 648 645 649 if (fd >= fdt->max_fds) 646 650 return NULL; ··· 664 660 struct file *file; 665 661 666 662 spin_lock(&files->file_lock); 667 - file = pick_file(files, fd); 663 + file = file_close_fd_locked(files, fd); 668 664 spin_unlock(&files->file_lock); 669 665 if (!file) 670 666 return -EBADF; ··· 711 707 max_fd = min(max_fd, n); 712 708 713 709 for (; fd <= max_fd; fd++) { 714 - file = pick_file(files, fd); 710 + file = file_close_fd_locked(files, fd); 715 711 if (file) { 716 712 spin_unlock(&files->file_lock); 717 713 filp_close(file, files); ··· 799 795 return 0; 800 796 } 801 797 802 - /* 803 - * See close_fd_get_file() below, this variant assumes current->files->file_lock 804 - * is held. 798 + /** 799 + * file_close_fd - return file associated with fd 800 + * @fd: file descriptor to retrieve file for 801 + * 802 + * Doesn't take a separate reference count. 803 + * 804 + * Returns: The file associated with @fd (NULL if @fd is not open) 805 805 */ 806 - struct file *__close_fd_get_file(unsigned int fd) 807 - { 808 - return pick_file(current->files, fd); 809 - } 810 - 811 - /* 812 - * variant of close_fd that gets a ref on the file for later fput. 
813 - * The caller must ensure that filp_close() called on the file. 814 - */ 815 - struct file *close_fd_get_file(unsigned int fd) 806 + struct file *file_close_fd(unsigned int fd) 816 807 { 817 808 struct files_struct *files = current->files; 818 809 struct file *file; 819 810 820 811 spin_lock(&files->file_lock); 821 - file = pick_file(files, fd); 812 + file = file_close_fd_locked(files, fd); 822 813 spin_unlock(&files->file_lock); 823 814 824 815 return file; ··· 958 959 struct file *file; 959 960 struct fdtable *fdt = rcu_dereference_raw(files->fdt); 960 961 struct file __rcu **fdentry; 962 + unsigned long nospec_mask; 961 963 962 - if (unlikely(fd >= fdt->max_fds)) 963 - return NULL; 964 - 965 - fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); 964 + /* Mask is a 0 for invalid fd's, ~0 for valid ones */ 965 + nospec_mask = array_index_mask_nospec(fd, fdt->max_fds); 966 966 967 967 /* 968 - * Ok, we have a file pointer. However, because we do 969 - * this all locklessly under RCU, we may be racing with 970 - * that file being closed. 968 + * fdentry points to the 'fd' offset, or fdt->fd[0]. 969 + * Loading from fdt->fd[0] is always safe, because the 970 + * array always exists. 971 + */ 972 + fdentry = fdt->fd + (fd & nospec_mask); 973 + 974 + /* Do the load, then mask any invalid result */ 975 + file = rcu_dereference_raw(*fdentry); 976 + file = (void *)(nospec_mask & (unsigned long)file); 977 + if (unlikely(!file)) 978 + return NULL; 979 + 980 + /* 981 + * Ok, we have a file pointer that was valid at 982 + * some point, but it might have become stale since. 971 983 * 984 + * We need to confirm it by incrementing the refcount 985 + * and then check the lookup again. 986 + * 987 + * atomic_long_inc_not_zero() gives us a full memory 988 + * barrier. We only really need an 'acquire' one to 989 + * protect the loads below, but we don't have that. 
990 + */ 991 + if (unlikely(!atomic_long_inc_not_zero(&file->f_count))) 992 + continue; 993 + 994 + /* 972 995 * Such a race can take two forms: 973 996 * 974 997 * (a) the file ref already went down to zero and the 975 998 * file hasn't been reused yet or the file count 976 999 * isn't zero but the file has already been reused. 977 - */ 978 - file = __get_file_rcu(fdentry); 979 - if (unlikely(!file)) 980 - return NULL; 981 - 982 - if (unlikely(IS_ERR(file))) 983 - continue; 984 - 985 - /* 1000 + * 986 1001 * (b) the file table entry has changed under us. 987 1002 * Note that we don't need to re-check the 'fdt->fd' 988 1003 * pointer having changed, because it always goes ··· 1004 991 * 1005 992 * If so, we need to put our ref and try again. 1006 993 */ 1007 - if (unlikely(rcu_dereference_raw(files->fdt) != fdt)) { 994 + if (unlikely(file != rcu_dereference_raw(*fdentry)) || 995 + unlikely(rcu_dereference_raw(files->fdt) != fdt)) { 1008 996 fput(file); 1009 997 continue; 1010 998 } ··· 1142 1128 * atomic_read_acquire() pairs with atomic_dec_and_test() in 1143 1129 * put_files_struct(). 
1144 1130 */ 1145 - if (atomic_read_acquire(&files->count) == 1) { 1131 + if (likely(atomic_read_acquire(&files->count) == 1)) { 1146 1132 file = files_lookup_fd_raw(files, fd); 1147 1133 if (!file || unlikely(file->f_mode & mask)) 1148 1134 return 0; 1149 1135 return (unsigned long)file; 1150 1136 } else { 1151 - file = __fget(fd, mask); 1137 + file = __fget_files(files, fd, mask); 1152 1138 if (!file) 1153 1139 return 0; 1154 1140 return FDPUT_FPUT | (unsigned long)file; ··· 1296 1282 } 1297 1283 1298 1284 /** 1299 - * __receive_fd() - Install received file into file descriptor table 1285 + * receive_fd() - Install received file into file descriptor table 1300 1286 * @file: struct file that was received from another process 1301 1287 * @ufd: __user pointer to write new fd number to 1302 1288 * @o_flags: the O_* flags to apply to the new fd entry ··· 1310 1296 * 1311 1297 * Returns newly install fd or -ve on error. 1312 1298 */ 1313 - int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags) 1299 + int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags) 1314 1300 { 1315 1301 int new_fd; 1316 1302 int error; ··· 1335 1321 __receive_sock(file); 1336 1322 return new_fd; 1337 1323 } 1324 + EXPORT_SYMBOL_GPL(receive_fd); 1338 1325 1339 1326 int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags) 1340 1327 { ··· 1350 1335 __receive_sock(file); 1351 1336 return new_fd; 1352 1337 } 1353 - 1354 - int receive_fd(struct file *file, unsigned int o_flags) 1355 - { 1356 - return __receive_fd(file, NULL, o_flags); 1357 - } 1358 - EXPORT_SYMBOL_GPL(receive_fd); 1359 1338 1360 1339 static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) 1361 1340 {
+7 -15
fs/file_table.c
··· 75 75 } 76 76 } 77 77 78 - void release_empty_file(struct file *f) 79 - { 80 - WARN_ON_ONCE(f->f_mode & (FMODE_BACKING | FMODE_OPENED)); 81 - if (atomic_long_dec_and_test(&f->f_count)) { 82 - security_file_free(f); 83 - put_cred(f->f_cred); 84 - if (likely(!(f->f_mode & FMODE_NOACCOUNT))) 85 - percpu_counter_dec(&nr_files); 86 - kmem_cache_free(filp_cachep, f); 87 - } 88 - } 89 - 90 78 /* 91 79 * Return the total number of open files in the system 92 80 */ ··· 407 419 408 420 static void ____fput(struct callback_head *work) 409 421 { 410 - __fput(container_of(work, struct file, f_rcuhead)); 422 + __fput(container_of(work, struct file, f_task_work)); 411 423 } 412 424 413 425 /* ··· 433 445 if (atomic_long_dec_and_test(&file->f_count)) { 434 446 struct task_struct *task = current; 435 447 448 + if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) { 449 + file_free(file); 450 + return; 451 + } 436 452 if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { 437 - init_task_work(&file->f_rcuhead, ____fput); 438 - if (!task_work_add(task, &file->f_rcuhead, TWA_RESUME)) 453 + init_task_work(&file->f_task_work, ____fput); 454 + if (!task_work_add(task, &file->f_task_work, TWA_RESUME)) 439 455 return; 440 456 /* 441 457 * After this task has run exit_task_work(),
+1 -2
fs/internal.h
··· 94 94 struct file *alloc_empty_file(int flags, const struct cred *cred); 95 95 struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred); 96 96 struct file *alloc_empty_backing_file(int flags, const struct cred *cred); 97 - void release_empty_file(struct file *f); 98 97 99 98 static inline void file_put_write_access(struct file *file) 100 99 { ··· 179 180 const char *, const struct open_flags *); 180 181 extern struct open_how build_open_how(int flags, umode_t mode); 181 182 extern int build_open_flags(const struct open_how *how, struct open_flags *op); 182 - extern struct file *__close_fd_get_file(unsigned int fd); 183 + struct file *file_close_fd_locked(struct files_struct *files, unsigned fd); 183 184 184 185 long do_sys_ftruncate(unsigned int fd, loff_t length, int small); 185 186 int chmod_common(const struct path *path, umode_t mode);
+1 -4
fs/namei.c
··· 3785 3785 WARN_ON(1); 3786 3786 error = -EINVAL; 3787 3787 } 3788 - if (unlikely(file->f_mode & FMODE_OPENED)) 3789 - fput(file); 3790 - else 3791 - release_empty_file(file); 3788 + fput(file); 3792 3789 if (error == -EOPENSTALE) { 3793 3790 if (flags & LOOKUP_RCU) 3794 3791 error = -ECHILD;
+1 -1
fs/open.c
··· 1577 1577 int retval; 1578 1578 struct file *file; 1579 1579 1580 - file = close_fd_get_file(fd); 1580 + file = file_close_fd(fd); 1581 1581 if (!file) 1582 1582 return -EBADF; 1583 1583
+11 -6
include/linux/fdtable.h
··· 83 83 static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd) 84 84 { 85 85 struct fdtable *fdt = rcu_dereference_raw(files->fdt); 86 + unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds); 87 + struct file *needs_masking; 86 88 87 - if (fd < fdt->max_fds) { 88 - fd = array_index_nospec(fd, fdt->max_fds); 89 - return rcu_dereference_raw(fdt->fd[fd]); 90 - } 91 - return NULL; 89 + /* 90 + * 'mask' is zero for an out-of-bounds fd, all ones for ok. 91 + * 'fd&mask' is 'fd' for ok, or 0 for out of bounds. 92 + * 93 + * Accessing fdt->fd[0] is ok, but needs masking of the result. 94 + */ 95 + needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]); 96 + return (struct file *)(mask & (unsigned long)needs_masking); 92 97 } 93 98 94 99 static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd) ··· 119 114 120 115 extern int close_fd(unsigned int fd); 121 116 extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); 122 - extern struct file *close_fd_get_file(unsigned int fd); 117 + extern struct file *file_close_fd(unsigned int fd); 123 118 extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, 124 119 struct files_struct **new_fdp); 125 120
+1 -11
include/linux/file.h
··· 96 96 97 97 extern void fd_install(unsigned int fd, struct file *file); 98 98 99 - extern int __receive_fd(struct file *file, int __user *ufd, 100 - unsigned int o_flags); 99 + int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); 101 100 102 - extern int receive_fd(struct file *file, unsigned int o_flags); 103 - 104 - static inline int receive_fd_user(struct file *file, int __user *ufd, 105 - unsigned int o_flags) 106 - { 107 - if (ufd == NULL) 108 - return -EFAULT; 109 - return __receive_fd(file, ufd, o_flags); 110 - } 111 101 int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags); 112 102 113 103 extern void flush_delayed_fput(void);
+3 -1
include/linux/fs.h
··· 991 991 */ 992 992 struct file { 993 993 union { 994 + /* fput() uses task work when closing and freeing file (default). */ 995 + struct callback_head f_task_work; 996 + /* fput() must use workqueue (most kernel threads). */ 994 997 struct llist_node f_llist; 995 - struct rcu_head f_rcuhead; 996 998 unsigned int f_iocb_flags; 997 999 }; 998 1000
+9
include/net/scm.h
··· 5 5 #include <linux/limits.h> 6 6 #include <linux/net.h> 7 7 #include <linux/cred.h> 8 + #include <linux/file.h> 8 9 #include <linux/security.h> 9 10 #include <linux/pid.h> 10 11 #include <linux/nsproxy.h> ··· 207 206 scm_pidfd_recv(msg, scm); 208 207 209 208 scm_destroy_cred(scm); 209 + } 210 + 211 + static inline int scm_recv_one_fd(struct file *f, int __user *ufd, 212 + unsigned int flags) 213 + { 214 + if (!ufd) 215 + return -EFAULT; 216 + return receive_fd(f, ufd, flags); 210 217 } 211 218 212 219 #endif /* __LINUX_NET_SCM_H */
+1 -1
io_uring/openclose.c
··· 241 241 return -EAGAIN; 242 242 } 243 243 244 - file = __close_fd_get_file(close->fd); 244 + file = file_close_fd_locked(files, close->fd); 245 245 spin_unlock(&files->file_lock); 246 246 if (!file) 247 247 goto err;
+1 -1
kernel/pid.c
··· 700 700 if (IS_ERR(file)) 701 701 return PTR_ERR(file); 702 702 703 - ret = receive_fd(file, O_CLOEXEC); 703 + ret = receive_fd(file, NULL, O_CLOEXEC); 704 704 fput(file); 705 705 706 706 return ret;
+1 -1
kernel/seccomp.c
··· 1072 1072 */ 1073 1073 list_del_init(&addfd->list); 1074 1074 if (!addfd->setfd) 1075 - fd = receive_fd(addfd->file, addfd->flags); 1075 + fd = receive_fd(addfd->file, NULL, addfd->flags); 1076 1076 else 1077 1077 fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags); 1078 1078 addfd->ret = fd;
+1 -1
net/compat.c
··· 297 297 int err = 0, i; 298 298 299 299 for (i = 0; i < fdmax; i++) { 300 - err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags); 300 + err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); 301 301 if (err < 0) 302 302 break; 303 303 }
+1 -1
net/core/scm.c
··· 325 325 } 326 326 327 327 for (i = 0; i < fdmax; i++) { 328 - err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags); 328 + err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); 329 329 if (err < 0) 330 330 break; 331 331 }