Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs update from Al Viro:

- big one - consolidation of descriptor-related logics; almost all of
that is moved to fs/file.c

(BTW, I'm seriously tempted to rename the result to fd.c. As it is,
we have a situation when file_table.c is about handling of struct
file and file.c is about handling of descriptor tables; the reasons
are historical - file_table.c used to be about a static array of
struct file we used to have way back).

A lot of stray ends got cleaned up and converted to saner primitives,
disgusting mess in android/binder.c is still disgusting, but at least
doesn't poke so much in descriptor table guts anymore. A bunch of
relatively minor races got fixed in the process, plus an ext4 struct file
leak.

- related thing - fget_light() partially unuglified; see fdget() in
there (and yes, it generates the code as good as we used to have).

- also related - bits of Cyrill's procfs stuff that got entangled into
that work; _not_ all of it, just the initial move to fs/proc/fd.c and
switch of fdinfo to seq_file.

- Alex's fs/coredump.c split-off - the same story, had been easier to
take that commit than mess with conflicts. The rest is a separate
pile, this was just a mechanical code movement.

- a few misc patches all over the place. Not all for this cycle,
there'll be more (and quite a few currently sit in akpm's tree).

Fix up trivial conflicts in the android binder driver, and some fairly
simple conflicts due to two different changes to the sock_alloc_file()
interface ("take descriptor handling from sock_alloc_file() to callers"
vs "net: Providing protocol type via system.sockprotoname xattr of
/proc/PID/fd entries" adding a dentry name to the socket)

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (72 commits)
MAX_LFS_FILESIZE should be a loff_t
compat: fs: Generic compat_sys_sendfile implementation
fs: push rcu_barrier() from deactivate_locked_super() to filesystems
btrfs: reada_extent doesn't need kref for refcount
coredump: move core dump functionality into its own file
coredump: prevent double-free on an error path in core dumper
usb/gadget: fix misannotations
fcntl: fix misannotations
ceph: don't abuse d_delete() on failure exits
hypfs: ->d_parent is never NULL or negative
vfs: delete surplus inode NULL check
switch simple cases of fget_light to fdget
new helpers: fdget()/fdput()
switch o2hb_region_dev_write() to fget_light()
proc_map_files_readdir(): don't bother with grabbing files
make get_file() return its argument
vhost_set_vring(): turn pollstart/pollstop into bool
switch prctl_set_mm_exe_file() to fget_light()
switch xfs_find_handle() to fget_light()
switch xfs_swapext() to fget_light()
...

+2914 -2876
+5 -8
arch/alpha/kernel/osf_sys.c
··· 145 145 long __user *, basep) 146 146 { 147 147 int error; 148 - struct file *file; 148 + struct fd arg = fdget(fd); 149 149 struct osf_dirent_callback buf; 150 150 151 - error = -EBADF; 152 - file = fget(fd); 153 - if (!file) 154 - goto out; 151 + if (!arg.file) 152 + return -EBADF; 155 153 156 154 buf.dirent = dirent; 157 155 buf.basep = basep; 158 156 buf.count = count; 159 157 buf.error = 0; 160 158 161 - error = vfs_readdir(file, osf_filldir, &buf); 159 + error = vfs_readdir(arg.file, osf_filldir, &buf); 162 160 if (error >= 0) 163 161 error = buf.error; 164 162 if (count != buf.count) 165 163 error = count - buf.count; 166 164 167 - fput(file); 168 - out: 165 + fdput(arg); 169 166 return error; 170 167 } 171 168
+8 -10
arch/ia64/kernel/perfmon.c
··· 2306 2306 * partially initialize the vma for the sampling buffer 2307 2307 */ 2308 2308 vma->vm_mm = mm; 2309 - vma->vm_file = filp; 2309 + vma->vm_file = get_file(filp); 2310 2310 vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; 2311 2311 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ 2312 2312 ··· 2344 2344 up_write(&task->mm->mmap_sem); 2345 2345 goto error; 2346 2346 } 2347 - 2348 - get_file(filp); 2349 2347 2350 2348 /* 2351 2349 * now insert the vma in the vm list for the process, must be ··· 4780 4782 asmlinkage long 4781 4783 sys_perfmonctl (int fd, int cmd, void __user *arg, int count) 4782 4784 { 4783 - struct file *file = NULL; 4785 + struct fd f = {NULL, 0}; 4784 4786 pfm_context_t *ctx = NULL; 4785 4787 unsigned long flags = 0UL; 4786 4788 void *args_k = NULL; ··· 4877 4879 4878 4880 ret = -EBADF; 4879 4881 4880 - file = fget(fd); 4881 - if (unlikely(file == NULL)) { 4882 + f = fdget(fd); 4883 + if (unlikely(f.file == NULL)) { 4882 4884 DPRINT(("invalid fd %d\n", fd)); 4883 4885 goto error_args; 4884 4886 } 4885 - if (unlikely(PFM_IS_FILE(file) == 0)) { 4887 + if (unlikely(PFM_IS_FILE(f.file) == 0)) { 4886 4888 DPRINT(("fd %d not related to perfmon\n", fd)); 4887 4889 goto error_args; 4888 4890 } 4889 4891 4890 - ctx = file->private_data; 4892 + ctx = f.file->private_data; 4891 4893 if (unlikely(ctx == NULL)) { 4892 4894 DPRINT(("no context for fd %d\n", fd)); 4893 4895 goto error_args; ··· 4917 4919 if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; 4918 4920 4919 4921 error_args: 4920 - if (file) 4921 - fput(file); 4922 + if (f.file) 4923 + fdput(f); 4922 4924 4923 4925 kfree(args_k); 4924 4926
+8 -9
arch/parisc/hpux/fs.c
··· 109 109 110 110 int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned int count) 111 111 { 112 - struct file * file; 112 + struct fd arg; 113 113 struct hpux_dirent __user * lastdirent; 114 114 struct getdents_callback buf; 115 - int error = -EBADF; 115 + int error; 116 116 117 - file = fget(fd); 118 - if (!file) 119 - goto out; 117 + arg = fdget(fd); 118 + if (!arg.file) 119 + return -EBADF; 120 120 121 121 buf.current_dir = dirent; 122 122 buf.previous = NULL; 123 123 buf.count = count; 124 124 buf.error = 0; 125 125 126 - error = vfs_readdir(file, filldir, &buf); 126 + error = vfs_readdir(arg.file, filldir, &buf); 127 127 if (error >= 0) 128 128 error = buf.error; 129 129 lastdirent = buf.previous; 130 130 if (lastdirent) { 131 - if (put_user(file->f_pos, &lastdirent->d_off)) 131 + if (put_user(arg.file->f_pos, &lastdirent->d_off)) 132 132 error = -EFAULT; 133 133 else 134 134 error = count - buf.count; 135 135 } 136 136 137 - fput(file); 138 - out: 137 + fdput(arg); 139 138 return error; 140 139 } 141 140
+2 -2
arch/powerpc/include/asm/systbl.h
··· 189 189 SYSCALL_SPU(capget) 190 190 SYSCALL_SPU(capset) 191 191 COMPAT_SYS(sigaltstack) 192 - SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile) 192 + SYSX_SPU(sys_sendfile,compat_sys_sendfile_wrapper,sys_sendfile) 193 193 SYSCALL(ni_syscall) 194 194 SYSCALL(ni_syscall) 195 195 PPC_SYS(vfork) ··· 229 229 COMPAT_SYS_SPU(sched_getaffinity) 230 230 SYSCALL(ni_syscall) 231 231 SYSCALL(ni_syscall) 232 - SYS32ONLY(sendfile64) 232 + SYSX(sys_ni_syscall,compat_sys_sendfile64_wrapper,sys_sendfile64) 233 233 COMPAT_SYS_SPU(io_setup) 234 234 SYSCALL_SPU(io_destroy) 235 235 COMPAT_SYS_SPU(io_getevents)
+1
arch/powerpc/include/asm/unistd.h
··· 419 419 #define __ARCH_WANT_COMPAT_SYS_TIME 420 420 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND 421 421 #define __ARCH_WANT_SYS_NEWFSTATAT 422 + #define __ARCH_WANT_COMPAT_SYS_SENDFILE 422 423 #endif 423 424 424 425 /*
+7 -38
arch/powerpc/kernel/sys_ppc32.c
··· 143 143 * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) 144 144 * and the register representation of a signed int (msr in 64-bit mode) is performed. 145 145 */ 146 - asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count) 146 + asmlinkage long compat_sys_sendfile_wrapper(u32 out_fd, u32 in_fd, 147 + compat_off_t __user *offset, u32 count) 147 148 { 148 - mm_segment_t old_fs = get_fs(); 149 - int ret; 150 - off_t of; 151 - off_t __user *up; 152 - 153 - if (offset && get_user(of, offset)) 154 - return -EFAULT; 155 - 156 - /* The __user pointer cast is valid because of the set_fs() */ 157 - set_fs(KERNEL_DS); 158 - up = offset ? (off_t __user *) &of : NULL; 159 - ret = sys_sendfile((int)out_fd, (int)in_fd, up, count); 160 - set_fs(old_fs); 161 - 162 - if (offset && put_user(of, offset)) 163 - return -EFAULT; 164 - 165 - return ret; 149 + return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count); 166 150 } 167 151 168 - asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count) 152 + asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd, 153 + compat_loff_t __user *offset, u32 count) 169 154 { 170 - mm_segment_t old_fs = get_fs(); 171 - int ret; 172 - loff_t lof; 173 - loff_t __user *up; 174 - 175 - if (offset && get_user(lof, offset)) 176 - return -EFAULT; 177 - 178 - /* The __user pointer cast is valid because of the set_fs() */ 179 - set_fs(KERNEL_DS); 180 - up = offset ? (loff_t __user *) &lof : NULL; 181 - ret = sys_sendfile64(out_fd, in_fd, up, count); 182 - set_fs(old_fs); 183 - 184 - if (offset && put_user(lof, offset)) 185 - return -EFAULT; 186 - 187 - return ret; 155 + return sys_sendfile((int)out_fd, (int)in_fd, 156 + (off_t __user *)offset, count); 188 157 } 189 158 190 159 long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
+9 -12
arch/powerpc/platforms/cell/spu_syscalls.c
··· 69 69 umode_t, mode, int, neighbor_fd) 70 70 { 71 71 long ret; 72 - struct file *neighbor; 73 - int fput_needed; 74 72 struct spufs_calls *calls; 75 73 76 74 calls = spufs_calls_get(); ··· 76 78 return -ENOSYS; 77 79 78 80 if (flags & SPU_CREATE_AFFINITY_SPU) { 81 + struct fd neighbor = fdget(neighbor_fd); 79 82 ret = -EBADF; 80 - neighbor = fget_light(neighbor_fd, &fput_needed); 81 - if (neighbor) { 82 - ret = calls->create_thread(name, flags, mode, neighbor); 83 - fput_light(neighbor, fput_needed); 83 + if (neighbor.file) { 84 + ret = calls->create_thread(name, flags, mode, neighbor.file); 85 + fdput(neighbor); 84 86 } 85 87 } else 86 88 ret = calls->create_thread(name, flags, mode, NULL); ··· 92 94 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) 93 95 { 94 96 long ret; 95 - struct file *filp; 96 - int fput_needed; 97 + struct fd arg; 97 98 struct spufs_calls *calls; 98 99 99 100 calls = spufs_calls_get(); ··· 100 103 return -ENOSYS; 101 104 102 105 ret = -EBADF; 103 - filp = fget_light(fd, &fput_needed); 104 - if (filp) { 105 - ret = calls->spu_run(filp, unpc, ustatus); 106 - fput_light(filp, fput_needed); 106 + arg = fdget(fd); 107 + if (arg.file) { 108 + ret = calls->spu_run(arg.file, unpc, ustatus); 109 + fdput(arg); 107 110 } 108 111 109 112 spufs_calls_put(calls);
+20 -20
arch/powerpc/platforms/cell/spufs/coredump.c
··· 106 106 return total; 107 107 } 108 108 109 + static int match_context(const void *v, struct file *file, unsigned fd) 110 + { 111 + struct spu_context *ctx; 112 + if (file->f_op != &spufs_context_fops) 113 + return 0; 114 + ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; 115 + if (ctx->flags & SPU_CREATE_NOSCHED) 116 + return 0; 117 + return fd + 1; 118 + } 119 + 109 120 /* 110 121 * The additional architecture-specific notes for Cell are various 111 122 * context files in the spu context. ··· 126 115 * internal functionality to dump them without needing to actually 127 116 * open the files. 128 117 */ 118 + /* 119 + * descriptor table is not shared, so files can't change or go away. 120 + */ 129 121 static struct spu_context *coredump_next_context(int *fd) 130 122 { 131 - struct fdtable *fdt = files_fdtable(current->files); 132 123 struct file *file; 133 - struct spu_context *ctx = NULL; 134 - 135 - for (; *fd < fdt->max_fds; (*fd)++) { 136 - if (!fd_is_open(*fd, fdt)) 137 - continue; 138 - 139 - file = fcheck(*fd); 140 - 141 - if (!file || file->f_op != &spufs_context_fops) 142 - continue; 143 - 144 - ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; 145 - if (ctx->flags & SPU_CREATE_NOSCHED) 146 - continue; 147 - 148 - break; 149 - } 150 - 151 - return ctx; 124 + int n = iterate_fd(current->files, *fd, match_context, NULL); 125 + if (!n) 126 + return NULL; 127 + *fd = n - 1; 128 + file = fcheck(*fd); 129 + return SPUFS_I(file->f_dentry->d_inode)->i_ctx; 152 130 } 153 131 154 132 int spufs_coredump_extra_notes_size(void)
-2
arch/s390/hypfs/inode.c
··· 72 72 struct dentry *parent; 73 73 74 74 parent = dentry->d_parent; 75 - if (!parent || !parent->d_inode) 76 - return; 77 75 mutex_lock(&parent->d_inode->i_mutex); 78 76 if (hypfs_positive(dentry)) { 79 77 if (S_ISDIR(dentry->d_inode->i_mode))
+1
arch/sparc/include/asm/unistd.h
··· 447 447 #else 448 448 #define __ARCH_WANT_COMPAT_SYS_TIME 449 449 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND 450 + #define __ARCH_WANT_COMPAT_SYS_SENDFILE 450 451 #endif 451 452 452 453 /*
+1 -1
arch/sparc/kernel/sys32.S
··· 90 90 SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) 91 91 SIGN1(sys32_sysfs, compat_sys_sysfs, %o0) 92 92 SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) 93 - SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1) 93 + SIGN2(sys32_sendfile64, sys_sendfile, %o0, %o1) 94 94 SIGN1(sys32_prctl, sys_prctl, %o0) 95 95 SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0) 96 96 SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2)
-46
arch/sparc/kernel/sys_sparc32.c
··· 506 506 advice); 507 507 } 508 508 509 - asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, 510 - compat_off_t __user *offset, 511 - compat_size_t count) 512 - { 513 - mm_segment_t old_fs = get_fs(); 514 - int ret; 515 - off_t of; 516 - 517 - if (offset && get_user(of, offset)) 518 - return -EFAULT; 519 - 520 - set_fs(KERNEL_DS); 521 - ret = sys_sendfile(out_fd, in_fd, 522 - offset ? (off_t __user *) &of : NULL, 523 - count); 524 - set_fs(old_fs); 525 - 526 - if (offset && put_user(of, offset)) 527 - return -EFAULT; 528 - 529 - return ret; 530 - } 531 - 532 - asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, 533 - compat_loff_t __user *offset, 534 - compat_size_t count) 535 - { 536 - mm_segment_t old_fs = get_fs(); 537 - int ret; 538 - loff_t lof; 539 - 540 - if (offset && get_user(lof, offset)) 541 - return -EFAULT; 542 - 543 - set_fs(KERNEL_DS); 544 - ret = sys_sendfile64(out_fd, in_fd, 545 - offset ? (loff_t __user *) &lof : NULL, 546 - count); 547 - set_fs(old_fs); 548 - 549 - if (offset && put_user(lof, offset)) 550 - return -EFAULT; 551 - 552 - return ret; 553 - } 554 - 555 509 /* This is just a version for 32-bit applications which does 556 510 * not force O_LARGEFILE on. 557 511 */
+21 -78
arch/um/drivers/mconsole_kern.c
··· 21 21 #include <linux/un.h> 22 22 #include <linux/workqueue.h> 23 23 #include <linux/mutex.h> 24 + #include <linux/fs.h> 25 + #include <linux/mount.h> 26 + #include <linux/file.h> 24 27 #include <asm/uaccess.h> 25 28 #include <asm/switch_to.h> 26 29 ··· 121 118 mconsole_reply(req, "", 0, 0); 122 119 } 123 120 124 - /* This is a more convoluted version of mconsole_proc, which has some stability 125 - * problems; however, we need it fixed, because it is expected that UML users 126 - * mount HPPFS instead of procfs on /proc. And we want mconsole_proc to still 127 - * show the real procfs content, not the ones from hppfs.*/ 128 - #if 0 129 121 void mconsole_proc(struct mc_request *req) 130 122 { 131 123 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 124 + char *buf; 125 + int len; 132 126 struct file *file; 133 - int n; 134 - char *ptr = req->request.data, *buf; 135 - mm_segment_t old_fs = get_fs(); 127 + int first_chunk = 1; 128 + char *ptr = req->request.data; 136 129 137 130 ptr += strlen("proc"); 138 131 ptr = skip_spaces(ptr); ··· 136 137 file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); 137 138 if (IS_ERR(file)) { 138 139 mconsole_reply(req, "Failed to open file", 1, 0); 140 + printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file)); 139 141 goto out; 140 142 } 141 143 ··· 146 146 goto out_fput; 147 147 } 148 148 149 - if (file->f_op->read) { 150 - do { 151 - loff_t pos; 152 - set_fs(KERNEL_DS); 153 - n = vfs_read(file, buf, PAGE_SIZE - 1, &pos); 154 - file_pos_write(file, pos); 155 - set_fs(old_fs); 156 - if (n >= 0) { 157 - buf[n] = '\0'; 158 - mconsole_reply(req, buf, 0, (n > 0)); 159 - } 160 - else { 161 - mconsole_reply(req, "Read of file failed", 162 - 1, 0); 163 - goto out_free; 164 - } 165 - } while (n > 0); 166 - } 167 - else mconsole_reply(req, "", 0, 0); 168 - 169 - out_free: 170 - kfree(buf); 171 - out_fput: 172 - fput(file); 173 - out: ; 174 - } 175 - #endif 176 - 177 - void mconsole_proc(struct mc_request *req) 178 - { 179 
- char path[64]; 180 - char *buf; 181 - int len; 182 - int fd; 183 - int first_chunk = 1; 184 - char *ptr = req->request.data; 185 - 186 - ptr += strlen("proc"); 187 - ptr = skip_spaces(ptr); 188 - snprintf(path, sizeof(path), "/proc/%s", ptr); 189 - 190 - fd = sys_open(path, 0, 0); 191 - if (fd < 0) { 192 - mconsole_reply(req, "Failed to open file", 1, 0); 193 - printk(KERN_ERR "open %s: %d\n",path,fd); 194 - goto out; 195 - } 196 - 197 - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 198 - if (buf == NULL) { 199 - mconsole_reply(req, "Failed to allocate buffer", 1, 0); 200 - goto out_close; 201 - } 202 - 203 - for (;;) { 204 - len = sys_read(fd, buf, PAGE_SIZE-1); 149 + do { 150 + loff_t pos; 151 + mm_segment_t old_fs = get_fs(); 152 + set_fs(KERNEL_DS); 153 + len = vfs_read(file, buf, PAGE_SIZE - 1, &pos); 154 + set_fs(old_fs); 155 + file->f_pos = pos; 205 156 if (len < 0) { 206 157 mconsole_reply(req, "Read of file failed", 1, 0); 207 158 goto out_free; ··· 162 211 mconsole_reply(req, "\n", 0, 1); 163 212 first_chunk = 0; 164 213 } 165 - if (len == PAGE_SIZE-1) { 166 - buf[len] = '\0'; 167 - mconsole_reply(req, buf, 0, 1); 168 - } else { 169 - buf[len] = '\0'; 170 - mconsole_reply(req, buf, 0, 0); 171 - break; 172 - } 173 - } 174 - 214 + buf[len] = '\0'; 215 + mconsole_reply(req, buf, 0, (len > 0)); 216 + } while (len > 0); 175 217 out_free: 176 218 kfree(buf); 177 - out_close: 178 - sys_close(fd); 179 - out: 180 - /* nothing */; 219 + out_fput: 220 + fput(file); 221 + out: ; 181 222 } 182 223 183 224 #define UML_MCONSOLE_HELPTEXT \
+1 -2
drivers/base/dma-buf.c
··· 460 460 if (vma->vm_file) 461 461 fput(vma->vm_file); 462 462 463 - vma->vm_file = dmabuf->file; 464 - get_file(vma->vm_file); 463 + vma->vm_file = get_file(dmabuf->file); 465 464 466 465 vma->vm_pgoff = pgoff; 467 466
+5 -5
drivers/infiniband/core/ucma.c
··· 1183 1183 struct rdma_ucm_migrate_id cmd; 1184 1184 struct rdma_ucm_migrate_resp resp; 1185 1185 struct ucma_context *ctx; 1186 - struct file *filp; 1186 + struct fd f; 1187 1187 struct ucma_file *cur_file; 1188 1188 int ret = 0; 1189 1189 ··· 1191 1191 return -EFAULT; 1192 1192 1193 1193 /* Get current fd to protect against it being closed */ 1194 - filp = fget(cmd.fd); 1195 - if (!filp) 1194 + f = fdget(cmd.fd); 1195 + if (!f.file) 1196 1196 return -ENOENT; 1197 1197 1198 1198 /* Validate current fd and prevent destruction of id. */ 1199 - ctx = ucma_get_ctx(filp->private_data, cmd.id); 1199 + ctx = ucma_get_ctx(f.file->private_data, cmd.id); 1200 1200 if (IS_ERR(ctx)) { 1201 1201 ret = PTR_ERR(ctx); 1202 1202 goto file_put; ··· 1230 1230 1231 1231 ucma_put_ctx(ctx); 1232 1232 file_put: 1233 - fput(filp); 1233 + fdput(f); 1234 1234 return ret; 1235 1235 } 1236 1236
+8 -13
drivers/infiniband/core/uverbs_cmd.c
··· 705 705 struct ib_udata udata; 706 706 struct ib_uxrcd_object *obj; 707 707 struct ib_xrcd *xrcd = NULL; 708 - struct file *f = NULL; 708 + struct fd f = {NULL, 0}; 709 709 struct inode *inode = NULL; 710 710 int ret = 0; 711 711 int new_xrcd = 0; ··· 724 724 725 725 if (cmd.fd != -1) { 726 726 /* search for file descriptor */ 727 - f = fget(cmd.fd); 728 - if (!f) { 727 + f = fdget(cmd.fd); 728 + if (!f.file) { 729 729 ret = -EBADF; 730 730 goto err_tree_mutex_unlock; 731 731 } 732 732 733 - inode = f->f_dentry->d_inode; 734 - if (!inode) { 735 - ret = -EBADF; 736 - goto err_tree_mutex_unlock; 737 - } 738 - 733 + inode = f.file->f_path.dentry->d_inode; 739 734 xrcd = find_xrcd(file->device, inode); 740 735 if (!xrcd && !(cmd.oflags & O_CREAT)) { 741 736 /* no file descriptor. Need CREATE flag */ ··· 795 800 goto err_copy; 796 801 } 797 802 798 - if (f) 799 - fput(f); 803 + if (f.file) 804 + fdput(f); 800 805 801 806 mutex_lock(&file->mutex); 802 807 list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); ··· 825 830 put_uobj_write(&obj->uobject); 826 831 827 832 err_tree_mutex_unlock: 828 - if (f) 829 - fput(f); 833 + if (f.file) 834 + fdput(f); 830 835 831 836 mutex_unlock(&file->device->xrcd_tree_mutex); 832 837
+5 -6
drivers/infiniband/core/uverbs_main.c
··· 541 541 struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) 542 542 { 543 543 struct ib_uverbs_event_file *ev_file = NULL; 544 - struct file *filp; 544 + struct fd f = fdget(fd); 545 545 546 - filp = fget(fd); 547 - if (!filp) 546 + if (!f.file) 548 547 return NULL; 549 548 550 - if (filp->f_op != &uverbs_event_fops) 549 + if (f.file->f_op != &uverbs_event_fops) 551 550 goto out; 552 551 553 - ev_file = filp->private_data; 552 + ev_file = f.file->private_data; 554 553 if (ev_file->is_async) { 555 554 ev_file = NULL; 556 555 goto out; ··· 558 559 kref_get(&ev_file->ref); 559 560 560 561 out: 561 - fput(filp); 562 + fdput(f); 562 563 return ev_file; 563 564 } 564 565
+13 -98
drivers/staging/android/binder.c
··· 362 362 static void 363 363 binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); 364 364 365 - /* 366 - * copied from get_unused_fd_flags 367 - */ 368 365 static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) 369 366 { 370 367 struct files_struct *files = proc->files; 371 - int fd, error; 372 - struct fdtable *fdt; 373 368 unsigned long rlim_cur; 374 369 unsigned long irqs; 375 370 376 371 if (files == NULL) 377 372 return -ESRCH; 378 373 379 - error = -EMFILE; 380 - spin_lock(&files->file_lock); 374 + if (!lock_task_sighand(proc->tsk, &irqs)) 375 + return -EMFILE; 381 376 382 - repeat: 383 - fdt = files_fdtable(files); 384 - fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, files->next_fd); 377 + rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); 378 + unlock_task_sighand(proc->tsk, &irqs); 385 379 386 - /* 387 - * N.B. For clone tasks sharing a files structure, this test 388 - * will limit the total number of files that can be opened. 389 - */ 390 - rlim_cur = 0; 391 - if (lock_task_sighand(proc->tsk, &irqs)) { 392 - rlim_cur = proc->tsk->signal->rlim[RLIMIT_NOFILE].rlim_cur; 393 - unlock_task_sighand(proc->tsk, &irqs); 394 - } 395 - if (fd >= rlim_cur) 396 - goto out; 397 - 398 - /* Do we need to expand the fd array or fd set? */ 399 - error = expand_files(files, fd); 400 - if (error < 0) 401 - goto out; 402 - 403 - if (error) { 404 - /* 405 - * If we needed to expand the fs array we 406 - * might have blocked - try again. 
407 - */ 408 - error = -EMFILE; 409 - goto repeat; 410 - } 411 - 412 - __set_open_fd(fd, fdt); 413 - if (flags & O_CLOEXEC) 414 - __set_close_on_exec(fd, fdt); 415 - else 416 - __clear_close_on_exec(fd, fdt); 417 - files->next_fd = fd + 1; 418 - 419 - /* Sanity check */ 420 - if (fdt->fd[fd] != NULL) { 421 - pr_warn("get_unused_fd: slot %d not NULL!\n", fd); 422 - fdt->fd[fd] = NULL; 423 - } 424 - 425 - error = fd; 426 - 427 - out: 428 - spin_unlock(&files->file_lock); 429 - return error; 380 + return __alloc_fd(files, 0, rlim_cur, flags); 430 381 } 431 382 432 383 /* ··· 386 435 static void task_fd_install( 387 436 struct binder_proc *proc, unsigned int fd, struct file *file) 388 437 { 389 - struct files_struct *files = proc->files; 390 - struct fdtable *fdt; 391 - 392 - if (files == NULL) 393 - return; 394 - 395 - spin_lock(&files->file_lock); 396 - fdt = files_fdtable(files); 397 - BUG_ON(fdt->fd[fd] != NULL); 398 - rcu_assign_pointer(fdt->fd[fd], file); 399 - spin_unlock(&files->file_lock); 400 - } 401 - 402 - /* 403 - * copied from __put_unused_fd in open.c 404 - */ 405 - static void __put_unused_fd(struct files_struct *files, unsigned int fd) 406 - { 407 - struct fdtable *fdt = files_fdtable(files); 408 - __clear_open_fd(fd, fdt); 409 - if (fd < files->next_fd) 410 - files->next_fd = fd; 438 + if (proc->files) 439 + __fd_install(proc->files, fd, file); 411 440 } 412 441 413 442 /* ··· 395 464 */ 396 465 static long task_close_fd(struct binder_proc *proc, unsigned int fd) 397 466 { 398 - struct file *filp; 399 - struct files_struct *files = proc->files; 400 - struct fdtable *fdt; 401 467 int retval; 402 468 403 - if (files == NULL) 469 + if (proc->files == NULL) 404 470 return -ESRCH; 405 471 406 - spin_lock(&files->file_lock); 407 - fdt = files_fdtable(files); 408 - if (fd >= fdt->max_fds) 409 - goto out_unlock; 410 - filp = fdt->fd[fd]; 411 - if (!filp) 412 - goto out_unlock; 413 - rcu_assign_pointer(fdt->fd[fd], NULL); 414 - __clear_close_on_exec(fd, fdt); 
415 - __put_unused_fd(files, fd); 416 - spin_unlock(&files->file_lock); 417 - retval = filp_close(filp, files); 418 - 472 + retval = __close_fd(proc->files, fd); 419 473 /* can't restart close syscall because file table entry was cleared */ 420 474 if (unlikely(retval == -ERESTARTSYS || 421 475 retval == -ERESTARTNOINTR || ··· 409 493 retval = -EINTR; 410 494 411 495 return retval; 412 - 413 - out_unlock: 414 - spin_unlock(&files->file_lock); 415 - return -EBADF; 416 496 } 417 497 418 498 static void binder_set_nice(long nice) ··· 2705 2793 const char *failure_string; 2706 2794 struct binder_buffer *buffer; 2707 2795 2796 + if (proc->tsk != current) 2797 + return -EINVAL; 2798 + 2708 2799 if ((vma->vm_end - vma->vm_start) > SZ_4M) 2709 2800 vma->vm_end = vma->vm_start + SZ_4M; 2710 2801 ··· 2772 2857 binder_insert_free_buffer(proc, buffer); 2773 2858 proc->free_async_space = proc->buffer_size / 2; 2774 2859 barrier(); 2775 - proc->files = get_files_struct(proc->tsk); 2860 + proc->files = get_files_struct(current); 2776 2861 proc->vma = vma; 2777 2862 proc->vma_vm_mm = vma->vm_mm; 2778 2863
+1 -2
drivers/staging/omapdrm/omap_gem.c
··· 592 592 * in particular in the case of mmap'd dmabufs) 593 593 */ 594 594 fput(vma->vm_file); 595 - get_file(obj->filp); 596 595 vma->vm_pgoff = 0; 597 - vma->vm_file = obj->filp; 596 + vma->vm_file = get_file(obj->filp); 598 597 599 598 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); 600 599 }
+16 -29
drivers/tty/tty_io.c
··· 1166 1166 struct file *p = NULL; 1167 1167 1168 1168 spin_lock(&redirect_lock); 1169 - if (redirect) { 1170 - get_file(redirect); 1171 - p = redirect; 1172 - } 1169 + if (redirect) 1170 + p = get_file(redirect); 1173 1171 spin_unlock(&redirect_lock); 1174 1172 1175 1173 if (p) { ··· 2262 2264 spin_unlock(&redirect_lock); 2263 2265 return -EBUSY; 2264 2266 } 2265 - get_file(file); 2266 - redirect = file; 2267 + redirect = get_file(file); 2267 2268 spin_unlock(&redirect_lock); 2268 2269 return 0; 2269 2270 } ··· 2806 2809 } 2807 2810 #endif 2808 2811 2812 + static int this_tty(const void *t, struct file *file, unsigned fd) 2813 + { 2814 + if (likely(file->f_op->read != tty_read)) 2815 + return 0; 2816 + return file_tty(file) != t ? 0 : fd + 1; 2817 + } 2818 + 2809 2819 /* 2810 2820 * This implements the "Secure Attention Key" --- the idea is to 2811 2821 * prevent trojan horses by killing all processes associated with this ··· 2840 2836 struct task_struct *g, *p; 2841 2837 struct pid *session; 2842 2838 int i; 2843 - struct file *filp; 2844 - struct fdtable *fdt; 2845 2839 2846 2840 if (!tty) 2847 2841 return; ··· 2869 2867 continue; 2870 2868 } 2871 2869 task_lock(p); 2872 - if (p->files) { 2873 - /* 2874 - * We don't take a ref to the file, so we must 2875 - * hold ->file_lock instead. 
2876 - */ 2877 - spin_lock(&p->files->file_lock); 2878 - fdt = files_fdtable(p->files); 2879 - for (i = 0; i < fdt->max_fds; i++) { 2880 - filp = fcheck_files(p->files, i); 2881 - if (!filp) 2882 - continue; 2883 - if (filp->f_op->read == tty_read && 2884 - file_tty(filp) == tty) { 2885 - printk(KERN_NOTICE "SAK: killed process %d" 2886 - " (%s): fd#%d opened to the tty\n", 2887 - task_pid_nr(p), p->comm, i); 2888 - force_sig(SIGKILL, p); 2889 - break; 2890 - } 2891 - } 2892 - spin_unlock(&p->files->file_lock); 2870 + i = iterate_fd(p->files, 0, this_tty, tty); 2871 + if (i != 0) { 2872 + printk(KERN_NOTICE "SAK: killed process %d" 2873 + " (%s): fd#%d opened to the tty\n", 2874 + task_pid_nr(p), p->comm, i - 1); 2875 + force_sig(SIGKILL, p); 2893 2876 } 2894 2877 task_unlock(p); 2895 2878 } while_each_thread(g, p);
+2 -2
drivers/usb/gadget/f_fs.c
··· 340 340 341 341 static int ffs_mutex_lock(struct mutex *mutex, unsigned nonblock) 342 342 __attribute__((warn_unused_result, nonnull)); 343 - static char *ffs_prepare_buffer(const char * __user buf, size_t len) 343 + static char *ffs_prepare_buffer(const char __user *buf, size_t len) 344 344 __attribute__((warn_unused_result, nonnull)); 345 345 346 346 ··· 2445 2445 : mutex_lock_interruptible(mutex); 2446 2446 } 2447 2447 2448 - static char *ffs_prepare_buffer(const char * __user buf, size_t len) 2448 + static char *ffs_prepare_buffer(const char __user *buf, size_t len) 2449 2449 { 2450 2450 char *data; 2451 2451
+7 -8
drivers/vfio/vfio.c
··· 1014 1014 1015 1015 static int vfio_group_set_container(struct vfio_group *group, int container_fd) 1016 1016 { 1017 - struct file *filep; 1017 + struct fd f; 1018 1018 struct vfio_container *container; 1019 1019 struct vfio_iommu_driver *driver; 1020 1020 int ret = 0; ··· 1022 1022 if (atomic_read(&group->container_users)) 1023 1023 return -EINVAL; 1024 1024 1025 - filep = fget(container_fd); 1026 - if (!filep) 1025 + f = fdget(container_fd); 1026 + if (!f.file) 1027 1027 return -EBADF; 1028 1028 1029 1029 /* Sanity check, is this really our fd? */ 1030 - if (filep->f_op != &vfio_fops) { 1031 - fput(filep); 1030 + if (f.file->f_op != &vfio_fops) { 1031 + fdput(f); 1032 1032 return -EINVAL; 1033 1033 } 1034 1034 1035 - container = filep->private_data; 1035 + container = f.file->private_data; 1036 1036 WARN_ON(!container); /* fget ensures we don't race vfio_release */ 1037 1037 1038 1038 mutex_lock(&container->group_lock); ··· 1054 1054 1055 1055 unlock_out: 1056 1056 mutex_unlock(&container->group_lock); 1057 - fput(filep); 1058 - 1057 + fdput(f); 1059 1058 return ret; 1060 1059 } 1061 1060
+4 -4
drivers/vhost/vhost.c
··· 636 636 637 637 static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) 638 638 { 639 - struct file *eventfp, *filep = NULL, 640 - *pollstart = NULL, *pollstop = NULL; 639 + struct file *eventfp, *filep = NULL; 640 + bool pollstart = false, pollstop = false; 641 641 struct eventfd_ctx *ctx = NULL; 642 642 u32 __user *idxp = argp; 643 643 struct vhost_virtqueue *vq; ··· 763 763 break; 764 764 } 765 765 if (eventfp != vq->kick) { 766 - pollstop = filep = vq->kick; 767 - pollstart = vq->kick = eventfp; 766 + pollstop = (filep = vq->kick) != NULL; 767 + pollstart = (vq->kick = eventfp) != NULL; 768 768 } else 769 769 filep = eventfp; 770 770 break;
+5 -7
drivers/video/msm/mdp.c
··· 257 257 unsigned long *start, unsigned long *len, 258 258 struct file **filep) 259 259 { 260 - int put_needed, ret = 0; 261 - struct file *file; 262 - 263 - file = fget_light(img->memory_id, &put_needed); 264 - if (file == NULL) 260 + int ret = 0; 261 + struct fd f = fdget(img->memory_id); 262 + if (f.file == NULL) 265 263 return -1; 266 264 267 - if (MAJOR(file->f_dentry->d_inode->i_rdev) == FB_MAJOR) { 265 + if (MAJOR(f.file->f_dentry->d_inode->i_rdev) == FB_MAJOR) { 268 266 *start = info->fix.smem_start; 269 267 *len = info->fix.smem_len; 270 268 } else 271 269 ret = -1; 272 - fput_light(file, put_needed); 270 + fdput(f); 273 271 274 272 return ret; 275 273 }
+5
fs/9p/v9fs.c
··· 560 560 */ 561 561 static void v9fs_destroy_inode_cache(void) 562 562 { 563 + /* 564 + * Make sure all delayed rcu free inodes are flushed before we 565 + * destroy cache. 566 + */ 567 + rcu_barrier(); 563 568 kmem_cache_destroy(v9fs_inode_cache); 564 569 } 565 570
+1 -1
fs/Makefile
··· 11 11 attr.o bad_inode.o file.o filesystems.o namespace.o \ 12 12 seq_file.o xattr.o libfs.o fs-writeback.o \ 13 13 pnode.o drop_caches.o splice.o sync.o utimes.o \ 14 - stack.o fs_struct.o statfs.o 14 + stack.o fs_struct.o statfs.o coredump.o 15 15 16 16 ifeq ($(CONFIG_BLOCK),y) 17 17 obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+5
fs/adfs/super.c
··· 280 280 281 281 static void destroy_inodecache(void) 282 282 { 283 + /* 284 + * Make sure all delayed rcu free inodes are flushed before we 285 + * destroy cache. 286 + */ 287 + rcu_barrier(); 283 288 kmem_cache_destroy(adfs_inode_cachep); 284 289 } 285 290
+5
fs/affs/super.c
··· 147 147 148 148 static void destroy_inodecache(void) 149 149 { 150 + /* 151 + * Make sure all delayed rcu free inodes are flushed before we 152 + * destroy cache. 153 + */ 154 + rcu_barrier(); 150 155 kmem_cache_destroy(affs_inode_cachep); 151 156 } 152 157
+5
fs/afs/super.c
··· 123 123 BUG(); 124 124 } 125 125 126 + /* 127 + * Make sure all delayed rcu free inodes are flushed before we 128 + * destroy cache. 129 + */ 130 + rcu_barrier(); 126 131 kmem_cache_destroy(afs_inode_cachep); 127 132 _leave(""); 128 133 }
+2 -16
fs/autofs4/dev-ioctl.c
··· 221 221 return ino && ino->sbi->type & *(unsigned *)p; 222 222 } 223 223 224 - static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) 225 - { 226 - struct files_struct *files = current->files; 227 - struct fdtable *fdt; 228 - 229 - spin_lock(&files->file_lock); 230 - fdt = files_fdtable(files); 231 - BUG_ON(fdt->fd[fd] != NULL); 232 - rcu_assign_pointer(fdt->fd[fd], file); 233 - __set_close_on_exec(fd, fdt); 234 - spin_unlock(&files->file_lock); 235 - } 236 - 237 - 238 224 /* 239 225 * Open a file descriptor on the autofs mount point corresponding 240 226 * to the given path and device number (aka. new_encode_dev(sb->s_dev)). ··· 229 243 { 230 244 int err, fd; 231 245 232 - fd = get_unused_fd(); 246 + fd = get_unused_fd_flags(O_CLOEXEC); 233 247 if (likely(fd >= 0)) { 234 248 struct file *filp; 235 249 struct path path; ··· 250 264 goto out; 251 265 } 252 266 253 - autofs_dev_ioctl_fd_install(fd, filp); 267 + fd_install(fd, filp); 254 268 } 255 269 256 270 return fd;
+1 -2
fs/autofs4/waitq.c
··· 175 175 return; 176 176 } 177 177 178 - pipe = sbi->pipe; 179 - get_file(pipe); 178 + pipe = get_file(sbi->pipe); 180 179 181 180 mutex_unlock(&sbi->wq_mutex); 182 181
+5
fs/befs/linuxvfs.c
··· 457 457 static void 458 458 befs_destroy_inodecache(void) 459 459 { 460 + /* 461 + * Make sure all delayed rcu free inodes are flushed before we 462 + * destroy cache. 463 + */ 464 + rcu_barrier(); 460 465 kmem_cache_destroy(befs_inode_cachep); 461 466 } 462 467
+5
fs/bfs/inode.c
··· 280 280 281 281 static void destroy_inodecache(void) 282 282 { 283 + /* 284 + * Make sure all delayed rcu free inodes are flushed before we 285 + * destroy cache. 286 + */ 287 + rcu_barrier(); 283 288 kmem_cache_destroy(bfs_inode_cachep); 284 289 } 285 290
+4 -15
fs/binfmt_elf.c
··· 1696 1696 return 0; 1697 1697 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); 1698 1698 if (!info->psinfo) 1699 - goto notes_free; 1699 + return 0; 1700 1700 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); 1701 1701 if (!info->prstatus) 1702 - goto psinfo_free; 1702 + return 0; 1703 1703 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); 1704 1704 if (!info->fpu) 1705 - goto prstatus_free; 1705 + return 0; 1706 1706 #ifdef ELF_CORE_COPY_XFPREGS 1707 1707 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); 1708 1708 if (!info->xfpu) 1709 - goto fpu_free; 1709 + return 0; 1710 1710 #endif 1711 1711 return 1; 1712 - #ifdef ELF_CORE_COPY_XFPREGS 1713 - fpu_free: 1714 - kfree(info->fpu); 1715 - #endif 1716 - prstatus_free: 1717 - kfree(info->prstatus); 1718 - psinfo_free: 1719 - kfree(info->psinfo); 1720 - notes_free: 1721 - kfree(info->notes); 1722 - return 0; 1723 1712 } 1724 1713 1725 1714 static int fill_note_info(struct elfhdr *elf, int phdrs,
+6
fs/btrfs/extent_io.c
··· 107 107 list_del(&eb->leak_list); 108 108 kmem_cache_free(extent_buffer_cache, eb); 109 109 } 110 + 111 + /* 112 + * Make sure all delayed rcu free are flushed before we 113 + * destroy caches. 114 + */ 115 + rcu_barrier(); 110 116 if (extent_state_cache) 111 117 kmem_cache_destroy(extent_state_cache); 112 118 if (extent_buffer_cache)
+5
fs/btrfs/inode.c
··· 7076 7076 7077 7077 void btrfs_destroy_cachep(void) 7078 7078 { 7079 + /* 7080 + * Make sure all delayed rcu free inodes are flushed before we 7081 + * destroy cache. 7082 + */ 7083 + rcu_barrier(); 7079 7084 if (btrfs_inode_cachep) 7080 7085 kmem_cache_destroy(btrfs_inode_cachep); 7081 7086 if (btrfs_trans_handle_cachep)
+15 -17
fs/btrfs/ioctl.c
··· 1397 1397 u64 *transid, bool readonly, 1398 1398 struct btrfs_qgroup_inherit **inherit) 1399 1399 { 1400 - struct file *src_file; 1401 1400 int namelen; 1402 1401 int ret = 0; 1403 1402 ··· 1420 1421 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1421 1422 NULL, transid, readonly, inherit); 1422 1423 } else { 1424 + struct fd src = fdget(fd); 1423 1425 struct inode *src_inode; 1424 - src_file = fget(fd); 1425 - if (!src_file) { 1426 + if (!src.file) { 1426 1427 ret = -EINVAL; 1427 1428 goto out_drop_write; 1428 1429 } 1429 1430 1430 - src_inode = src_file->f_path.dentry->d_inode; 1431 + src_inode = src.file->f_path.dentry->d_inode; 1431 1432 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { 1432 1433 printk(KERN_INFO "btrfs: Snapshot src from " 1433 1434 "another FS\n"); 1434 1435 ret = -EINVAL; 1435 - fput(src_file); 1436 - goto out_drop_write; 1436 + } else { 1437 + ret = btrfs_mksubvol(&file->f_path, name, namelen, 1438 + BTRFS_I(src_inode)->root, 1439 + transid, readonly, inherit); 1437 1440 } 1438 - ret = btrfs_mksubvol(&file->f_path, name, namelen, 1439 - BTRFS_I(src_inode)->root, 1440 - transid, readonly, inherit); 1441 - fput(src_file); 1441 + fdput(src); 1442 1442 } 1443 1443 out_drop_write: 1444 1444 mnt_drop_write_file(file); ··· 2339 2341 { 2340 2342 struct inode *inode = fdentry(file)->d_inode; 2341 2343 struct btrfs_root *root = BTRFS_I(inode)->root; 2342 - struct file *src_file; 2344 + struct fd src_file; 2343 2345 struct inode *src; 2344 2346 struct btrfs_trans_handle *trans; 2345 2347 struct btrfs_path *path; ··· 2374 2376 if (ret) 2375 2377 return ret; 2376 2378 2377 - src_file = fget(srcfd); 2378 - if (!src_file) { 2379 + src_file = fdget(srcfd); 2380 + if (!src_file.file) { 2379 2381 ret = -EBADF; 2380 2382 goto out_drop_write; 2381 2383 } 2382 2384 2383 2385 ret = -EXDEV; 2384 - if (src_file->f_path.mnt != file->f_path.mnt) 2386 + if (src_file.file->f_path.mnt != file->f_path.mnt) 2385 2387 goto out_fput; 2386 2388 2387 - src 
= src_file->f_dentry->d_inode; 2389 + src = src_file.file->f_dentry->d_inode; 2388 2390 2389 2391 ret = -EINVAL; 2390 2392 if (src == inode) 2391 2393 goto out_fput; 2392 2394 2393 2395 /* the src must be open for reading */ 2394 - if (!(src_file->f_mode & FMODE_READ)) 2396 + if (!(src_file.file->f_mode & FMODE_READ)) 2395 2397 goto out_fput; 2396 2398 2397 2399 /* don't make the dst file partly checksummed */ ··· 2722 2724 vfree(buf); 2723 2725 btrfs_free_path(path); 2724 2726 out_fput: 2725 - fput(src_file); 2727 + fdput(src_file); 2726 2728 out_drop_write: 2727 2729 mnt_drop_write_file(file); 2728 2730 return ret;
+7 -11
fs/btrfs/reada.c
··· 68 68 u32 blocksize; 69 69 int err; 70 70 struct list_head extctl; 71 - struct kref refcnt; 71 + int refcnt; 72 72 spinlock_t lock; 73 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS]; 74 74 int nzones; ··· 126 126 spin_lock(&fs_info->reada_lock); 127 127 re = radix_tree_lookup(&fs_info->reada_tree, index); 128 128 if (re) 129 - kref_get(&re->refcnt); 129 + re->refcnt++; 130 130 spin_unlock(&fs_info->reada_lock); 131 131 132 132 if (!re) ··· 336 336 spin_lock(&fs_info->reada_lock); 337 337 re = radix_tree_lookup(&fs_info->reada_tree, index); 338 338 if (re) 339 - kref_get(&re->refcnt); 339 + re->refcnt++; 340 340 spin_unlock(&fs_info->reada_lock); 341 341 342 342 if (re) ··· 352 352 re->top = *top; 353 353 INIT_LIST_HEAD(&re->extctl); 354 354 spin_lock_init(&re->lock); 355 - kref_init(&re->refcnt); 355 + re->refcnt = 1; 356 356 357 357 /* 358 358 * map block ··· 398 398 if (ret == -EEXIST) { 399 399 re_exist = radix_tree_lookup(&fs_info->reada_tree, index); 400 400 BUG_ON(!re_exist); 401 - kref_get(&re_exist->refcnt); 401 + re_exist->refcnt++; 402 402 spin_unlock(&fs_info->reada_lock); 403 403 goto error; 404 404 } ··· 465 465 return re_exist; 466 466 } 467 467 468 - static void reada_kref_dummy(struct kref *kr) 469 - { 470 - } 471 - 472 468 static void reada_extent_put(struct btrfs_fs_info *fs_info, 473 469 struct reada_extent *re) 474 470 { ··· 472 476 unsigned long index = re->logical >> PAGE_CACHE_SHIFT; 473 477 474 478 spin_lock(&fs_info->reada_lock); 475 - if (!kref_put(&re->refcnt, reada_kref_dummy)) { 479 + if (--re->refcnt) { 476 480 spin_unlock(&fs_info->reada_lock); 477 481 return; 478 482 } ··· 667 671 return 0; 668 672 } 669 673 dev->reada_next = re->logical + re->blocksize; 670 - kref_get(&re->refcnt); 674 + re->refcnt++; 671 675 672 676 spin_unlock(&fs_info->reada_lock); 673 677
+2 -2
fs/ceph/inode.c
··· 1104 1104 pr_err("fill_trace bad get_inode " 1105 1105 "%llx.%llx\n", vino.ino, vino.snap); 1106 1106 err = PTR_ERR(in); 1107 - d_delete(dn); 1107 + d_drop(dn); 1108 1108 goto done; 1109 1109 } 1110 1110 dn = splice_dentry(dn, in, &have_lease, true); ··· 1277 1277 in = ceph_get_inode(parent->d_sb, vino); 1278 1278 if (IS_ERR(in)) { 1279 1279 dout("new_inode badness\n"); 1280 - d_delete(dn); 1280 + d_drop(dn); 1281 1281 dput(dn); 1282 1282 err = PTR_ERR(in); 1283 1283 goto out;
+5
fs/ceph/super.c
··· 603 603 604 604 static void destroy_caches(void) 605 605 { 606 + /* 607 + * Make sure all delayed rcu free inodes are flushed before we 608 + * destroy cache. 609 + */ 610 + rcu_barrier(); 606 611 kmem_cache_destroy(ceph_inode_cachep); 607 612 kmem_cache_destroy(ceph_cap_cachep); 608 613 kmem_cache_destroy(ceph_dentry_cachep);
+5
fs/cifs/cifsfs.c
··· 968 968 static void 969 969 cifs_destroy_inodecache(void) 970 970 { 971 + /* 972 + * Make sure all delayed rcu free inodes are flushed before we 973 + * destroy cache. 974 + */ 975 + rcu_barrier(); 971 976 kmem_cache_destroy(cifs_inode_cachep); 972 977 } 973 978
+20 -17
fs/coda/inode.c
··· 85 85 86 86 void coda_destroy_inodecache(void) 87 87 { 88 + /* 89 + * Make sure all delayed rcu free inodes are flushed before we 90 + * destroy cache. 91 + */ 92 + rcu_barrier(); 88 93 kmem_cache_destroy(coda_inode_cachep); 89 94 } 90 95 ··· 112 107 113 108 static int get_device_index(struct coda_mount_data *data) 114 109 { 115 - struct file *file; 110 + struct fd f; 116 111 struct inode *inode; 117 112 int idx; 118 113 119 - if(data == NULL) { 114 + if (data == NULL) { 120 115 printk("coda_read_super: Bad mount data\n"); 121 116 return -1; 122 117 } 123 118 124 - if(data->version != CODA_MOUNT_VERSION) { 119 + if (data->version != CODA_MOUNT_VERSION) { 125 120 printk("coda_read_super: Bad mount version\n"); 126 121 return -1; 127 122 } 128 123 129 - file = fget(data->fd); 130 - inode = NULL; 131 - if(file) 132 - inode = file->f_path.dentry->d_inode; 133 - 134 - if(!inode || !S_ISCHR(inode->i_mode) || 135 - imajor(inode) != CODA_PSDEV_MAJOR) { 136 - if(file) 137 - fput(file); 138 - 139 - printk("coda_read_super: Bad file\n"); 140 - return -1; 124 + f = fdget(data->fd); 125 + if (!f.file) 126 + goto Ebadf; 127 + inode = f.file->f_path.dentry->d_inode; 128 + if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { 129 + fdput(f); 130 + goto Ebadf; 141 131 } 142 132 143 133 idx = iminor(inode); 144 - fput(file); 134 + fdput(f); 145 135 146 - if(idx < 0 || idx >= MAX_CODADEVS) { 136 + if (idx < 0 || idx >= MAX_CODADEVS) { 147 137 printk("coda_read_super: Bad minor number\n"); 148 138 return -1; 149 139 } 150 140 151 141 return idx; 142 + Ebadf: 143 + printk("coda_read_super: Bad file\n"); 144 + return -1; 152 145 } 153 146 154 147 static int coda_fill_super(struct super_block *sb, void *data, int silent)
+62 -50
fs/compat.c
··· 870 870 struct compat_old_linux_dirent __user *dirent, unsigned int count) 871 871 { 872 872 int error; 873 - struct file *file; 874 - int fput_needed; 873 + struct fd f = fdget(fd); 875 874 struct compat_readdir_callback buf; 876 875 877 - file = fget_light(fd, &fput_needed); 878 - if (!file) 876 + if (!f.file) 879 877 return -EBADF; 880 878 881 879 buf.result = 0; 882 880 buf.dirent = dirent; 883 881 884 - error = vfs_readdir(file, compat_fillonedir, &buf); 882 + error = vfs_readdir(f.file, compat_fillonedir, &buf); 885 883 if (buf.result) 886 884 error = buf.result; 887 885 888 - fput_light(file, fput_needed); 886 + fdput(f); 889 887 return error; 890 888 } 891 889 ··· 947 949 asmlinkage long compat_sys_getdents(unsigned int fd, 948 950 struct compat_linux_dirent __user *dirent, unsigned int count) 949 951 { 950 - struct file * file; 952 + struct fd f; 951 953 struct compat_linux_dirent __user * lastdirent; 952 954 struct compat_getdents_callback buf; 953 - int fput_needed; 954 955 int error; 955 956 956 957 if (!access_ok(VERIFY_WRITE, dirent, count)) 957 958 return -EFAULT; 958 959 959 - file = fget_light(fd, &fput_needed); 960 - if (!file) 960 + f = fdget(fd); 961 + if (!f.file) 961 962 return -EBADF; 962 963 963 964 buf.current_dir = dirent; ··· 964 967 buf.count = count; 965 968 buf.error = 0; 966 969 967 - error = vfs_readdir(file, compat_filldir, &buf); 970 + error = vfs_readdir(f.file, compat_filldir, &buf); 968 971 if (error >= 0) 969 972 error = buf.error; 970 973 lastdirent = buf.previous; 971 974 if (lastdirent) { 972 - if (put_user(file->f_pos, &lastdirent->d_off)) 975 + if (put_user(f.file->f_pos, &lastdirent->d_off)) 973 976 error = -EFAULT; 974 977 else 975 978 error = count - buf.count; 976 979 } 977 - fput_light(file, fput_needed); 980 + fdput(f); 978 981 return error; 979 982 } 980 983 ··· 1032 1035 asmlinkage long compat_sys_getdents64(unsigned int fd, 1033 1036 struct linux_dirent64 __user * dirent, unsigned int count) 1034 1037 { 1035 - 
struct file * file; 1038 + struct fd f; 1036 1039 struct linux_dirent64 __user * lastdirent; 1037 1040 struct compat_getdents_callback64 buf; 1038 - int fput_needed; 1039 1041 int error; 1040 1042 1041 1043 if (!access_ok(VERIFY_WRITE, dirent, count)) 1042 1044 return -EFAULT; 1043 1045 1044 - file = fget_light(fd, &fput_needed); 1045 - if (!file) 1046 + f = fdget(fd); 1047 + if (!f.file) 1046 1048 return -EBADF; 1047 1049 1048 1050 buf.current_dir = dirent; ··· 1049 1053 buf.count = count; 1050 1054 buf.error = 0; 1051 1055 1052 - error = vfs_readdir(file, compat_filldir64, &buf); 1056 + error = vfs_readdir(f.file, compat_filldir64, &buf); 1053 1057 if (error >= 0) 1054 1058 error = buf.error; 1055 1059 lastdirent = buf.previous; 1056 1060 if (lastdirent) { 1057 - typeof(lastdirent->d_off) d_off = file->f_pos; 1061 + typeof(lastdirent->d_off) d_off = f.file->f_pos; 1058 1062 if (__put_user_unaligned(d_off, &lastdirent->d_off)) 1059 1063 error = -EFAULT; 1060 1064 else 1061 1065 error = count - buf.count; 1062 1066 } 1063 - fput_light(file, fput_needed); 1067 + fdput(f); 1064 1068 return error; 1065 1069 } 1066 1070 #endif /* ! 
__ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ ··· 1148 1152 compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, 1149 1153 unsigned long vlen) 1150 1154 { 1151 - struct file *file; 1152 - int fput_needed; 1155 + struct fd f = fdget(fd); 1153 1156 ssize_t ret; 1154 1157 loff_t pos; 1155 1158 1156 - file = fget_light(fd, &fput_needed); 1157 - if (!file) 1159 + if (!f.file) 1158 1160 return -EBADF; 1159 - pos = file->f_pos; 1160 - ret = compat_readv(file, vec, vlen, &pos); 1161 - file->f_pos = pos; 1162 - fput_light(file, fput_needed); 1161 + pos = f.file->f_pos; 1162 + ret = compat_readv(f.file, vec, vlen, &pos); 1163 + f.file->f_pos = pos; 1164 + fdput(f); 1163 1165 return ret; 1164 1166 } 1165 1167 ··· 1165 1171 compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, 1166 1172 unsigned long vlen, loff_t pos) 1167 1173 { 1168 - struct file *file; 1169 - int fput_needed; 1174 + struct fd f; 1170 1175 ssize_t ret; 1171 1176 1172 1177 if (pos < 0) 1173 1178 return -EINVAL; 1174 - file = fget_light(fd, &fput_needed); 1175 - if (!file) 1179 + f = fdget(fd); 1180 + if (!f.file) 1176 1181 return -EBADF; 1177 1182 ret = -ESPIPE; 1178 - if (file->f_mode & FMODE_PREAD) 1179 - ret = compat_readv(file, vec, vlen, &pos); 1180 - fput_light(file, fput_needed); 1183 + if (f.file->f_mode & FMODE_PREAD) 1184 + ret = compat_readv(f.file, vec, vlen, &pos); 1185 + fdput(f); 1181 1186 return ret; 1182 1187 } 1183 1188 ··· 1214 1221 compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, 1215 1222 unsigned long vlen) 1216 1223 { 1217 - struct file *file; 1218 - int fput_needed; 1224 + struct fd f = fdget(fd); 1219 1225 ssize_t ret; 1220 1226 loff_t pos; 1221 1227 1222 - file = fget_light(fd, &fput_needed); 1223 - if (!file) 1228 + if (!f.file) 1224 1229 return -EBADF; 1225 - pos = file->f_pos; 1226 - ret = compat_writev(file, vec, vlen, &pos); 1227 - file->f_pos = pos; 1228 - fput_light(file, fput_needed); 1230 + pos = f.file->f_pos; 
1231 + ret = compat_writev(f.file, vec, vlen, &pos); 1232 + f.file->f_pos = pos; 1233 + fdput(f); 1229 1234 return ret; 1230 1235 } 1231 1236 ··· 1231 1240 compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, 1232 1241 unsigned long vlen, loff_t pos) 1233 1242 { 1234 - struct file *file; 1235 - int fput_needed; 1243 + struct fd f; 1236 1244 ssize_t ret; 1237 1245 1238 1246 if (pos < 0) 1239 1247 return -EINVAL; 1240 - file = fget_light(fd, &fput_needed); 1241 - if (!file) 1248 + f = fdget(fd); 1249 + if (!f.file) 1242 1250 return -EBADF; 1243 1251 ret = -ESPIPE; 1244 - if (file->f_mode & FMODE_PWRITE) 1245 - ret = compat_writev(file, vec, vlen, &pos); 1246 - fput_light(file, fput_needed); 1252 + if (f.file->f_mode & FMODE_PWRITE) 1253 + ret = compat_writev(f.file, vec, vlen, &pos); 1254 + fdput(f); 1247 1255 return ret; 1248 1256 } 1249 1257 ··· 1792 1802 return do_handle_open(mountdirfd, handle, flags); 1793 1803 } 1794 1804 #endif 1805 + 1806 + #ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE 1807 + asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, 1808 + compat_off_t __user *offset, compat_size_t count) 1809 + { 1810 + loff_t pos; 1811 + off_t off; 1812 + ssize_t ret; 1813 + 1814 + if (offset) { 1815 + if (unlikely(get_user(off, offset))) 1816 + return -EFAULT; 1817 + pos = off; 1818 + ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1819 + if (unlikely(put_user(pos, offset))) 1820 + return -EFAULT; 1821 + return ret; 1822 + } 1823 + 1824 + return do_sendfile(out_fd, in_fd, NULL, count, 0); 1825 + } 1826 + #endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */
+12 -15
fs/compat_ioctl.c
··· 1539 1539 asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, 1540 1540 unsigned long arg) 1541 1541 { 1542 - struct file *filp; 1542 + struct fd f = fdget(fd); 1543 1543 int error = -EBADF; 1544 - int fput_needed; 1545 - 1546 - filp = fget_light(fd, &fput_needed); 1547 - if (!filp) 1544 + if (!f.file) 1548 1545 goto out; 1549 1546 1550 1547 /* RED-PEN how should LSM module know it's handling 32bit? */ 1551 - error = security_file_ioctl(filp, cmd, arg); 1548 + error = security_file_ioctl(f.file, cmd, arg); 1552 1549 if (error) 1553 1550 goto out_fput; 1554 1551 ··· 1565 1568 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 1566 1569 case FS_IOC_RESVSP_32: 1567 1570 case FS_IOC_RESVSP64_32: 1568 - error = compat_ioctl_preallocate(filp, compat_ptr(arg)); 1571 + error = compat_ioctl_preallocate(f.file, compat_ptr(arg)); 1569 1572 goto out_fput; 1570 1573 #else 1571 1574 case FS_IOC_RESVSP: 1572 1575 case FS_IOC_RESVSP64: 1573 - error = ioctl_preallocate(filp, compat_ptr(arg)); 1576 + error = ioctl_preallocate(f.file, compat_ptr(arg)); 1574 1577 goto out_fput; 1575 1578 #endif 1576 1579 1577 1580 case FIBMAP: 1578 1581 case FIGETBSZ: 1579 1582 case FIONREAD: 1580 - if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 1583 + if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode)) 1581 1584 break; 1582 1585 /*FALL THROUGH*/ 1583 1586 1584 1587 default: 1585 - if (filp->f_op && filp->f_op->compat_ioctl) { 1586 - error = filp->f_op->compat_ioctl(filp, cmd, arg); 1588 + if (f.file->f_op && f.file->f_op->compat_ioctl) { 1589 + error = f.file->f_op->compat_ioctl(f.file, cmd, arg); 1587 1590 if (error != -ENOIOCTLCMD) 1588 1591 goto out_fput; 1589 1592 } 1590 1593 1591 - if (!filp->f_op || !filp->f_op->unlocked_ioctl) 1594 + if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) 1592 1595 goto do_ioctl; 1593 1596 break; 1594 1597 } ··· 1596 1599 if (compat_ioctl_check_table(XFORM(cmd))) 1597 1600 goto found_handler; 1598 1601 1599 - error = do_ioctl_trans(fd, 
cmd, arg, filp); 1602 + error = do_ioctl_trans(fd, cmd, arg, f.file); 1600 1603 if (error == -ENOIOCTLCMD) 1601 1604 error = -ENOTTY; 1602 1605 ··· 1605 1608 found_handler: 1606 1609 arg = (unsigned long)compat_ptr(arg); 1607 1610 do_ioctl: 1608 - error = do_vfs_ioctl(filp, fd, cmd, arg); 1611 + error = do_vfs_ioctl(f.file, fd, cmd, arg); 1609 1612 out_fput: 1610 - fput_light(filp, fput_needed); 1613 + fdput(f); 1611 1614 out: 1612 1615 return error; 1613 1616 }
+686
fs/coredump.c
··· 1 + #include <linux/slab.h> 2 + #include <linux/file.h> 3 + #include <linux/fdtable.h> 4 + #include <linux/mm.h> 5 + #include <linux/stat.h> 6 + #include <linux/fcntl.h> 7 + #include <linux/swap.h> 8 + #include <linux/string.h> 9 + #include <linux/init.h> 10 + #include <linux/pagemap.h> 11 + #include <linux/perf_event.h> 12 + #include <linux/highmem.h> 13 + #include <linux/spinlock.h> 14 + #include <linux/key.h> 15 + #include <linux/personality.h> 16 + #include <linux/binfmts.h> 17 + #include <linux/utsname.h> 18 + #include <linux/pid_namespace.h> 19 + #include <linux/module.h> 20 + #include <linux/namei.h> 21 + #include <linux/mount.h> 22 + #include <linux/security.h> 23 + #include <linux/syscalls.h> 24 + #include <linux/tsacct_kern.h> 25 + #include <linux/cn_proc.h> 26 + #include <linux/audit.h> 27 + #include <linux/tracehook.h> 28 + #include <linux/kmod.h> 29 + #include <linux/fsnotify.h> 30 + #include <linux/fs_struct.h> 31 + #include <linux/pipe_fs_i.h> 32 + #include <linux/oom.h> 33 + #include <linux/compat.h> 34 + 35 + #include <asm/uaccess.h> 36 + #include <asm/mmu_context.h> 37 + #include <asm/tlb.h> 38 + #include <asm/exec.h> 39 + 40 + #include <trace/events/task.h> 41 + #include "internal.h" 42 + 43 + #include <trace/events/sched.h> 44 + 45 + int core_uses_pid; 46 + char core_pattern[CORENAME_MAX_SIZE] = "core"; 47 + unsigned int core_pipe_limit; 48 + 49 + struct core_name { 50 + char *corename; 51 + int used, size; 52 + }; 53 + static atomic_t call_count = ATOMIC_INIT(1); 54 + 55 + /* The maximal length of core_pattern is also specified in sysctl.c */ 56 + 57 + static int expand_corename(struct core_name *cn) 58 + { 59 + char *old_corename = cn->corename; 60 + 61 + cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); 62 + cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); 63 + 64 + if (!cn->corename) { 65 + kfree(old_corename); 66 + return -ENOMEM; 67 + } 68 + 69 + return 0; 70 + } 71 + 72 + static int cn_printf(struct core_name 
*cn, const char *fmt, ...) 73 + { 74 + char *cur; 75 + int need; 76 + int ret; 77 + va_list arg; 78 + 79 + va_start(arg, fmt); 80 + need = vsnprintf(NULL, 0, fmt, arg); 81 + va_end(arg); 82 + 83 + if (likely(need < cn->size - cn->used - 1)) 84 + goto out_printf; 85 + 86 + ret = expand_corename(cn); 87 + if (ret) 88 + goto expand_fail; 89 + 90 + out_printf: 91 + cur = cn->corename + cn->used; 92 + va_start(arg, fmt); 93 + vsnprintf(cur, need + 1, fmt, arg); 94 + va_end(arg); 95 + cn->used += need; 96 + return 0; 97 + 98 + expand_fail: 99 + return ret; 100 + } 101 + 102 + static void cn_escape(char *str) 103 + { 104 + for (; *str; str++) 105 + if (*str == '/') 106 + *str = '!'; 107 + } 108 + 109 + static int cn_print_exe_file(struct core_name *cn) 110 + { 111 + struct file *exe_file; 112 + char *pathbuf, *path; 113 + int ret; 114 + 115 + exe_file = get_mm_exe_file(current->mm); 116 + if (!exe_file) { 117 + char *commstart = cn->corename + cn->used; 118 + ret = cn_printf(cn, "%s (path unknown)", current->comm); 119 + cn_escape(commstart); 120 + return ret; 121 + } 122 + 123 + pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); 124 + if (!pathbuf) { 125 + ret = -ENOMEM; 126 + goto put_exe_file; 127 + } 128 + 129 + path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); 130 + if (IS_ERR(path)) { 131 + ret = PTR_ERR(path); 132 + goto free_buf; 133 + } 134 + 135 + cn_escape(path); 136 + 137 + ret = cn_printf(cn, "%s", path); 138 + 139 + free_buf: 140 + kfree(pathbuf); 141 + put_exe_file: 142 + fput(exe_file); 143 + return ret; 144 + } 145 + 146 + /* format_corename will inspect the pattern parameter, and output a 147 + * name into corename, which must have space for at least 148 + * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 
149 + */ 150 + static int format_corename(struct core_name *cn, long signr) 151 + { 152 + const struct cred *cred = current_cred(); 153 + const char *pat_ptr = core_pattern; 154 + int ispipe = (*pat_ptr == '|'); 155 + int pid_in_pattern = 0; 156 + int err = 0; 157 + 158 + cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); 159 + cn->corename = kmalloc(cn->size, GFP_KERNEL); 160 + cn->used = 0; 161 + 162 + if (!cn->corename) 163 + return -ENOMEM; 164 + 165 + /* Repeat as long as we have more pattern to process and more output 166 + space */ 167 + while (*pat_ptr) { 168 + if (*pat_ptr != '%') { 169 + if (*pat_ptr == 0) 170 + goto out; 171 + err = cn_printf(cn, "%c", *pat_ptr++); 172 + } else { 173 + switch (*++pat_ptr) { 174 + /* single % at the end, drop that */ 175 + case 0: 176 + goto out; 177 + /* Double percent, output one percent */ 178 + case '%': 179 + err = cn_printf(cn, "%c", '%'); 180 + break; 181 + /* pid */ 182 + case 'p': 183 + pid_in_pattern = 1; 184 + err = cn_printf(cn, "%d", 185 + task_tgid_vnr(current)); 186 + break; 187 + /* uid */ 188 + case 'u': 189 + err = cn_printf(cn, "%d", cred->uid); 190 + break; 191 + /* gid */ 192 + case 'g': 193 + err = cn_printf(cn, "%d", cred->gid); 194 + break; 195 + /* signal that caused the coredump */ 196 + case 's': 197 + err = cn_printf(cn, "%ld", signr); 198 + break; 199 + /* UNIX time of coredump */ 200 + case 't': { 201 + struct timeval tv; 202 + do_gettimeofday(&tv); 203 + err = cn_printf(cn, "%lu", tv.tv_sec); 204 + break; 205 + } 206 + /* hostname */ 207 + case 'h': { 208 + char *namestart = cn->corename + cn->used; 209 + down_read(&uts_sem); 210 + err = cn_printf(cn, "%s", 211 + utsname()->nodename); 212 + up_read(&uts_sem); 213 + cn_escape(namestart); 214 + break; 215 + } 216 + /* executable */ 217 + case 'e': { 218 + char *commstart = cn->corename + cn->used; 219 + err = cn_printf(cn, "%s", current->comm); 220 + cn_escape(commstart); 221 + break; 222 + } 223 + case 'E': 224 + err = 
cn_print_exe_file(cn); 225 + break; 226 + /* core limit size */ 227 + case 'c': 228 + err = cn_printf(cn, "%lu", 229 + rlimit(RLIMIT_CORE)); 230 + break; 231 + default: 232 + break; 233 + } 234 + ++pat_ptr; 235 + } 236 + 237 + if (err) 238 + return err; 239 + } 240 + 241 + /* Backward compatibility with core_uses_pid: 242 + * 243 + * If core_pattern does not include a %p (as is the default) 244 + * and core_uses_pid is set, then .%pid will be appended to 245 + * the filename. Do not do this for piped commands. */ 246 + if (!ispipe && !pid_in_pattern && core_uses_pid) { 247 + err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 248 + if (err) 249 + return err; 250 + } 251 + out: 252 + return ispipe; 253 + } 254 + 255 + static int zap_process(struct task_struct *start, int exit_code) 256 + { 257 + struct task_struct *t; 258 + int nr = 0; 259 + 260 + start->signal->flags = SIGNAL_GROUP_EXIT; 261 + start->signal->group_exit_code = exit_code; 262 + start->signal->group_stop_count = 0; 263 + 264 + t = start; 265 + do { 266 + task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 267 + if (t != current && t->mm) { 268 + sigaddset(&t->pending.signal, SIGKILL); 269 + signal_wake_up(t, 1); 270 + nr++; 271 + } 272 + } while_each_thread(start, t); 273 + 274 + return nr; 275 + } 276 + 277 + static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 278 + struct core_state *core_state, int exit_code) 279 + { 280 + struct task_struct *g, *p; 281 + unsigned long flags; 282 + int nr = -EAGAIN; 283 + 284 + spin_lock_irq(&tsk->sighand->siglock); 285 + if (!signal_group_exit(tsk->signal)) { 286 + mm->core_state = core_state; 287 + nr = zap_process(tsk, exit_code); 288 + } 289 + spin_unlock_irq(&tsk->sighand->siglock); 290 + if (unlikely(nr < 0)) 291 + return nr; 292 + 293 + if (atomic_read(&mm->mm_users) == nr + 1) 294 + goto done; 295 + /* 296 + * We should find and kill all tasks which use this mm, and we should 297 + * count them correctly into ->nr_threads. 
We don't take tasklist 298 + * lock, but this is safe wrt: 299 + * 300 + * fork: 301 + * None of sub-threads can fork after zap_process(leader). All 302 + * processes which were created before this point should be 303 + * visible to zap_threads() because copy_process() adds the new 304 + * process to the tail of init_task.tasks list, and lock/unlock 305 + * of ->siglock provides a memory barrier. 306 + * 307 + * do_exit: 308 + * The caller holds mm->mmap_sem. This means that the task which 309 + * uses this mm can't pass exit_mm(), so it can't exit or clear 310 + * its ->mm. 311 + * 312 + * de_thread: 313 + * It does list_replace_rcu(&leader->tasks, &current->tasks), 314 + * we must see either old or new leader, this does not matter. 315 + * However, it can change p->sighand, so lock_task_sighand(p) 316 + * must be used. Since p->mm != NULL and we hold ->mmap_sem 317 + * it can't fail. 318 + * 319 + * Note also that "g" can be the old leader with ->mm == NULL 320 + * and already unhashed and thus removed from ->thread_group. 321 + * This is OK, __unhash_process()->list_del_rcu() does not 322 + * clear the ->next pointer, we will find the new leader via 323 + * next_thread(). 
324 + */ 325 + rcu_read_lock(); 326 + for_each_process(g) { 327 + if (g == tsk->group_leader) 328 + continue; 329 + if (g->flags & PF_KTHREAD) 330 + continue; 331 + p = g; 332 + do { 333 + if (p->mm) { 334 + if (unlikely(p->mm == mm)) { 335 + lock_task_sighand(p, &flags); 336 + nr += zap_process(p, exit_code); 337 + unlock_task_sighand(p, &flags); 338 + } 339 + break; 340 + } 341 + } while_each_thread(g, p); 342 + } 343 + rcu_read_unlock(); 344 + done: 345 + atomic_set(&core_state->nr_threads, nr); 346 + return nr; 347 + } 348 + 349 + static int coredump_wait(int exit_code, struct core_state *core_state) 350 + { 351 + struct task_struct *tsk = current; 352 + struct mm_struct *mm = tsk->mm; 353 + int core_waiters = -EBUSY; 354 + 355 + init_completion(&core_state->startup); 356 + core_state->dumper.task = tsk; 357 + core_state->dumper.next = NULL; 358 + 359 + down_write(&mm->mmap_sem); 360 + if (!mm->core_state) 361 + core_waiters = zap_threads(tsk, mm, core_state, exit_code); 362 + up_write(&mm->mmap_sem); 363 + 364 + if (core_waiters > 0) { 365 + struct core_thread *ptr; 366 + 367 + wait_for_completion(&core_state->startup); 368 + /* 369 + * Wait for all the threads to become inactive, so that 370 + * all the thread context (extended register state, like 371 + * fpu etc) gets copied to the memory. 372 + */ 373 + ptr = core_state->dumper.next; 374 + while (ptr != NULL) { 375 + wait_task_inactive(ptr->task, 0); 376 + ptr = ptr->next; 377 + } 378 + } 379 + 380 + return core_waiters; 381 + } 382 + 383 + static void coredump_finish(struct mm_struct *mm) 384 + { 385 + struct core_thread *curr, *next; 386 + struct task_struct *task; 387 + 388 + next = mm->core_state->dumper.next; 389 + while ((curr = next) != NULL) { 390 + next = curr->next; 391 + task = curr->task; 392 + /* 393 + * see exit_mm(), curr->task must not see 394 + * ->task == NULL before we read ->next. 
395 + */ 396 + smp_mb(); 397 + curr->task = NULL; 398 + wake_up_process(task); 399 + } 400 + 401 + mm->core_state = NULL; 402 + } 403 + 404 + static void wait_for_dump_helpers(struct file *file) 405 + { 406 + struct pipe_inode_info *pipe; 407 + 408 + pipe = file->f_path.dentry->d_inode->i_pipe; 409 + 410 + pipe_lock(pipe); 411 + pipe->readers++; 412 + pipe->writers--; 413 + 414 + while ((pipe->readers > 1) && (!signal_pending(current))) { 415 + wake_up_interruptible_sync(&pipe->wait); 416 + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 417 + pipe_wait(pipe); 418 + } 419 + 420 + pipe->readers--; 421 + pipe->writers++; 422 + pipe_unlock(pipe); 423 + 424 + } 425 + 426 + /* 427 + * umh_pipe_setup 428 + * helper function to customize the process used 429 + * to collect the core in userspace. Specifically 430 + * it sets up a pipe and installs it as fd 0 (stdin) 431 + * for the process. Returns 0 on success, or 432 + * PTR_ERR on failure. 433 + * Note that it also sets the core limit to 1. 
This 434 + * is a special value that we use to trap recursive 435 + * core dumps 436 + */ 437 + static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 438 + { 439 + struct file *files[2]; 440 + struct coredump_params *cp = (struct coredump_params *)info->data; 441 + int err = create_pipe_files(files, 0); 442 + if (err) 443 + return err; 444 + 445 + cp->file = files[1]; 446 + 447 + replace_fd(0, files[0], 0); 448 + /* and disallow core files too */ 449 + current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 450 + 451 + return 0; 452 + } 453 + 454 + void do_coredump(long signr, int exit_code, struct pt_regs *regs) 455 + { 456 + struct core_state core_state; 457 + struct core_name cn; 458 + struct mm_struct *mm = current->mm; 459 + struct linux_binfmt * binfmt; 460 + const struct cred *old_cred; 461 + struct cred *cred; 462 + int retval = 0; 463 + int flag = 0; 464 + int ispipe; 465 + struct files_struct *displaced; 466 + bool need_nonrelative = false; 467 + static atomic_t core_dump_count = ATOMIC_INIT(0); 468 + struct coredump_params cprm = { 469 + .signr = signr, 470 + .regs = regs, 471 + .limit = rlimit(RLIMIT_CORE), 472 + /* 473 + * We must use the same mm->flags while dumping core to avoid 474 + * inconsistency of bit flags, since this flag is not protected 475 + * by any locks. 476 + */ 477 + .mm_flags = mm->flags, 478 + }; 479 + 480 + audit_core_dumps(signr); 481 + 482 + binfmt = mm->binfmt; 483 + if (!binfmt || !binfmt->core_dump) 484 + goto fail; 485 + if (!__get_dumpable(cprm.mm_flags)) 486 + goto fail; 487 + 488 + cred = prepare_creds(); 489 + if (!cred) 490 + goto fail; 491 + /* 492 + * We cannot trust fsuid as being the "true" uid of the process 493 + * nor do we know its entire history. We only know it was tainted 494 + * so we dump it as root in mode 2, and only into a controlled 495 + * environment (pipe handler or fully qualified path). 
496 + */ 497 + if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { 498 + /* Setuid core dump mode */ 499 + flag = O_EXCL; /* Stop rewrite attacks */ 500 + cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 501 + need_nonrelative = true; 502 + } 503 + 504 + retval = coredump_wait(exit_code, &core_state); 505 + if (retval < 0) 506 + goto fail_creds; 507 + 508 + old_cred = override_creds(cred); 509 + 510 + /* 511 + * Clear any false indication of pending signals that might 512 + * be seen by the filesystem code called to write the core file. 513 + */ 514 + clear_thread_flag(TIF_SIGPENDING); 515 + 516 + ispipe = format_corename(&cn, signr); 517 + 518 + if (ispipe) { 519 + int dump_count; 520 + char **helper_argv; 521 + 522 + if (ispipe < 0) { 523 + printk(KERN_WARNING "format_corename failed\n"); 524 + printk(KERN_WARNING "Aborting core\n"); 525 + goto fail_corename; 526 + } 527 + 528 + if (cprm.limit == 1) { 529 + /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 530 + * 531 + * Normally core limits are irrelevant to pipes, since 532 + * we're not writing to the file system, but we use 533 + * cprm.limit of 1 here as a speacial value, this is a 534 + * consistent way to catch recursive crashes. 535 + * We can still crash if the core_pattern binary sets 536 + * RLIM_CORE = !1, but it runs as root, and can do 537 + * lots of stupid things. 538 + * 539 + * Note that we use task_tgid_vnr here to grab the pid 540 + * of the process group leader. That way we get the 541 + * right pid if a thread in a multi-threaded 542 + * core_pattern process dies. 
543 + */ 544 + printk(KERN_WARNING 545 + "Process %d(%s) has RLIMIT_CORE set to 1\n", 546 + task_tgid_vnr(current), current->comm); 547 + printk(KERN_WARNING "Aborting core\n"); 548 + goto fail_unlock; 549 + } 550 + cprm.limit = RLIM_INFINITY; 551 + 552 + dump_count = atomic_inc_return(&core_dump_count); 553 + if (core_pipe_limit && (core_pipe_limit < dump_count)) { 554 + printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 555 + task_tgid_vnr(current), current->comm); 556 + printk(KERN_WARNING "Skipping core dump\n"); 557 + goto fail_dropcount; 558 + } 559 + 560 + helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); 561 + if (!helper_argv) { 562 + printk(KERN_WARNING "%s failed to allocate memory\n", 563 + __func__); 564 + goto fail_dropcount; 565 + } 566 + 567 + retval = call_usermodehelper_fns(helper_argv[0], helper_argv, 568 + NULL, UMH_WAIT_EXEC, umh_pipe_setup, 569 + NULL, &cprm); 570 + argv_free(helper_argv); 571 + if (retval) { 572 + printk(KERN_INFO "Core dump to %s pipe failed\n", 573 + cn.corename); 574 + goto close_fail; 575 + } 576 + } else { 577 + struct inode *inode; 578 + 579 + if (cprm.limit < binfmt->min_coredump) 580 + goto fail_unlock; 581 + 582 + if (need_nonrelative && cn.corename[0] != '/') { 583 + printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 584 + "to fully qualified path!\n", 585 + task_tgid_vnr(current), current->comm); 586 + printk(KERN_WARNING "Skipping core dump\n"); 587 + goto fail_unlock; 588 + } 589 + 590 + cprm.file = filp_open(cn.corename, 591 + O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 592 + 0600); 593 + if (IS_ERR(cprm.file)) 594 + goto fail_unlock; 595 + 596 + inode = cprm.file->f_path.dentry->d_inode; 597 + if (inode->i_nlink > 1) 598 + goto close_fail; 599 + if (d_unhashed(cprm.file->f_path.dentry)) 600 + goto close_fail; 601 + /* 602 + * AK: actually i see no reason to not allow this for named 603 + * pipes etc, but keep the previous behaviour for now. 
604 + */ 605 + if (!S_ISREG(inode->i_mode)) 606 + goto close_fail; 607 + /* 608 + * Dont allow local users get cute and trick others to coredump 609 + * into their pre-created files. 610 + */ 611 + if (!uid_eq(inode->i_uid, current_fsuid())) 612 + goto close_fail; 613 + if (!cprm.file->f_op || !cprm.file->f_op->write) 614 + goto close_fail; 615 + if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 616 + goto close_fail; 617 + } 618 + 619 + /* get us an unshared descriptor table; almost always a no-op */ 620 + retval = unshare_files(&displaced); 621 + if (retval) 622 + goto close_fail; 623 + if (displaced) 624 + put_files_struct(displaced); 625 + retval = binfmt->core_dump(&cprm); 626 + if (retval) 627 + current->signal->group_exit_code |= 0x80; 628 + 629 + if (ispipe && core_pipe_limit) 630 + wait_for_dump_helpers(cprm.file); 631 + close_fail: 632 + if (cprm.file) 633 + filp_close(cprm.file, NULL); 634 + fail_dropcount: 635 + if (ispipe) 636 + atomic_dec(&core_dump_count); 637 + fail_unlock: 638 + kfree(cn.corename); 639 + fail_corename: 640 + coredump_finish(mm); 641 + revert_creds(old_cred); 642 + fail_creds: 643 + put_cred(cred); 644 + fail: 645 + return; 646 + } 647 + 648 + /* 649 + * Core dumping helper functions. These are the only things you should 650 + * do on a core-file: use only these functions to write out all the 651 + * necessary info. 
652 + */ 653 + int dump_write(struct file *file, const void *addr, int nr) 654 + { 655 + return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; 656 + } 657 + EXPORT_SYMBOL(dump_write); 658 + 659 + int dump_seek(struct file *file, loff_t off) 660 + { 661 + int ret = 1; 662 + 663 + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 664 + if (file->f_op->llseek(file, off, SEEK_CUR) < 0) 665 + return 0; 666 + } else { 667 + char *buf = (char *)get_zeroed_page(GFP_KERNEL); 668 + 669 + if (!buf) 670 + return 0; 671 + while (off > 0) { 672 + unsigned long n = off; 673 + 674 + if (n > PAGE_SIZE) 675 + n = PAGE_SIZE; 676 + if (!dump_write(file, buf, n)) { 677 + ret = 0; 678 + break; 679 + } 680 + off -= n; 681 + } 682 + free_page((unsigned long)buf); 683 + } 684 + return ret; 685 + } 686 + EXPORT_SYMBOL(dump_seek);
+1 -1
fs/dcache.c
··· 2113 2113 inode = dentry->d_inode; 2114 2114 isdir = S_ISDIR(inode->i_mode); 2115 2115 if (dentry->d_count == 1) { 2116 - if (inode && !spin_trylock(&inode->i_lock)) { 2116 + if (!spin_trylock(&inode->i_lock)) { 2117 2117 spin_unlock(&dentry->d_lock); 2118 2118 cpu_relax(); 2119 2119 goto again;
+6
fs/ecryptfs/main.c
··· 711 711 { 712 712 int i; 713 713 714 + /* 715 + * Make sure all delayed rcu free inodes are flushed before we 716 + * destroy cache. 717 + */ 718 + rcu_barrier(); 719 + 714 720 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { 715 721 struct ecryptfs_cache_info *info; 716 722
+5
fs/efs/super.c
··· 96 96 97 97 static void destroy_inodecache(void) 98 98 { 99 + /* 100 + * Make sure all delayed rcu free inodes are flushed before we 101 + * destroy cache. 102 + */ 103 + rcu_barrier(); 99 104 kmem_cache_destroy(efs_inode_cachep); 100 105 } 101 106
+9 -14
fs/eventpoll.c
··· 1810 1810 int, maxevents, int, timeout) 1811 1811 { 1812 1812 int error; 1813 - struct file *file; 1813 + struct fd f; 1814 1814 struct eventpoll *ep; 1815 1815 1816 1816 /* The maximum number of event must be greater than zero */ ··· 1818 1818 return -EINVAL; 1819 1819 1820 1820 /* Verify that the area passed by the user is writeable */ 1821 - if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { 1822 - error = -EFAULT; 1823 - goto error_return; 1824 - } 1821 + if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) 1822 + return -EFAULT; 1825 1823 1826 1824 /* Get the "struct file *" for the eventpoll file */ 1827 - error = -EBADF; 1828 - file = fget(epfd); 1829 - if (!file) 1830 - goto error_return; 1825 + f = fdget(epfd); 1826 + if (!f.file) 1827 + return -EBADF; 1831 1828 1832 1829 /* 1833 1830 * We have to check that the file structure underneath the fd 1834 1831 * the user passed to us _is_ an eventpoll file. 1835 1832 */ 1836 1833 error = -EINVAL; 1837 - if (!is_file_epoll(file)) 1834 + if (!is_file_epoll(f.file)) 1838 1835 goto error_fput; 1839 1836 1840 1837 /* 1841 1838 * At this point it is safe to assume that the "private_data" contains 1842 1839 * our own data structure. 1843 1840 */ 1844 - ep = file->private_data; 1841 + ep = f.file->private_data; 1845 1842 1846 1843 /* Time to fish for events ... */ 1847 1844 error = ep_poll(ep, events, maxevents, timeout); 1848 1845 1849 1846 error_fput: 1850 - fput(file); 1851 - error_return: 1852 - 1847 + fdput(f); 1853 1848 return error; 1854 1849 } 1855 1850
+7 -681
fs/exec.c
··· 66 66 67 67 #include <trace/events/sched.h> 68 68 69 - int core_uses_pid; 70 - char core_pattern[CORENAME_MAX_SIZE] = "core"; 71 - unsigned int core_pipe_limit; 72 69 int suid_dumpable = 0; 73 - 74 - struct core_name { 75 - char *corename; 76 - int used, size; 77 - }; 78 - static atomic_t call_count = ATOMIC_INIT(1); 79 - 80 - /* The maximal length of core_pattern is also specified in sysctl.c */ 81 70 82 71 static LIST_HEAD(formats); 83 72 static DEFINE_RWLOCK(binfmt_lock); ··· 995 1006 return 0; 996 1007 } 997 1008 998 - /* 999 - * These functions flushes out all traces of the currently running executable 1000 - * so that a new one can be started 1001 - */ 1002 - static void flush_old_files(struct files_struct * files) 1003 - { 1004 - long j = -1; 1005 - struct fdtable *fdt; 1006 - 1007 - spin_lock(&files->file_lock); 1008 - for (;;) { 1009 - unsigned long set, i; 1010 - 1011 - j++; 1012 - i = j * BITS_PER_LONG; 1013 - fdt = files_fdtable(files); 1014 - if (i >= fdt->max_fds) 1015 - break; 1016 - set = fdt->close_on_exec[j]; 1017 - if (!set) 1018 - continue; 1019 - fdt->close_on_exec[j] = 0; 1020 - spin_unlock(&files->file_lock); 1021 - for ( ; set ; i++,set >>= 1) { 1022 - if (set & 1) { 1023 - sys_close(i); 1024 - } 1025 - } 1026 - spin_lock(&files->file_lock); 1027 - 1028 - } 1029 - spin_unlock(&files->file_lock); 1030 - } 1031 - 1032 1009 char *get_task_comm(char *buf, struct task_struct *tsk) 1033 1010 { 1034 1011 /* buf must be at least sizeof(tsk->comm) in size */ ··· 1004 1049 return buf; 1005 1050 } 1006 1051 EXPORT_SYMBOL_GPL(get_task_comm); 1052 + 1053 + /* 1054 + * These functions flushes out all traces of the currently running executable 1055 + * so that a new one can be started 1056 + */ 1007 1057 1008 1058 void set_task_comm(struct task_struct *tsk, char *buf) 1009 1059 { ··· 1131 1171 current->self_exec_id++; 1132 1172 1133 1173 flush_signal_handlers(current, 0); 1134 - flush_old_files(current->files); 1174 + do_close_on_exec(current->files); 
1135 1175 } 1136 1176 EXPORT_SYMBOL(setup_new_exec); 1137 1177 ··· 1592 1632 1593 1633 EXPORT_SYMBOL(set_binfmt); 1594 1634 1595 - static int expand_corename(struct core_name *cn) 1596 - { 1597 - char *old_corename = cn->corename; 1598 - 1599 - cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); 1600 - cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); 1601 - 1602 - if (!cn->corename) { 1603 - kfree(old_corename); 1604 - return -ENOMEM; 1605 - } 1606 - 1607 - return 0; 1608 - } 1609 - 1610 - static int cn_printf(struct core_name *cn, const char *fmt, ...) 1611 - { 1612 - char *cur; 1613 - int need; 1614 - int ret; 1615 - va_list arg; 1616 - 1617 - va_start(arg, fmt); 1618 - need = vsnprintf(NULL, 0, fmt, arg); 1619 - va_end(arg); 1620 - 1621 - if (likely(need < cn->size - cn->used - 1)) 1622 - goto out_printf; 1623 - 1624 - ret = expand_corename(cn); 1625 - if (ret) 1626 - goto expand_fail; 1627 - 1628 - out_printf: 1629 - cur = cn->corename + cn->used; 1630 - va_start(arg, fmt); 1631 - vsnprintf(cur, need + 1, fmt, arg); 1632 - va_end(arg); 1633 - cn->used += need; 1634 - return 0; 1635 - 1636 - expand_fail: 1637 - return ret; 1638 - } 1639 - 1640 - static void cn_escape(char *str) 1641 - { 1642 - for (; *str; str++) 1643 - if (*str == '/') 1644 - *str = '!'; 1645 - } 1646 - 1647 - static int cn_print_exe_file(struct core_name *cn) 1648 - { 1649 - struct file *exe_file; 1650 - char *pathbuf, *path; 1651 - int ret; 1652 - 1653 - exe_file = get_mm_exe_file(current->mm); 1654 - if (!exe_file) { 1655 - char *commstart = cn->corename + cn->used; 1656 - ret = cn_printf(cn, "%s (path unknown)", current->comm); 1657 - cn_escape(commstart); 1658 - return ret; 1659 - } 1660 - 1661 - pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); 1662 - if (!pathbuf) { 1663 - ret = -ENOMEM; 1664 - goto put_exe_file; 1665 - } 1666 - 1667 - path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); 1668 - if (IS_ERR(path)) { 1669 - ret = PTR_ERR(path); 1670 - goto free_buf; 1671 - 
} 1672 - 1673 - cn_escape(path); 1674 - 1675 - ret = cn_printf(cn, "%s", path); 1676 - 1677 - free_buf: 1678 - kfree(pathbuf); 1679 - put_exe_file: 1680 - fput(exe_file); 1681 - return ret; 1682 - } 1683 - 1684 - /* format_corename will inspect the pattern parameter, and output a 1685 - * name into corename, which must have space for at least 1686 - * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1687 - */ 1688 - static int format_corename(struct core_name *cn, long signr) 1689 - { 1690 - const struct cred *cred = current_cred(); 1691 - const char *pat_ptr = core_pattern; 1692 - int ispipe = (*pat_ptr == '|'); 1693 - int pid_in_pattern = 0; 1694 - int err = 0; 1695 - 1696 - cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); 1697 - cn->corename = kmalloc(cn->size, GFP_KERNEL); 1698 - cn->used = 0; 1699 - 1700 - if (!cn->corename) 1701 - return -ENOMEM; 1702 - 1703 - /* Repeat as long as we have more pattern to process and more output 1704 - space */ 1705 - while (*pat_ptr) { 1706 - if (*pat_ptr != '%') { 1707 - if (*pat_ptr == 0) 1708 - goto out; 1709 - err = cn_printf(cn, "%c", *pat_ptr++); 1710 - } else { 1711 - switch (*++pat_ptr) { 1712 - /* single % at the end, drop that */ 1713 - case 0: 1714 - goto out; 1715 - /* Double percent, output one percent */ 1716 - case '%': 1717 - err = cn_printf(cn, "%c", '%'); 1718 - break; 1719 - /* pid */ 1720 - case 'p': 1721 - pid_in_pattern = 1; 1722 - err = cn_printf(cn, "%d", 1723 - task_tgid_vnr(current)); 1724 - break; 1725 - /* uid */ 1726 - case 'u': 1727 - err = cn_printf(cn, "%d", cred->uid); 1728 - break; 1729 - /* gid */ 1730 - case 'g': 1731 - err = cn_printf(cn, "%d", cred->gid); 1732 - break; 1733 - /* signal that caused the coredump */ 1734 - case 's': 1735 - err = cn_printf(cn, "%ld", signr); 1736 - break; 1737 - /* UNIX time of coredump */ 1738 - case 't': { 1739 - struct timeval tv; 1740 - do_gettimeofday(&tv); 1741 - err = cn_printf(cn, "%lu", tv.tv_sec); 1742 - break; 1743 - } 1744 - 
/* hostname */ 1745 - case 'h': { 1746 - char *namestart = cn->corename + cn->used; 1747 - down_read(&uts_sem); 1748 - err = cn_printf(cn, "%s", 1749 - utsname()->nodename); 1750 - up_read(&uts_sem); 1751 - cn_escape(namestart); 1752 - break; 1753 - } 1754 - /* executable */ 1755 - case 'e': { 1756 - char *commstart = cn->corename + cn->used; 1757 - err = cn_printf(cn, "%s", current->comm); 1758 - cn_escape(commstart); 1759 - break; 1760 - } 1761 - case 'E': 1762 - err = cn_print_exe_file(cn); 1763 - break; 1764 - /* core limit size */ 1765 - case 'c': 1766 - err = cn_printf(cn, "%lu", 1767 - rlimit(RLIMIT_CORE)); 1768 - break; 1769 - default: 1770 - break; 1771 - } 1772 - ++pat_ptr; 1773 - } 1774 - 1775 - if (err) 1776 - return err; 1777 - } 1778 - 1779 - /* Backward compatibility with core_uses_pid: 1780 - * 1781 - * If core_pattern does not include a %p (as is the default) 1782 - * and core_uses_pid is set, then .%pid will be appended to 1783 - * the filename. Do not do this for piped commands. 
*/ 1784 - if (!ispipe && !pid_in_pattern && core_uses_pid) { 1785 - err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 1786 - if (err) 1787 - return err; 1788 - } 1789 - out: 1790 - return ispipe; 1791 - } 1792 - 1793 - static int zap_process(struct task_struct *start, int exit_code) 1794 - { 1795 - struct task_struct *t; 1796 - int nr = 0; 1797 - 1798 - start->signal->flags = SIGNAL_GROUP_EXIT; 1799 - start->signal->group_exit_code = exit_code; 1800 - start->signal->group_stop_count = 0; 1801 - 1802 - t = start; 1803 - do { 1804 - task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 1805 - if (t != current && t->mm) { 1806 - sigaddset(&t->pending.signal, SIGKILL); 1807 - signal_wake_up(t, 1); 1808 - nr++; 1809 - } 1810 - } while_each_thread(start, t); 1811 - 1812 - return nr; 1813 - } 1814 - 1815 - static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 1816 - struct core_state *core_state, int exit_code) 1817 - { 1818 - struct task_struct *g, *p; 1819 - unsigned long flags; 1820 - int nr = -EAGAIN; 1821 - 1822 - spin_lock_irq(&tsk->sighand->siglock); 1823 - if (!signal_group_exit(tsk->signal)) { 1824 - mm->core_state = core_state; 1825 - nr = zap_process(tsk, exit_code); 1826 - } 1827 - spin_unlock_irq(&tsk->sighand->siglock); 1828 - if (unlikely(nr < 0)) 1829 - return nr; 1830 - 1831 - if (atomic_read(&mm->mm_users) == nr + 1) 1832 - goto done; 1833 - /* 1834 - * We should find and kill all tasks which use this mm, and we should 1835 - * count them correctly into ->nr_threads. We don't take tasklist 1836 - * lock, but this is safe wrt: 1837 - * 1838 - * fork: 1839 - * None of sub-threads can fork after zap_process(leader). All 1840 - * processes which were created before this point should be 1841 - * visible to zap_threads() because copy_process() adds the new 1842 - * process to the tail of init_task.tasks list, and lock/unlock 1843 - * of ->siglock provides a memory barrier. 1844 - * 1845 - * do_exit: 1846 - * The caller holds mm->mmap_sem. 
This means that the task which 1847 - * uses this mm can't pass exit_mm(), so it can't exit or clear 1848 - * its ->mm. 1849 - * 1850 - * de_thread: 1851 - * It does list_replace_rcu(&leader->tasks, &current->tasks), 1852 - * we must see either old or new leader, this does not matter. 1853 - * However, it can change p->sighand, so lock_task_sighand(p) 1854 - * must be used. Since p->mm != NULL and we hold ->mmap_sem 1855 - * it can't fail. 1856 - * 1857 - * Note also that "g" can be the old leader with ->mm == NULL 1858 - * and already unhashed and thus removed from ->thread_group. 1859 - * This is OK, __unhash_process()->list_del_rcu() does not 1860 - * clear the ->next pointer, we will find the new leader via 1861 - * next_thread(). 1862 - */ 1863 - rcu_read_lock(); 1864 - for_each_process(g) { 1865 - if (g == tsk->group_leader) 1866 - continue; 1867 - if (g->flags & PF_KTHREAD) 1868 - continue; 1869 - p = g; 1870 - do { 1871 - if (p->mm) { 1872 - if (unlikely(p->mm == mm)) { 1873 - lock_task_sighand(p, &flags); 1874 - nr += zap_process(p, exit_code); 1875 - unlock_task_sighand(p, &flags); 1876 - } 1877 - break; 1878 - } 1879 - } while_each_thread(g, p); 1880 - } 1881 - rcu_read_unlock(); 1882 - done: 1883 - atomic_set(&core_state->nr_threads, nr); 1884 - return nr; 1885 - } 1886 - 1887 - static int coredump_wait(int exit_code, struct core_state *core_state) 1888 - { 1889 - struct task_struct *tsk = current; 1890 - struct mm_struct *mm = tsk->mm; 1891 - int core_waiters = -EBUSY; 1892 - 1893 - init_completion(&core_state->startup); 1894 - core_state->dumper.task = tsk; 1895 - core_state->dumper.next = NULL; 1896 - 1897 - down_write(&mm->mmap_sem); 1898 - if (!mm->core_state) 1899 - core_waiters = zap_threads(tsk, mm, core_state, exit_code); 1900 - up_write(&mm->mmap_sem); 1901 - 1902 - if (core_waiters > 0) { 1903 - struct core_thread *ptr; 1904 - 1905 - wait_for_completion(&core_state->startup); 1906 - /* 1907 - * Wait for all the threads to become inactive, so 
that 1908 - * all the thread context (extended register state, like 1909 - * fpu etc) gets copied to the memory. 1910 - */ 1911 - ptr = core_state->dumper.next; 1912 - while (ptr != NULL) { 1913 - wait_task_inactive(ptr->task, 0); 1914 - ptr = ptr->next; 1915 - } 1916 - } 1917 - 1918 - return core_waiters; 1919 - } 1920 - 1921 - static void coredump_finish(struct mm_struct *mm) 1922 - { 1923 - struct core_thread *curr, *next; 1924 - struct task_struct *task; 1925 - 1926 - next = mm->core_state->dumper.next; 1927 - while ((curr = next) != NULL) { 1928 - next = curr->next; 1929 - task = curr->task; 1930 - /* 1931 - * see exit_mm(), curr->task must not see 1932 - * ->task == NULL before we read ->next. 1933 - */ 1934 - smp_mb(); 1935 - curr->task = NULL; 1936 - wake_up_process(task); 1937 - } 1938 - 1939 - mm->core_state = NULL; 1940 - } 1941 - 1942 1635 /* 1943 1636 * set_dumpable converts traditional three-value dumpable to two flags and 1944 1637 * stores them into mm->flags. It modifies lower two bits of mm->flags, but ··· 1633 2020 } 1634 2021 } 1635 2022 1636 - static int __get_dumpable(unsigned long mm_flags) 2023 + int __get_dumpable(unsigned long mm_flags) 1637 2024 { 1638 2025 int ret; 1639 2026 ··· 1645 2032 { 1646 2033 return __get_dumpable(mm->flags); 1647 2034 } 1648 - 1649 - static void wait_for_dump_helpers(struct file *file) 1650 - { 1651 - struct pipe_inode_info *pipe; 1652 - 1653 - pipe = file->f_path.dentry->d_inode->i_pipe; 1654 - 1655 - pipe_lock(pipe); 1656 - pipe->readers++; 1657 - pipe->writers--; 1658 - 1659 - while ((pipe->readers > 1) && (!signal_pending(current))) { 1660 - wake_up_interruptible_sync(&pipe->wait); 1661 - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 1662 - pipe_wait(pipe); 1663 - } 1664 - 1665 - pipe->readers--; 1666 - pipe->writers++; 1667 - pipe_unlock(pipe); 1668 - 1669 - } 1670 - 1671 - 1672 - /* 1673 - * umh_pipe_setup 1674 - * helper function to customize the process used 1675 - * to collect the core in 
userspace. Specifically 1676 - * it sets up a pipe and installs it as fd 0 (stdin) 1677 - * for the process. Returns 0 on success, or 1678 - * PTR_ERR on failure. 1679 - * Note that it also sets the core limit to 1. This 1680 - * is a special value that we use to trap recursive 1681 - * core dumps 1682 - */ 1683 - static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 1684 - { 1685 - struct file *files[2]; 1686 - struct fdtable *fdt; 1687 - struct coredump_params *cp = (struct coredump_params *)info->data; 1688 - struct files_struct *cf = current->files; 1689 - int err = create_pipe_files(files, 0); 1690 - if (err) 1691 - return err; 1692 - 1693 - cp->file = files[1]; 1694 - 1695 - sys_close(0); 1696 - fd_install(0, files[0]); 1697 - spin_lock(&cf->file_lock); 1698 - fdt = files_fdtable(cf); 1699 - __set_open_fd(0, fdt); 1700 - __clear_close_on_exec(0, fdt); 1701 - spin_unlock(&cf->file_lock); 1702 - 1703 - /* and disallow core files too */ 1704 - current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 1705 - 1706 - return 0; 1707 - } 1708 - 1709 - void do_coredump(long signr, int exit_code, struct pt_regs *regs) 1710 - { 1711 - struct core_state core_state; 1712 - struct core_name cn; 1713 - struct mm_struct *mm = current->mm; 1714 - struct linux_binfmt * binfmt; 1715 - const struct cred *old_cred; 1716 - struct cred *cred; 1717 - int retval = 0; 1718 - int flag = 0; 1719 - int ispipe; 1720 - bool need_nonrelative = false; 1721 - static atomic_t core_dump_count = ATOMIC_INIT(0); 1722 - struct coredump_params cprm = { 1723 - .signr = signr, 1724 - .regs = regs, 1725 - .limit = rlimit(RLIMIT_CORE), 1726 - /* 1727 - * We must use the same mm->flags while dumping core to avoid 1728 - * inconsistency of bit flags, since this flag is not protected 1729 - * by any locks. 
1730 - */ 1731 - .mm_flags = mm->flags, 1732 - }; 1733 - 1734 - audit_core_dumps(signr); 1735 - 1736 - binfmt = mm->binfmt; 1737 - if (!binfmt || !binfmt->core_dump) 1738 - goto fail; 1739 - if (!__get_dumpable(cprm.mm_flags)) 1740 - goto fail; 1741 - 1742 - cred = prepare_creds(); 1743 - if (!cred) 1744 - goto fail; 1745 - /* 1746 - * We cannot trust fsuid as being the "true" uid of the process 1747 - * nor do we know its entire history. We only know it was tainted 1748 - * so we dump it as root in mode 2, and only into a controlled 1749 - * environment (pipe handler or fully qualified path). 1750 - */ 1751 - if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { 1752 - /* Setuid core dump mode */ 1753 - flag = O_EXCL; /* Stop rewrite attacks */ 1754 - cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 1755 - need_nonrelative = true; 1756 - } 1757 - 1758 - retval = coredump_wait(exit_code, &core_state); 1759 - if (retval < 0) 1760 - goto fail_creds; 1761 - 1762 - old_cred = override_creds(cred); 1763 - 1764 - /* 1765 - * Clear any false indication of pending signals that might 1766 - * be seen by the filesystem code called to write the core file. 1767 - */ 1768 - clear_thread_flag(TIF_SIGPENDING); 1769 - 1770 - ispipe = format_corename(&cn, signr); 1771 - 1772 - if (ispipe) { 1773 - int dump_count; 1774 - char **helper_argv; 1775 - 1776 - if (ispipe < 0) { 1777 - printk(KERN_WARNING "format_corename failed\n"); 1778 - printk(KERN_WARNING "Aborting core\n"); 1779 - goto fail_corename; 1780 - } 1781 - 1782 - if (cprm.limit == 1) { 1783 - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 1784 - * 1785 - * Normally core limits are irrelevant to pipes, since 1786 - * we're not writing to the file system, but we use 1787 - * cprm.limit of 1 here as a speacial value, this is a 1788 - * consistent way to catch recursive crashes. 
1789 - * We can still crash if the core_pattern binary sets 1790 - * RLIM_CORE = !1, but it runs as root, and can do 1791 - * lots of stupid things. 1792 - * 1793 - * Note that we use task_tgid_vnr here to grab the pid 1794 - * of the process group leader. That way we get the 1795 - * right pid if a thread in a multi-threaded 1796 - * core_pattern process dies. 1797 - */ 1798 - printk(KERN_WARNING 1799 - "Process %d(%s) has RLIMIT_CORE set to 1\n", 1800 - task_tgid_vnr(current), current->comm); 1801 - printk(KERN_WARNING "Aborting core\n"); 1802 - goto fail_unlock; 1803 - } 1804 - cprm.limit = RLIM_INFINITY; 1805 - 1806 - dump_count = atomic_inc_return(&core_dump_count); 1807 - if (core_pipe_limit && (core_pipe_limit < dump_count)) { 1808 - printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 1809 - task_tgid_vnr(current), current->comm); 1810 - printk(KERN_WARNING "Skipping core dump\n"); 1811 - goto fail_dropcount; 1812 - } 1813 - 1814 - helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); 1815 - if (!helper_argv) { 1816 - printk(KERN_WARNING "%s failed to allocate memory\n", 1817 - __func__); 1818 - goto fail_dropcount; 1819 - } 1820 - 1821 - retval = call_usermodehelper_fns(helper_argv[0], helper_argv, 1822 - NULL, UMH_WAIT_EXEC, umh_pipe_setup, 1823 - NULL, &cprm); 1824 - argv_free(helper_argv); 1825 - if (retval) { 1826 - printk(KERN_INFO "Core dump to %s pipe failed\n", 1827 - cn.corename); 1828 - goto close_fail; 1829 - } 1830 - } else { 1831 - struct inode *inode; 1832 - 1833 - if (cprm.limit < binfmt->min_coredump) 1834 - goto fail_unlock; 1835 - 1836 - if (need_nonrelative && cn.corename[0] != '/') { 1837 - printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 1838 - "to fully qualified path!\n", 1839 - task_tgid_vnr(current), current->comm); 1840 - printk(KERN_WARNING "Skipping core dump\n"); 1841 - goto fail_unlock; 1842 - } 1843 - 1844 - cprm.file = filp_open(cn.corename, 1845 - O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 1846 - 0600); 
1847 - if (IS_ERR(cprm.file)) 1848 - goto fail_unlock; 1849 - 1850 - inode = cprm.file->f_path.dentry->d_inode; 1851 - if (inode->i_nlink > 1) 1852 - goto close_fail; 1853 - if (d_unhashed(cprm.file->f_path.dentry)) 1854 - goto close_fail; 1855 - /* 1856 - * AK: actually i see no reason to not allow this for named 1857 - * pipes etc, but keep the previous behaviour for now. 1858 - */ 1859 - if (!S_ISREG(inode->i_mode)) 1860 - goto close_fail; 1861 - /* 1862 - * Dont allow local users get cute and trick others to coredump 1863 - * into their pre-created files. 1864 - */ 1865 - if (!uid_eq(inode->i_uid, current_fsuid())) 1866 - goto close_fail; 1867 - if (!cprm.file->f_op || !cprm.file->f_op->write) 1868 - goto close_fail; 1869 - if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 1870 - goto close_fail; 1871 - } 1872 - 1873 - retval = binfmt->core_dump(&cprm); 1874 - if (retval) 1875 - current->signal->group_exit_code |= 0x80; 1876 - 1877 - if (ispipe && core_pipe_limit) 1878 - wait_for_dump_helpers(cprm.file); 1879 - close_fail: 1880 - if (cprm.file) 1881 - filp_close(cprm.file, NULL); 1882 - fail_dropcount: 1883 - if (ispipe) 1884 - atomic_dec(&core_dump_count); 1885 - fail_unlock: 1886 - kfree(cn.corename); 1887 - fail_corename: 1888 - coredump_finish(mm); 1889 - revert_creds(old_cred); 1890 - fail_creds: 1891 - put_cred(cred); 1892 - fail: 1893 - return; 1894 - } 1895 - 1896 - /* 1897 - * Core dumping helper functions. These are the only things you should 1898 - * do on a core-file: use only these functions to write out all the 1899 - * necessary info. 
1900 - */ 1901 - int dump_write(struct file *file, const void *addr, int nr) 1902 - { 1903 - return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; 1904 - } 1905 - EXPORT_SYMBOL(dump_write); 1906 - 1907 - int dump_seek(struct file *file, loff_t off) 1908 - { 1909 - int ret = 1; 1910 - 1911 - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 1912 - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) 1913 - return 0; 1914 - } else { 1915 - char *buf = (char *)get_zeroed_page(GFP_KERNEL); 1916 - 1917 - if (!buf) 1918 - return 0; 1919 - while (off > 0) { 1920 - unsigned long n = off; 1921 - 1922 - if (n > PAGE_SIZE) 1923 - n = PAGE_SIZE; 1924 - if (!dump_write(file, buf, n)) { 1925 - ret = 0; 1926 - break; 1927 - } 1928 - off -= n; 1929 - } 1930 - free_page((unsigned long)buf); 1931 - } 1932 - return ret; 1933 - } 1934 - EXPORT_SYMBOL(dump_seek);
+5
fs/exofs/super.c
··· 206 206 */ 207 207 static void destroy_inodecache(void) 208 208 { 209 + /* 210 + * Make sure all delayed rcu free inodes are flushed before we 211 + * destroy cache. 212 + */ 213 + rcu_barrier(); 209 214 kmem_cache_destroy(exofs_inode_cachep); 210 215 } 211 216
+5
fs/ext2/super.c
··· 206 206 207 207 static void destroy_inodecache(void) 208 208 { 209 + /* 210 + * Make sure all delayed rcu free inodes are flushed before we 211 + * destroy cache. 212 + */ 213 + rcu_barrier(); 209 214 kmem_cache_destroy(ext2_inode_cachep); 210 215 } 211 216
+5
fs/ext3/super.c
··· 532 532 533 533 static void destroy_inodecache(void) 534 534 { 535 + /* 536 + * Make sure all delayed rcu free inodes are flushed before we 537 + * destroy cache. 538 + */ 539 + rcu_barrier(); 535 540 kmem_cache_destroy(ext3_inode_cachep); 536 541 } 537 542
+8 -7
fs/ext4/ioctl.c
··· 233 233 234 234 case EXT4_IOC_MOVE_EXT: { 235 235 struct move_extent me; 236 - struct file *donor_filp; 236 + struct fd donor; 237 237 int err; 238 238 239 239 if (!(filp->f_mode & FMODE_READ) || ··· 245 245 return -EFAULT; 246 246 me.moved_len = 0; 247 247 248 - donor_filp = fget(me.donor_fd); 249 - if (!donor_filp) 248 + donor = fdget(me.donor_fd); 249 + if (!donor.file) 250 250 return -EBADF; 251 251 252 - if (!(donor_filp->f_mode & FMODE_WRITE)) { 252 + if (!(donor.file->f_mode & FMODE_WRITE)) { 253 253 err = -EBADF; 254 254 goto mext_out; 255 255 } ··· 258 258 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 259 259 ext4_msg(sb, KERN_ERR, 260 260 "Online defrag not supported with bigalloc"); 261 - return -EOPNOTSUPP; 261 + err = -EOPNOTSUPP; 262 + goto mext_out; 262 263 } 263 264 264 265 err = mnt_want_write_file(filp); 265 266 if (err) 266 267 goto mext_out; 267 268 268 - err = ext4_move_extents(filp, donor_filp, me.orig_start, 269 + err = ext4_move_extents(filp, donor.file, me.orig_start, 269 270 me.donor_start, me.len, &me.moved_len); 270 271 mnt_drop_write_file(filp); 271 272 ··· 274 273 &me, sizeof(me))) 275 274 err = -EFAULT; 276 275 mext_out: 277 - fput(donor_filp); 276 + fdput(donor); 278 277 return err; 279 278 } 280 279
+5
fs/ext4/super.c
··· 1019 1019 1020 1020 static void destroy_inodecache(void) 1021 1021 { 1022 + /* 1023 + * Make sure all delayed rcu free inodes are flushed before we 1024 + * destroy cache. 1025 + */ 1026 + rcu_barrier(); 1022 1027 kmem_cache_destroy(ext4_inode_cachep); 1023 1028 } 1024 1029
+5
fs/fat/inode.c
··· 521 521 522 522 static void __exit fat_destroy_inodecache(void) 523 523 { 524 + /* 525 + * Make sure all delayed rcu free inodes are flushed before we 526 + * destroy cache. 527 + */ 528 + rcu_barrier(); 524 529 kmem_cache_destroy(fat_inode_cachep); 525 530 } 526 531
+20 -146
fs/fcntl.c
··· 26 26 #include <asm/siginfo.h> 27 27 #include <asm/uaccess.h> 28 28 29 - void set_close_on_exec(unsigned int fd, int flag) 30 - { 31 - struct files_struct *files = current->files; 32 - struct fdtable *fdt; 33 - spin_lock(&files->file_lock); 34 - fdt = files_fdtable(files); 35 - if (flag) 36 - __set_close_on_exec(fd, fdt); 37 - else 38 - __clear_close_on_exec(fd, fdt); 39 - spin_unlock(&files->file_lock); 40 - } 41 - 42 - static bool get_close_on_exec(unsigned int fd) 43 - { 44 - struct files_struct *files = current->files; 45 - struct fdtable *fdt; 46 - bool res; 47 - rcu_read_lock(); 48 - fdt = files_fdtable(files); 49 - res = close_on_exec(fd, fdt); 50 - rcu_read_unlock(); 51 - return res; 52 - } 53 - 54 - SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) 55 - { 56 - int err = -EBADF; 57 - struct file * file, *tofree; 58 - struct files_struct * files = current->files; 59 - struct fdtable *fdt; 60 - 61 - if ((flags & ~O_CLOEXEC) != 0) 62 - return -EINVAL; 63 - 64 - if (unlikely(oldfd == newfd)) 65 - return -EINVAL; 66 - 67 - spin_lock(&files->file_lock); 68 - err = expand_files(files, newfd); 69 - file = fcheck(oldfd); 70 - if (unlikely(!file)) 71 - goto Ebadf; 72 - if (unlikely(err < 0)) { 73 - if (err == -EMFILE) 74 - goto Ebadf; 75 - goto out_unlock; 76 - } 77 - /* 78 - * We need to detect attempts to do dup2() over allocated but still 79 - * not finished descriptor. NB: OpenBSD avoids that at the price of 80 - * extra work in their equivalent of fget() - they insert struct 81 - * file immediately after grabbing descriptor, mark it larval if 82 - * more work (e.g. actual opening) is needed and make sure that 83 - * fget() treats larval files as absent. Potentially interesting, 84 - * but while extra work in fget() is trivial, locking implications 85 - * and amount of surgery on open()-related paths in VFS are not. 
86 - * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" 87 - * deadlocks in rather amusing ways, AFAICS. All of that is out of 88 - * scope of POSIX or SUS, since neither considers shared descriptor 89 - * tables and this condition does not arise without those. 90 - */ 91 - err = -EBUSY; 92 - fdt = files_fdtable(files); 93 - tofree = fdt->fd[newfd]; 94 - if (!tofree && fd_is_open(newfd, fdt)) 95 - goto out_unlock; 96 - get_file(file); 97 - rcu_assign_pointer(fdt->fd[newfd], file); 98 - __set_open_fd(newfd, fdt); 99 - if (flags & O_CLOEXEC) 100 - __set_close_on_exec(newfd, fdt); 101 - else 102 - __clear_close_on_exec(newfd, fdt); 103 - spin_unlock(&files->file_lock); 104 - 105 - if (tofree) 106 - filp_close(tofree, files); 107 - 108 - return newfd; 109 - 110 - Ebadf: 111 - err = -EBADF; 112 - out_unlock: 113 - spin_unlock(&files->file_lock); 114 - return err; 115 - } 116 - 117 - SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) 118 - { 119 - if (unlikely(newfd == oldfd)) { /* corner case */ 120 - struct files_struct *files = current->files; 121 - int retval = oldfd; 122 - 123 - rcu_read_lock(); 124 - if (!fcheck_files(files, oldfd)) 125 - retval = -EBADF; 126 - rcu_read_unlock(); 127 - return retval; 128 - } 129 - return sys_dup3(oldfd, newfd, 0); 130 - } 131 - 132 - SYSCALL_DEFINE1(dup, unsigned int, fildes) 133 - { 134 - int ret = -EBADF; 135 - struct file *file = fget_raw(fildes); 136 - 137 - if (file) { 138 - ret = get_unused_fd(); 139 - if (ret >= 0) 140 - fd_install(ret, file); 141 - else 142 - fput(file); 143 - } 144 - return ret; 145 - } 146 - 147 29 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) 148 30 149 31 static int setfl(int fd, struct file * filp, unsigned long arg) ··· 149 267 150 268 static int f_setown_ex(struct file *filp, unsigned long arg) 151 269 { 152 - struct f_owner_ex * __user owner_p = (void * __user)arg; 270 + struct f_owner_ex __user *owner_p = (void __user *)arg; 153 271 
struct f_owner_ex owner; 154 272 struct pid *pid; 155 273 int type; ··· 189 307 190 308 static int f_getown_ex(struct file *filp, unsigned long arg) 191 309 { 192 - struct f_owner_ex * __user owner_p = (void * __user)arg; 310 + struct f_owner_ex __user *owner_p = (void __user *)arg; 193 311 struct f_owner_ex owner; 194 312 int ret = 0; 195 313 ··· 227 345 static int f_getowner_uids(struct file *filp, unsigned long arg) 228 346 { 229 347 struct user_namespace *user_ns = current_user_ns(); 230 - uid_t * __user dst = (void * __user)arg; 348 + uid_t __user *dst = (void __user *)arg; 231 349 uid_t src[2]; 232 350 int err; 233 351 ··· 255 373 256 374 switch (cmd) { 257 375 case F_DUPFD: 376 + err = f_dupfd(arg, filp, 0); 377 + break; 258 378 case F_DUPFD_CLOEXEC: 259 - if (arg >= rlimit(RLIMIT_NOFILE)) 260 - break; 261 - err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); 262 - if (err >= 0) { 263 - get_file(filp); 264 - fd_install(err, filp); 265 - } 379 + err = f_dupfd(arg, filp, FD_CLOEXEC); 266 380 break; 267 381 case F_GETFD: 268 382 err = get_close_on_exec(fd) ? 
FD_CLOEXEC : 0; ··· 348 470 349 471 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 350 472 { 351 - struct file *filp; 352 - int fput_needed; 473 + struct fd f = fdget_raw(fd); 353 474 long err = -EBADF; 354 475 355 - filp = fget_raw_light(fd, &fput_needed); 356 - if (!filp) 476 + if (!f.file) 357 477 goto out; 358 478 359 - if (unlikely(filp->f_mode & FMODE_PATH)) { 479 + if (unlikely(f.file->f_mode & FMODE_PATH)) { 360 480 if (!check_fcntl_cmd(cmd)) 361 481 goto out1; 362 482 } 363 483 364 - err = security_file_fcntl(filp, cmd, arg); 484 + err = security_file_fcntl(f.file, cmd, arg); 365 485 if (!err) 366 - err = do_fcntl(fd, cmd, arg, filp); 486 + err = do_fcntl(fd, cmd, arg, f.file); 367 487 368 488 out1: 369 - fput_light(filp, fput_needed); 489 + fdput(f); 370 490 out: 371 491 return err; 372 492 } ··· 373 497 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, 374 498 unsigned long, arg) 375 499 { 376 - struct file * filp; 500 + struct fd f = fdget_raw(fd); 377 501 long err = -EBADF; 378 - int fput_needed; 379 502 380 - filp = fget_raw_light(fd, &fput_needed); 381 - if (!filp) 503 + if (!f.file) 382 504 goto out; 383 505 384 - if (unlikely(filp->f_mode & FMODE_PATH)) { 506 + if (unlikely(f.file->f_mode & FMODE_PATH)) { 385 507 if (!check_fcntl_cmd(cmd)) 386 508 goto out1; 387 509 } 388 510 389 - err = security_file_fcntl(filp, cmd, arg); 511 + err = security_file_fcntl(f.file, cmd, arg); 390 512 if (err) 391 513 goto out1; 392 514 393 515 switch (cmd) { 394 516 case F_GETLK64: 395 - err = fcntl_getlk64(filp, (struct flock64 __user *) arg); 517 + err = fcntl_getlk64(f.file, (struct flock64 __user *) arg); 396 518 break; 397 519 case F_SETLK64: 398 520 case F_SETLKW64: 399 - err = fcntl_setlk64(fd, filp, cmd, 521 + err = fcntl_setlk64(fd, f.file, cmd, 400 522 (struct flock64 __user *) arg); 401 523 break; 402 524 default: 403 - err = do_fcntl(fd, cmd, arg, filp); 525 + err = do_fcntl(fd, cmd, arg, f.file); 404 526 
break; 405 527 } 406 528 out1: 407 - fput_light(filp, fput_needed); 529 + fdput(f); 408 530 out: 409 531 return err; 410 532 }
+7 -10
fs/fhandle.c
··· 113 113 114 114 static struct vfsmount *get_vfsmount_from_fd(int fd) 115 115 { 116 - struct path path; 116 + struct vfsmount *mnt; 117 117 118 118 if (fd == AT_FDCWD) { 119 119 struct fs_struct *fs = current->fs; 120 120 spin_lock(&fs->lock); 121 - path = fs->pwd; 122 - mntget(path.mnt); 121 + mnt = mntget(fs->pwd.mnt); 123 122 spin_unlock(&fs->lock); 124 123 } else { 125 - int fput_needed; 126 - struct file *file = fget_light(fd, &fput_needed); 127 - if (!file) 124 + struct fd f = fdget(fd); 125 + if (!f.file) 128 126 return ERR_PTR(-EBADF); 129 - path = file->f_path; 130 - mntget(path.mnt); 131 - fput_light(file, fput_needed); 127 + mnt = mntget(f.file->f_path.mnt); 128 + fdput(f); 132 129 } 133 - return path.mnt; 130 + return mnt; 134 131 } 135 132 136 133 static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+549 -24
fs/file.c
··· 6 6 * Manage the dynamic fd arrays in the process files_struct. 7 7 */ 8 8 9 + #include <linux/syscalls.h> 9 10 #include <linux/export.h> 10 11 #include <linux/fs.h> 11 12 #include <linux/mm.h> ··· 85 84 } 86 85 } 87 86 88 - void free_fdtable_rcu(struct rcu_head *rcu) 87 + static void free_fdtable_rcu(struct rcu_head *rcu) 89 88 { 90 89 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); 91 90 struct fdtable_defer *fddef; 92 91 93 92 BUG_ON(!fdt); 93 + BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT); 94 94 95 - if (fdt->max_fds <= NR_OPEN_DEFAULT) { 96 - /* 97 - * This fdtable is embedded in the files structure and that 98 - * structure itself is getting destroyed. 99 - */ 100 - kmem_cache_free(files_cachep, 101 - container_of(fdt, struct files_struct, fdtab)); 102 - return; 103 - } 104 95 if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { 105 96 kfree(fdt->fd); 106 97 kfree(fdt->open_fds); ··· 222 229 copy_fdtable(new_fdt, cur_fdt); 223 230 rcu_assign_pointer(files->fdt, new_fdt); 224 231 if (cur_fdt->max_fds > NR_OPEN_DEFAULT) 225 - free_fdtable(cur_fdt); 232 + call_rcu(&cur_fdt->rcu, free_fdtable_rcu); 226 233 } else { 227 234 /* Somebody else expanded, so undo our attempt */ 228 235 __free_fdtable(new_fdt); ··· 238 245 * expanded and execution may have blocked. 239 246 * The files->file_lock should be held on entry, and will be held on exit. 240 247 */ 241 - int expand_files(struct files_struct *files, int nr) 248 + static int expand_files(struct files_struct *files, int nr) 242 249 { 243 250 struct fdtable *fdt; 244 251 245 252 fdt = files_fdtable(files); 246 - 247 - /* 248 - * N.B. For clone tasks sharing a files structure, this test 249 - * will limit the total number of files that can be opened. 250 - */ 251 - if (nr >= rlimit(RLIMIT_NOFILE)) 252 - return -EMFILE; 253 253 254 254 /* Do we need to expand? 
*/ 255 255 if (nr < fdt->max_fds) ··· 254 268 255 269 /* All good, so we try */ 256 270 return expand_fdtable(files, nr); 271 + } 272 + 273 + static inline void __set_close_on_exec(int fd, struct fdtable *fdt) 274 + { 275 + __set_bit(fd, fdt->close_on_exec); 276 + } 277 + 278 + static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) 279 + { 280 + __clear_bit(fd, fdt->close_on_exec); 281 + } 282 + 283 + static inline void __set_open_fd(int fd, struct fdtable *fdt) 284 + { 285 + __set_bit(fd, fdt->open_fds); 286 + } 287 + 288 + static inline void __clear_open_fd(int fd, struct fdtable *fdt) 289 + { 290 + __clear_bit(fd, fdt->open_fds); 257 291 } 258 292 259 293 static int count_open_files(struct fdtable *fdt) ··· 401 395 return NULL; 402 396 } 403 397 398 + static void close_files(struct files_struct * files) 399 + { 400 + int i, j; 401 + struct fdtable *fdt; 402 + 403 + j = 0; 404 + 405 + /* 406 + * It is safe to dereference the fd table without RCU or 407 + * ->file_lock because this is the last reference to the 408 + * files structure. But use RCU to shut RCU-lockdep up. 
409 + */ 410 + rcu_read_lock(); 411 + fdt = files_fdtable(files); 412 + rcu_read_unlock(); 413 + for (;;) { 414 + unsigned long set; 415 + i = j * BITS_PER_LONG; 416 + if (i >= fdt->max_fds) 417 + break; 418 + set = fdt->open_fds[j++]; 419 + while (set) { 420 + if (set & 1) { 421 + struct file * file = xchg(&fdt->fd[i], NULL); 422 + if (file) { 423 + filp_close(file, files); 424 + cond_resched(); 425 + } 426 + } 427 + i++; 428 + set >>= 1; 429 + } 430 + } 431 + } 432 + 433 + struct files_struct *get_files_struct(struct task_struct *task) 434 + { 435 + struct files_struct *files; 436 + 437 + task_lock(task); 438 + files = task->files; 439 + if (files) 440 + atomic_inc(&files->count); 441 + task_unlock(task); 442 + 443 + return files; 444 + } 445 + 446 + void put_files_struct(struct files_struct *files) 447 + { 448 + struct fdtable *fdt; 449 + 450 + if (atomic_dec_and_test(&files->count)) { 451 + close_files(files); 452 + /* not really needed, since nobody can see us */ 453 + rcu_read_lock(); 454 + fdt = files_fdtable(files); 455 + rcu_read_unlock(); 456 + /* free the arrays if they are not embedded */ 457 + if (fdt != &files->fdtab) 458 + __free_fdtable(fdt); 459 + kmem_cache_free(files_cachep, files); 460 + } 461 + } 462 + 463 + void reset_files_struct(struct files_struct *files) 464 + { 465 + struct task_struct *tsk = current; 466 + struct files_struct *old; 467 + 468 + old = tsk->files; 469 + task_lock(tsk); 470 + tsk->files = files; 471 + task_unlock(tsk); 472 + put_files_struct(old); 473 + } 474 + 475 + void exit_files(struct task_struct *tsk) 476 + { 477 + struct files_struct * files = tsk->files; 478 + 479 + if (files) { 480 + task_lock(tsk); 481 + tsk->files = NULL; 482 + task_unlock(tsk); 483 + put_files_struct(files); 484 + } 485 + } 486 + 404 487 static void __devinit fdtable_defer_list_init(int cpu) 405 488 { 406 489 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); ··· 519 424 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 520 
425 }; 521 426 427 + void daemonize_descriptors(void) 428 + { 429 + atomic_inc(&init_files.count); 430 + reset_files_struct(&init_files); 431 + } 432 + 522 433 /* 523 434 * allocate a file descriptor, mark it busy. 524 435 */ 525 - int alloc_fd(unsigned start, unsigned flags) 436 + int __alloc_fd(struct files_struct *files, 437 + unsigned start, unsigned end, unsigned flags) 526 438 { 527 - struct files_struct *files = current->files; 528 439 unsigned int fd; 529 440 int error; 530 441 struct fdtable *fdt; ··· 544 443 545 444 if (fd < fdt->max_fds) 546 445 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); 446 + 447 + /* 448 + * N.B. For clone tasks sharing a files structure, this test 449 + * will limit the total number of files that can be opened. 450 + */ 451 + error = -EMFILE; 452 + if (fd >= end) 453 + goto out; 547 454 548 455 error = expand_files(files, fd); 549 456 if (error < 0) ··· 586 477 return error; 587 478 } 588 479 589 - int get_unused_fd(void) 480 + static int alloc_fd(unsigned start, unsigned flags) 590 481 { 591 - return alloc_fd(0, 0); 482 + return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); 592 483 } 593 - EXPORT_SYMBOL(get_unused_fd); 484 + 485 + int get_unused_fd_flags(unsigned flags) 486 + { 487 + return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); 488 + } 489 + EXPORT_SYMBOL(get_unused_fd_flags); 490 + 491 + static void __put_unused_fd(struct files_struct *files, unsigned int fd) 492 + { 493 + struct fdtable *fdt = files_fdtable(files); 494 + __clear_open_fd(fd, fdt); 495 + if (fd < files->next_fd) 496 + files->next_fd = fd; 497 + } 498 + 499 + void put_unused_fd(unsigned int fd) 500 + { 501 + struct files_struct *files = current->files; 502 + spin_lock(&files->file_lock); 503 + __put_unused_fd(files, fd); 504 + spin_unlock(&files->file_lock); 505 + } 506 + 507 + EXPORT_SYMBOL(put_unused_fd); 508 + 509 + /* 510 + * Install a file pointer in the fd array. 
511 + * 512 + * The VFS is full of places where we drop the files lock between 513 + * setting the open_fds bitmap and installing the file in the file 514 + * array. At any such point, we are vulnerable to a dup2() race 515 + * installing a file in the array before us. We need to detect this and 516 + * fput() the struct file we are about to overwrite in this case. 517 + * 518 + * It should never happen - if we allow dup2() do it, _really_ bad things 519 + * will follow. 520 + * 521 + * NOTE: __fd_install() variant is really, really low-level; don't 522 + * use it unless you are forced to by truly lousy API shoved down 523 + * your throat. 'files' *MUST* be either current->files or obtained 524 + * by get_files_struct(current) done by whoever had given it to you, 525 + * or really bad things will happen. Normally you want to use 526 + * fd_install() instead. 527 + */ 528 + 529 + void __fd_install(struct files_struct *files, unsigned int fd, 530 + struct file *file) 531 + { 532 + struct fdtable *fdt; 533 + spin_lock(&files->file_lock); 534 + fdt = files_fdtable(files); 535 + BUG_ON(fdt->fd[fd] != NULL); 536 + rcu_assign_pointer(fdt->fd[fd], file); 537 + spin_unlock(&files->file_lock); 538 + } 539 + 540 + void fd_install(unsigned int fd, struct file *file) 541 + { 542 + __fd_install(current->files, fd, file); 543 + } 544 + 545 + EXPORT_SYMBOL(fd_install); 546 + 547 + /* 548 + * The same warnings as for __alloc_fd()/__fd_install() apply here... 
549 + */ 550 + int __close_fd(struct files_struct *files, unsigned fd) 551 + { 552 + struct file *file; 553 + struct fdtable *fdt; 554 + 555 + spin_lock(&files->file_lock); 556 + fdt = files_fdtable(files); 557 + if (fd >= fdt->max_fds) 558 + goto out_unlock; 559 + file = fdt->fd[fd]; 560 + if (!file) 561 + goto out_unlock; 562 + rcu_assign_pointer(fdt->fd[fd], NULL); 563 + __clear_close_on_exec(fd, fdt); 564 + __put_unused_fd(files, fd); 565 + spin_unlock(&files->file_lock); 566 + return filp_close(file, files); 567 + 568 + out_unlock: 569 + spin_unlock(&files->file_lock); 570 + return -EBADF; 571 + } 572 + 573 + void do_close_on_exec(struct files_struct *files) 574 + { 575 + unsigned i; 576 + struct fdtable *fdt; 577 + 578 + /* exec unshares first */ 579 + BUG_ON(atomic_read(&files->count) != 1); 580 + spin_lock(&files->file_lock); 581 + for (i = 0; ; i++) { 582 + unsigned long set; 583 + unsigned fd = i * BITS_PER_LONG; 584 + fdt = files_fdtable(files); 585 + if (fd >= fdt->max_fds) 586 + break; 587 + set = fdt->close_on_exec[i]; 588 + if (!set) 589 + continue; 590 + fdt->close_on_exec[i] = 0; 591 + for ( ; set ; fd++, set >>= 1) { 592 + struct file *file; 593 + if (!(set & 1)) 594 + continue; 595 + file = fdt->fd[fd]; 596 + if (!file) 597 + continue; 598 + rcu_assign_pointer(fdt->fd[fd], NULL); 599 + __put_unused_fd(files, fd); 600 + spin_unlock(&files->file_lock); 601 + filp_close(file, files); 602 + cond_resched(); 603 + spin_lock(&files->file_lock); 604 + } 605 + 606 + } 607 + spin_unlock(&files->file_lock); 608 + } 609 + 610 + struct file *fget(unsigned int fd) 611 + { 612 + struct file *file; 613 + struct files_struct *files = current->files; 614 + 615 + rcu_read_lock(); 616 + file = fcheck_files(files, fd); 617 + if (file) { 618 + /* File object ref couldn't be taken */ 619 + if (file->f_mode & FMODE_PATH || 620 + !atomic_long_inc_not_zero(&file->f_count)) 621 + file = NULL; 622 + } 623 + rcu_read_unlock(); 624 + 625 + return file; 626 + } 627 + 628 + 
EXPORT_SYMBOL(fget); 629 + 630 + struct file *fget_raw(unsigned int fd) 631 + { 632 + struct file *file; 633 + struct files_struct *files = current->files; 634 + 635 + rcu_read_lock(); 636 + file = fcheck_files(files, fd); 637 + if (file) { 638 + /* File object ref couldn't be taken */ 639 + if (!atomic_long_inc_not_zero(&file->f_count)) 640 + file = NULL; 641 + } 642 + rcu_read_unlock(); 643 + 644 + return file; 645 + } 646 + 647 + EXPORT_SYMBOL(fget_raw); 648 + 649 + /* 650 + * Lightweight file lookup - no refcnt increment if fd table isn't shared. 651 + * 652 + * You can use this instead of fget if you satisfy all of the following 653 + * conditions: 654 + * 1) You must call fput_light before exiting the syscall and returning control 655 + * to userspace (i.e. you cannot remember the returned struct file * after 656 + * returning to userspace). 657 + * 2) You must not call filp_close on the returned struct file * in between 658 + * calls to fget_light and fput_light. 659 + * 3) You must not clone the current task in between the calls to fget_light 660 + * and fput_light. 661 + * 662 + * The fput_needed flag returned by fget_light should be passed to the 663 + * corresponding fput_light. 
664 + */ 665 + struct file *fget_light(unsigned int fd, int *fput_needed) 666 + { 667 + struct file *file; 668 + struct files_struct *files = current->files; 669 + 670 + *fput_needed = 0; 671 + if (atomic_read(&files->count) == 1) { 672 + file = fcheck_files(files, fd); 673 + if (file && (file->f_mode & FMODE_PATH)) 674 + file = NULL; 675 + } else { 676 + rcu_read_lock(); 677 + file = fcheck_files(files, fd); 678 + if (file) { 679 + if (!(file->f_mode & FMODE_PATH) && 680 + atomic_long_inc_not_zero(&file->f_count)) 681 + *fput_needed = 1; 682 + else 683 + /* Didn't get the reference, someone's freed */ 684 + file = NULL; 685 + } 686 + rcu_read_unlock(); 687 + } 688 + 689 + return file; 690 + } 691 + EXPORT_SYMBOL(fget_light); 692 + 693 + struct file *fget_raw_light(unsigned int fd, int *fput_needed) 694 + { 695 + struct file *file; 696 + struct files_struct *files = current->files; 697 + 698 + *fput_needed = 0; 699 + if (atomic_read(&files->count) == 1) { 700 + file = fcheck_files(files, fd); 701 + } else { 702 + rcu_read_lock(); 703 + file = fcheck_files(files, fd); 704 + if (file) { 705 + if (atomic_long_inc_not_zero(&file->f_count)) 706 + *fput_needed = 1; 707 + else 708 + /* Didn't get the reference, someone's freed */ 709 + file = NULL; 710 + } 711 + rcu_read_unlock(); 712 + } 713 + 714 + return file; 715 + } 716 + 717 + void set_close_on_exec(unsigned int fd, int flag) 718 + { 719 + struct files_struct *files = current->files; 720 + struct fdtable *fdt; 721 + spin_lock(&files->file_lock); 722 + fdt = files_fdtable(files); 723 + if (flag) 724 + __set_close_on_exec(fd, fdt); 725 + else 726 + __clear_close_on_exec(fd, fdt); 727 + spin_unlock(&files->file_lock); 728 + } 729 + 730 + bool get_close_on_exec(unsigned int fd) 731 + { 732 + struct files_struct *files = current->files; 733 + struct fdtable *fdt; 734 + bool res; 735 + rcu_read_lock(); 736 + fdt = files_fdtable(files); 737 + res = close_on_exec(fd, fdt); 738 + rcu_read_unlock(); 739 + return res; 740 + } 
741 + 742 + static int do_dup2(struct files_struct *files, 743 + struct file *file, unsigned fd, unsigned flags) 744 + { 745 + struct file *tofree; 746 + struct fdtable *fdt; 747 + 748 + /* 749 + * We need to detect attempts to do dup2() over allocated but still 750 + * not finished descriptor. NB: OpenBSD avoids that at the price of 751 + * extra work in their equivalent of fget() - they insert struct 752 + * file immediately after grabbing descriptor, mark it larval if 753 + * more work (e.g. actual opening) is needed and make sure that 754 + * fget() treats larval files as absent. Potentially interesting, 755 + * but while extra work in fget() is trivial, locking implications 756 + * and amount of surgery on open()-related paths in VFS are not. 757 + * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" 758 + * deadlocks in rather amusing ways, AFAICS. All of that is out of 759 + * scope of POSIX or SUS, since neither considers shared descriptor 760 + * tables and this condition does not arise without those. 
761 + */ 762 + fdt = files_fdtable(files); 763 + tofree = fdt->fd[fd]; 764 + if (!tofree && fd_is_open(fd, fdt)) 765 + goto Ebusy; 766 + get_file(file); 767 + rcu_assign_pointer(fdt->fd[fd], file); 768 + __set_open_fd(fd, fdt); 769 + if (flags & O_CLOEXEC) 770 + __set_close_on_exec(fd, fdt); 771 + else 772 + __clear_close_on_exec(fd, fdt); 773 + spin_unlock(&files->file_lock); 774 + 775 + if (tofree) 776 + filp_close(tofree, files); 777 + 778 + return fd; 779 + 780 + Ebusy: 781 + spin_unlock(&files->file_lock); 782 + return -EBUSY; 783 + } 784 + 785 + int replace_fd(unsigned fd, struct file *file, unsigned flags) 786 + { 787 + int err; 788 + struct files_struct *files = current->files; 789 + 790 + if (!file) 791 + return __close_fd(files, fd); 792 + 793 + if (fd >= rlimit(RLIMIT_NOFILE)) 794 + return -EMFILE; 795 + 796 + spin_lock(&files->file_lock); 797 + err = expand_files(files, fd); 798 + if (unlikely(err < 0)) 799 + goto out_unlock; 800 + return do_dup2(files, file, fd, flags); 801 + 802 + out_unlock: 803 + spin_unlock(&files->file_lock); 804 + return err; 805 + } 806 + 807 + SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) 808 + { 809 + int err = -EBADF; 810 + struct file *file; 811 + struct files_struct *files = current->files; 812 + 813 + if ((flags & ~O_CLOEXEC) != 0) 814 + return -EINVAL; 815 + 816 + if (newfd >= rlimit(RLIMIT_NOFILE)) 817 + return -EMFILE; 818 + 819 + spin_lock(&files->file_lock); 820 + err = expand_files(files, newfd); 821 + file = fcheck(oldfd); 822 + if (unlikely(!file)) 823 + goto Ebadf; 824 + if (unlikely(err < 0)) { 825 + if (err == -EMFILE) 826 + goto Ebadf; 827 + goto out_unlock; 828 + } 829 + return do_dup2(files, file, newfd, flags); 830 + 831 + Ebadf: 832 + err = -EBADF; 833 + out_unlock: 834 + spin_unlock(&files->file_lock); 835 + return err; 836 + } 837 + 838 + SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) 839 + { 840 + if (unlikely(newfd == oldfd)) { /* corner case */ 841 + 
struct files_struct *files = current->files; 842 + int retval = oldfd; 843 + 844 + rcu_read_lock(); 845 + if (!fcheck_files(files, oldfd)) 846 + retval = -EBADF; 847 + rcu_read_unlock(); 848 + return retval; 849 + } 850 + return sys_dup3(oldfd, newfd, 0); 851 + } 852 + 853 + SYSCALL_DEFINE1(dup, unsigned int, fildes) 854 + { 855 + int ret = -EBADF; 856 + struct file *file = fget_raw(fildes); 857 + 858 + if (file) { 859 + ret = get_unused_fd(); 860 + if (ret >= 0) 861 + fd_install(ret, file); 862 + else 863 + fput(file); 864 + } 865 + return ret; 866 + } 867 + 868 + int f_dupfd(unsigned int from, struct file *file, unsigned flags) 869 + { 870 + int err; 871 + if (from >= rlimit(RLIMIT_NOFILE)) 872 + return -EINVAL; 873 + err = alloc_fd(from, flags); 874 + if (err >= 0) { 875 + get_file(file); 876 + fd_install(err, file); 877 + } 878 + return err; 879 + } 880 + 881 + int iterate_fd(struct files_struct *files, unsigned n, 882 + int (*f)(const void *, struct file *, unsigned), 883 + const void *p) 884 + { 885 + struct fdtable *fdt; 886 + struct file *file; 887 + int res = 0; 888 + if (!files) 889 + return 0; 890 + spin_lock(&files->file_lock); 891 + fdt = files_fdtable(files); 892 + while (!res && n < fdt->max_fds) { 893 + file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); 894 + if (file) 895 + res = f(p, file, n); 896 + } 897 + spin_unlock(&files->file_lock); 898 + return res; 899 + } 900 + EXPORT_SYMBOL(iterate_fd);
-106
fs/file_table.c
··· 339 339 340 340 EXPORT_SYMBOL(fput); 341 341 342 - struct file *fget(unsigned int fd) 343 - { 344 - struct file *file; 345 - struct files_struct *files = current->files; 346 - 347 - rcu_read_lock(); 348 - file = fcheck_files(files, fd); 349 - if (file) { 350 - /* File object ref couldn't be taken */ 351 - if (file->f_mode & FMODE_PATH || 352 - !atomic_long_inc_not_zero(&file->f_count)) 353 - file = NULL; 354 - } 355 - rcu_read_unlock(); 356 - 357 - return file; 358 - } 359 - 360 - EXPORT_SYMBOL(fget); 361 - 362 - struct file *fget_raw(unsigned int fd) 363 - { 364 - struct file *file; 365 - struct files_struct *files = current->files; 366 - 367 - rcu_read_lock(); 368 - file = fcheck_files(files, fd); 369 - if (file) { 370 - /* File object ref couldn't be taken */ 371 - if (!atomic_long_inc_not_zero(&file->f_count)) 372 - file = NULL; 373 - } 374 - rcu_read_unlock(); 375 - 376 - return file; 377 - } 378 - 379 - EXPORT_SYMBOL(fget_raw); 380 - 381 - /* 382 - * Lightweight file lookup - no refcnt increment if fd table isn't shared. 383 - * 384 - * You can use this instead of fget if you satisfy all of the following 385 - * conditions: 386 - * 1) You must call fput_light before exiting the syscall and returning control 387 - * to userspace (i.e. you cannot remember the returned struct file * after 388 - * returning to userspace). 389 - * 2) You must not call filp_close on the returned struct file * in between 390 - * calls to fget_light and fput_light. 391 - * 3) You must not clone the current task in between the calls to fget_light 392 - * and fput_light. 393 - * 394 - * The fput_needed flag returned by fget_light should be passed to the 395 - * corresponding fput_light. 
396 - */ 397 - struct file *fget_light(unsigned int fd, int *fput_needed) 398 - { 399 - struct file *file; 400 - struct files_struct *files = current->files; 401 - 402 - *fput_needed = 0; 403 - if (atomic_read(&files->count) == 1) { 404 - file = fcheck_files(files, fd); 405 - if (file && (file->f_mode & FMODE_PATH)) 406 - file = NULL; 407 - } else { 408 - rcu_read_lock(); 409 - file = fcheck_files(files, fd); 410 - if (file) { 411 - if (!(file->f_mode & FMODE_PATH) && 412 - atomic_long_inc_not_zero(&file->f_count)) 413 - *fput_needed = 1; 414 - else 415 - /* Didn't get the reference, someone's freed */ 416 - file = NULL; 417 - } 418 - rcu_read_unlock(); 419 - } 420 - 421 - return file; 422 - } 423 - 424 - struct file *fget_raw_light(unsigned int fd, int *fput_needed) 425 - { 426 - struct file *file; 427 - struct files_struct *files = current->files; 428 - 429 - *fput_needed = 0; 430 - if (atomic_read(&files->count) == 1) { 431 - file = fcheck_files(files, fd); 432 - } else { 433 - rcu_read_lock(); 434 - file = fcheck_files(files, fd); 435 - if (file) { 436 - if (atomic_long_inc_not_zero(&file->f_count)) 437 - *fput_needed = 1; 438 - else 439 - /* Didn't get the reference, someone's freed */ 440 - file = NULL; 441 - } 442 - rcu_read_unlock(); 443 - } 444 - 445 - return file; 446 - } 447 - 448 342 void put_filp(struct file *file) 449 343 { 450 344 if (atomic_long_dec_and_test(&file->f_count)) {
+5
fs/freevxfs/vxfs_super.c
··· 279 279 vxfs_cleanup(void) 280 280 { 281 281 unregister_filesystem(&vxfs_fs_type); 282 + /* 283 + * Make sure all delayed rcu free inodes are flushed before we 284 + * destroy cache. 285 + */ 286 + rcu_barrier(); 282 287 kmem_cache_destroy(vxfs_inode_cachep); 283 288 } 284 289
+1 -2
fs/fuse/dev.c
··· 148 148 if (ff->reserved_req) { 149 149 req = ff->reserved_req; 150 150 ff->reserved_req = NULL; 151 - get_file(file); 152 - req->stolen_file = file; 151 + req->stolen_file = get_file(file); 153 152 } 154 153 spin_unlock(&fc->lock); 155 154 } while (!req);
+6
fs/fuse/inode.c
··· 1197 1197 { 1198 1198 unregister_filesystem(&fuse_fs_type); 1199 1199 unregister_fuseblk(); 1200 + 1201 + /* 1202 + * Make sure all delayed rcu free inodes are flushed before we 1203 + * destroy cache. 1204 + */ 1205 + rcu_barrier(); 1200 1206 kmem_cache_destroy(fuse_inode_cachep); 1201 1207 } 1202 1208
+6
fs/hfs/super.c
··· 492 492 static void __exit exit_hfs_fs(void) 493 493 { 494 494 unregister_filesystem(&hfs_fs_type); 495 + 496 + /* 497 + * Make sure all delayed rcu free inodes are flushed before we 498 + * destroy cache. 499 + */ 500 + rcu_barrier(); 495 501 kmem_cache_destroy(hfs_inode_cachep); 496 502 } 497 503
+6
fs/hfsplus/super.c
··· 635 635 static void __exit exit_hfsplus_fs(void) 636 636 { 637 637 unregister_filesystem(&hfsplus_fs_type); 638 + 639 + /* 640 + * Make sure all delayed rcu free inodes are flushed before we 641 + * destroy cache. 642 + */ 643 + rcu_barrier(); 638 644 kmem_cache_destroy(hfsplus_inode_cachep); 639 645 } 640 646
+5
fs/hpfs/super.c
··· 210 210 211 211 static void destroy_inodecache(void) 212 212 { 213 + /* 214 + * Make sure all delayed rcu free inodes are flushed before we 215 + * destroy cache. 216 + */ 217 + rcu_barrier(); 213 218 kmem_cache_destroy(hpfs_inode_cachep); 214 219 } 215 220
+5
fs/hugetlbfs/inode.c
··· 1048 1048 1049 1049 static void __exit exit_hugetlbfs_fs(void) 1050 1050 { 1051 + /* 1052 + * Make sure all delayed rcu free inodes are flushed before we 1053 + * destroy cache. 1054 + */ 1055 + rcu_barrier(); 1051 1056 kmem_cache_destroy(hugetlbfs_inode_cachep); 1052 1057 kern_unmount(hugetlbfs_vfsmount); 1053 1058 unregister_filesystem(&hugetlbfs_fs_type);
+8 -15
fs/ioctl.c
··· 603 603 604 604 SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 605 605 { 606 - struct file *filp; 607 - int error = -EBADF; 608 - int fput_needed; 606 + int error; 607 + struct fd f = fdget(fd); 609 608 610 - filp = fget_light(fd, &fput_needed); 611 - if (!filp) 612 - goto out; 613 - 614 - error = security_file_ioctl(filp, cmd, arg); 615 - if (error) 616 - goto out_fput; 617 - 618 - error = do_vfs_ioctl(filp, fd, cmd, arg); 619 - out_fput: 620 - fput_light(filp, fput_needed); 621 - out: 609 + if (!f.file) 610 + return -EBADF; 611 + error = security_file_ioctl(f.file, cmd, arg); 612 + if (!error) 613 + error = do_vfs_ioctl(f.file, fd, cmd, arg); 614 + fdput(f); 622 615 return error; 623 616 }
+5
fs/isofs/inode.c
··· 115 115 116 116 static void destroy_inodecache(void) 117 117 { 118 + /* 119 + * Make sure all delayed rcu free inodes are flushed before we 120 + * destroy cache. 121 + */ 122 + rcu_barrier(); 118 123 kmem_cache_destroy(isofs_inode_cachep); 119 124 } 120 125
+6
fs/jffs2/super.c
··· 418 418 unregister_filesystem(&jffs2_fs_type); 419 419 jffs2_destroy_slab_caches(); 420 420 jffs2_compressors_exit(); 421 + 422 + /* 423 + * Make sure all delayed rcu free inodes are flushed before we 424 + * destroy cache. 425 + */ 426 + rcu_barrier(); 421 427 kmem_cache_destroy(jffs2_inode_cachep); 422 428 } 423 429
+6
fs/jfs/super.c
··· 911 911 jfs_proc_clean(); 912 912 #endif 913 913 unregister_filesystem(&jfs_fs_type); 914 + 915 + /* 916 + * Make sure all delayed rcu free inodes are flushed before we 917 + * destroy cache. 918 + */ 919 + rcu_barrier(); 914 920 kmem_cache_destroy(jfs_inode_cachep); 915 921 } 916 922
+9 -11
fs/locks.c
··· 1625 1625 */ 1626 1626 SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) 1627 1627 { 1628 - struct file *filp; 1629 - int fput_needed; 1628 + struct fd f = fdget(fd); 1630 1629 struct file_lock *lock; 1631 1630 int can_sleep, unlock; 1632 1631 int error; 1633 1632 1634 1633 error = -EBADF; 1635 - filp = fget_light(fd, &fput_needed); 1636 - if (!filp) 1634 + if (!f.file) 1637 1635 goto out; 1638 1636 1639 1637 can_sleep = !(cmd & LOCK_NB); ··· 1639 1641 unlock = (cmd == LOCK_UN); 1640 1642 1641 1643 if (!unlock && !(cmd & LOCK_MAND) && 1642 - !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) 1644 + !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) 1643 1645 goto out_putf; 1644 1646 1645 - error = flock_make_lock(filp, &lock, cmd); 1647 + error = flock_make_lock(f.file, &lock, cmd); 1646 1648 if (error) 1647 1649 goto out_putf; 1648 1650 if (can_sleep) 1649 1651 lock->fl_flags |= FL_SLEEP; 1650 1652 1651 - error = security_file_lock(filp, lock->fl_type); 1653 + error = security_file_lock(f.file, lock->fl_type); 1652 1654 if (error) 1653 1655 goto out_free; 1654 1656 1655 - if (filp->f_op && filp->f_op->flock) 1656 - error = filp->f_op->flock(filp, 1657 + if (f.file->f_op && f.file->f_op->flock) 1658 + error = f.file->f_op->flock(f.file, 1657 1659 (can_sleep) ? F_SETLKW : F_SETLK, 1658 1660 lock); 1659 1661 else 1660 - error = flock_lock_file_wait(filp, lock); 1662 + error = flock_lock_file_wait(f.file, lock); 1661 1663 1662 1664 out_free: 1663 1665 locks_free_lock(lock); 1664 1666 1665 1667 out_putf: 1666 - fput_light(filp, fput_needed); 1668 + fdput(f); 1667 1669 out: 1668 1670 return error; 1669 1671 }
+5
fs/logfs/inode.c
··· 417 417 418 418 void logfs_destroy_inode_cache(void) 419 419 { 420 + /* 421 + * Make sure all delayed rcu free inodes are flushed before we 422 + * destroy cache. 423 + */ 424 + rcu_barrier(); 420 425 kmem_cache_destroy(logfs_inode_cache); 421 426 }
+5
fs/minix/inode.c
··· 100 100 101 101 static void destroy_inodecache(void) 102 102 { 103 + /* 104 + * Make sure all delayed rcu free inodes are flushed before we 105 + * destroy cache. 106 + */ 107 + rcu_barrier(); 103 108 kmem_cache_destroy(minix_inode_cachep); 104 109 } 105 110
+18 -23
fs/namei.c
··· 1797 1797 struct nameidata *nd, struct file **fp) 1798 1798 { 1799 1799 int retval = 0; 1800 - int fput_needed; 1801 - struct file *file; 1802 1800 1803 1801 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1804 1802 nd->flags = flags | LOOKUP_JUMPED; ··· 1848 1850 get_fs_pwd(current->fs, &nd->path); 1849 1851 } 1850 1852 } else { 1853 + struct fd f = fdget_raw(dfd); 1851 1854 struct dentry *dentry; 1852 1855 1853 - file = fget_raw_light(dfd, &fput_needed); 1854 - retval = -EBADF; 1855 - if (!file) 1856 - goto out_fail; 1856 + if (!f.file) 1857 + return -EBADF; 1857 1858 1858 - dentry = file->f_path.dentry; 1859 + dentry = f.file->f_path.dentry; 1859 1860 1860 1861 if (*name) { 1861 - retval = -ENOTDIR; 1862 - if (!S_ISDIR(dentry->d_inode->i_mode)) 1863 - goto fput_fail; 1862 + if (!S_ISDIR(dentry->d_inode->i_mode)) { 1863 + fdput(f); 1864 + return -ENOTDIR; 1865 + } 1864 1866 1865 1867 retval = inode_permission(dentry->d_inode, MAY_EXEC); 1866 - if (retval) 1867 - goto fput_fail; 1868 + if (retval) { 1869 + fdput(f); 1870 + return retval; 1871 + } 1868 1872 } 1869 1873 1870 - nd->path = file->f_path; 1874 + nd->path = f.file->f_path; 1871 1875 if (flags & LOOKUP_RCU) { 1872 - if (fput_needed) 1873 - *fp = file; 1876 + if (f.need_put) 1877 + *fp = f.file; 1874 1878 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1875 1879 lock_rcu_walk(); 1876 1880 } else { 1877 - path_get(&file->f_path); 1878 - fput_light(file, fput_needed); 1881 + path_get(&nd->path); 1882 + fdput(f); 1879 1883 } 1880 1884 } 1881 1885 1882 1886 nd->inode = nd->path.dentry->d_inode; 1883 1887 return 0; 1884 - 1885 - fput_fail: 1886 - fput_light(file, fput_needed); 1887 - out_fail: 1888 - return retval; 1889 1888 } 1890 1889 1891 1890 static inline int lookup_last(struct nameidata *nd, struct path *path) ··· 3966 3971 EXPORT_SYMBOL(follow_down_one); 3967 3972 EXPORT_SYMBOL(follow_down); 3968 3973 EXPORT_SYMBOL(follow_up); 3969 - EXPORT_SYMBOL(get_write_access); /* 
binfmt_aout */ 3974 + EXPORT_SYMBOL(get_write_access); /* nfsd */ 3970 3975 EXPORT_SYMBOL(getname); 3971 3976 EXPORT_SYMBOL(lock_rename); 3972 3977 EXPORT_SYMBOL(lookup_one_len);
+5
fs/ncpfs/inode.c
··· 89 89 90 90 static void destroy_inodecache(void) 91 91 { 92 + /* 93 + * Make sure all delayed rcu free inodes are flushed before we 94 + * destroy cache. 95 + */ 96 + rcu_barrier(); 92 97 kmem_cache_destroy(ncp_inode_cachep); 93 98 } 94 99
+5
fs/nfs/inode.c
··· 1571 1571 1572 1572 static void nfs_destroy_inodecache(void) 1573 1573 { 1574 + /* 1575 + * Make sure all delayed rcu free inodes are flushed before we 1576 + * destroy cache. 1577 + */ 1578 + rcu_barrier(); 1574 1579 kmem_cache_destroy(nfs_inode_cachep); 1575 1580 } 1576 1581
+1 -2
fs/nfsd/nfs4state.c
··· 2837 2837 return -ENOMEM; 2838 2838 } 2839 2839 fp->fi_lease = fl; 2840 - fp->fi_deleg_file = fl->fl_file; 2841 - get_file(fp->fi_deleg_file); 2840 + fp->fi_deleg_file = get_file(fl->fl_file); 2842 2841 atomic_set(&fp->fi_delegees, 1); 2843 2842 list_add(&dp->dl_perfile, &fp->fi_delegations); 2844 2843 return 0;
+6
fs/nilfs2/super.c
··· 1382 1382 1383 1383 static void nilfs_destroy_cachep(void) 1384 1384 { 1385 + /* 1386 + * Make sure all delayed rcu free inodes are flushed before we 1387 + * destroy cache. 1388 + */ 1389 + rcu_barrier(); 1390 + 1385 1391 if (nilfs_inode_cachep) 1386 1392 kmem_cache_destroy(nilfs_inode_cachep); 1387 1393 if (nilfs_transaction_cachep)
+33 -54
fs/notify/fanotify/fanotify_user.c
··· 58 58 return fsnotify_remove_notify_event(group); 59 59 } 60 60 61 - static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) 61 + static int create_fd(struct fsnotify_group *group, 62 + struct fsnotify_event *event, 63 + struct file **file) 62 64 { 63 65 int client_fd; 64 66 struct file *new_file; ··· 100 98 put_unused_fd(client_fd); 101 99 client_fd = PTR_ERR(new_file); 102 100 } else { 103 - fd_install(client_fd, new_file); 101 + *file = new_file; 104 102 } 105 103 106 104 return client_fd; ··· 108 106 109 107 static int fill_event_metadata(struct fsnotify_group *group, 110 108 struct fanotify_event_metadata *metadata, 111 - struct fsnotify_event *event) 109 + struct fsnotify_event *event, 110 + struct file **file) 112 111 { 113 112 int ret = 0; 114 113 115 114 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, 116 115 group, metadata, event); 117 116 117 + *file = NULL; 118 118 metadata->event_len = FAN_EVENT_METADATA_LEN; 119 119 metadata->metadata_len = FAN_EVENT_METADATA_LEN; 120 120 metadata->vers = FANOTIFY_METADATA_VERSION; ··· 125 121 if (unlikely(event->mask & FAN_Q_OVERFLOW)) 126 122 metadata->fd = FAN_NOFD; 127 123 else { 128 - metadata->fd = create_fd(group, event); 124 + metadata->fd = create_fd(group, event, file); 129 125 if (metadata->fd < 0) 130 126 ret = metadata->fd; 131 127 } ··· 224 220 return 0; 225 221 } 226 222 227 - static void remove_access_response(struct fsnotify_group *group, 228 - struct fsnotify_event *event, 229 - __s32 fd) 230 - { 231 - struct fanotify_response_event *re; 232 - 233 - if (!(event->mask & FAN_ALL_PERM_EVENTS)) 234 - return; 235 - 236 - re = dequeue_re(group, fd); 237 - if (!re) 238 - return; 239 - 240 - BUG_ON(re->event != event); 241 - 242 - kmem_cache_free(fanotify_response_event_cache, re); 243 - 244 - return; 245 - } 246 223 #else 247 224 static int prepare_for_access_response(struct fsnotify_group *group, 248 225 struct fsnotify_event *event, ··· 232 247 return 0; 233 248 } 
234 249 235 - static void remove_access_response(struct fsnotify_group *group, 236 - struct fsnotify_event *event, 237 - __s32 fd) 238 - { 239 - return; 240 - } 241 250 #endif 242 251 243 252 static ssize_t copy_event_to_user(struct fsnotify_group *group, ··· 239 260 char __user *buf) 240 261 { 241 262 struct fanotify_event_metadata fanotify_event_metadata; 263 + struct file *f; 242 264 int fd, ret; 243 265 244 266 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 245 267 246 - ret = fill_event_metadata(group, &fanotify_event_metadata, event); 268 + ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); 247 269 if (ret < 0) 248 270 goto out; 249 271 250 272 fd = fanotify_event_metadata.fd; 273 + ret = -EFAULT; 274 + if (copy_to_user(buf, &fanotify_event_metadata, 275 + fanotify_event_metadata.event_len)) 276 + goto out_close_fd; 277 + 251 278 ret = prepare_for_access_response(group, event, fd); 252 279 if (ret) 253 280 goto out_close_fd; 254 281 255 - ret = -EFAULT; 256 - if (copy_to_user(buf, &fanotify_event_metadata, 257 - fanotify_event_metadata.event_len)) 258 - goto out_kill_access_response; 259 - 282 + fd_install(fd, f); 260 283 return fanotify_event_metadata.event_len; 261 284 262 - out_kill_access_response: 263 - remove_access_response(group, event, fd); 264 285 out_close_fd: 265 - if (fd != FAN_NOFD) 266 - sys_close(fd); 286 + if (fd != FAN_NOFD) { 287 + put_unused_fd(fd); 288 + fput(f); 289 + } 267 290 out: 268 291 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 269 292 if (event->mask & FAN_ALL_PERM_EVENTS) { ··· 451 470 dfd, filename, flags); 452 471 453 472 if (filename == NULL) { 454 - struct file *file; 455 - int fput_needed; 473 + struct fd f = fdget(dfd); 456 474 457 475 ret = -EBADF; 458 - file = fget_light(dfd, &fput_needed); 459 - if (!file) 476 + if (!f.file) 460 477 goto out; 461 478 462 479 ret = -ENOTDIR; 463 480 if ((flags & FAN_MARK_ONLYDIR) && 464 - !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { 465 - 
fput_light(file, fput_needed); 481 + !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) { 482 + fdput(f); 466 483 goto out; 467 484 } 468 485 469 - *path = file->f_path; 486 + *path = f.file->f_path; 470 487 path_get(path); 471 - fput_light(file, fput_needed); 488 + fdput(f); 472 489 } else { 473 490 unsigned int lookup_flags = 0; 474 491 ··· 746 767 struct inode *inode = NULL; 747 768 struct vfsmount *mnt = NULL; 748 769 struct fsnotify_group *group; 749 - struct file *filp; 770 + struct fd f; 750 771 struct path path; 751 - int ret, fput_needed; 772 + int ret; 752 773 753 774 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", 754 775 __func__, fanotify_fd, flags, dfd, pathname, mask); ··· 782 803 #endif 783 804 return -EINVAL; 784 805 785 - filp = fget_light(fanotify_fd, &fput_needed); 786 - if (unlikely(!filp)) 806 + f = fdget(fanotify_fd); 807 + if (unlikely(!f.file)) 787 808 return -EBADF; 788 809 789 810 /* verify that this is indeed an fanotify instance */ 790 811 ret = -EINVAL; 791 - if (unlikely(filp->f_op != &fanotify_fops)) 812 + if (unlikely(f.file->f_op != &fanotify_fops)) 792 813 goto fput_and_out; 793 - group = filp->private_data; 814 + group = f.file->private_data; 794 815 795 816 /* 796 817 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not ··· 837 858 838 859 path_put(&path); 839 860 fput_and_out: 840 - fput_light(filp, fput_needed); 861 + fdput(f); 841 862 return ret; 842 863 } 843 864
+14 -14
fs/notify/inotify/inotify_user.c
··· 757 757 struct fsnotify_group *group; 758 758 struct inode *inode; 759 759 struct path path; 760 - struct file *filp; 761 - int ret, fput_needed; 760 + struct fd f; 761 + int ret; 762 762 unsigned flags = 0; 763 763 764 - filp = fget_light(fd, &fput_needed); 765 - if (unlikely(!filp)) 764 + f = fdget(fd); 765 + if (unlikely(!f.file)) 766 766 return -EBADF; 767 767 768 768 /* verify that this is indeed an inotify instance */ 769 - if (unlikely(filp->f_op != &inotify_fops)) { 769 + if (unlikely(f.file->f_op != &inotify_fops)) { 770 770 ret = -EINVAL; 771 771 goto fput_and_out; 772 772 } ··· 782 782 783 783 /* inode held in place by reference to path; group by fget on fd */ 784 784 inode = path.dentry->d_inode; 785 - group = filp->private_data; 785 + group = f.file->private_data; 786 786 787 787 /* create/update an inode mark */ 788 788 ret = inotify_update_watch(group, inode, mask); 789 789 path_put(&path); 790 790 fput_and_out: 791 - fput_light(filp, fput_needed); 791 + fdput(f); 792 792 return ret; 793 793 } 794 794 ··· 796 796 { 797 797 struct fsnotify_group *group; 798 798 struct inotify_inode_mark *i_mark; 799 - struct file *filp; 800 - int ret = 0, fput_needed; 799 + struct fd f; 800 + int ret = 0; 801 801 802 - filp = fget_light(fd, &fput_needed); 803 - if (unlikely(!filp)) 802 + f = fdget(fd); 803 + if (unlikely(!f.file)) 804 804 return -EBADF; 805 805 806 806 /* verify that this is indeed an inotify instance */ 807 807 ret = -EINVAL; 808 - if (unlikely(filp->f_op != &inotify_fops)) 808 + if (unlikely(f.file->f_op != &inotify_fops)) 809 809 goto out; 810 810 811 - group = filp->private_data; 811 + group = f.file->private_data; 812 812 813 813 ret = -EINVAL; 814 814 i_mark = inotify_idr_find(group, wd); ··· 823 823 fsnotify_put_mark(&i_mark->fsn_mark); 824 824 825 825 out: 826 - fput_light(filp, fput_needed); 826 + fdput(f); 827 827 return ret; 828 828 } 829 829
+6
fs/ntfs/super.c
··· 3193 3193 ntfs_debug("Unregistering NTFS driver."); 3194 3194 3195 3195 unregister_filesystem(&ntfs_fs_type); 3196 + 3197 + /* 3198 + * Make sure all delayed rcu free inodes are flushed before we 3199 + * destroy cache. 3200 + */ 3201 + rcu_barrier(); 3196 3202 kmem_cache_destroy(ntfs_big_inode_cache); 3197 3203 kmem_cache_destroy(ntfs_inode_cache); 3198 3204 kmem_cache_destroy(ntfs_name_cache);
+19 -19
fs/ocfs2/cluster/heartbeat.c
··· 1746 1746 long fd; 1747 1747 int sectsize; 1748 1748 char *p = (char *)page; 1749 - struct file *filp = NULL; 1750 - struct inode *inode = NULL; 1749 + struct fd f; 1750 + struct inode *inode; 1751 1751 ssize_t ret = -EINVAL; 1752 1752 int live_threshold; 1753 1753 ··· 1766 1766 if (fd < 0 || fd >= INT_MAX) 1767 1767 goto out; 1768 1768 1769 - filp = fget(fd); 1770 - if (filp == NULL) 1769 + f = fdget(fd); 1770 + if (f.file == NULL) 1771 1771 goto out; 1772 1772 1773 1773 if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || 1774 1774 reg->hr_block_bytes == 0) 1775 - goto out; 1775 + goto out2; 1776 1776 1777 - inode = igrab(filp->f_mapping->host); 1777 + inode = igrab(f.file->f_mapping->host); 1778 1778 if (inode == NULL) 1779 - goto out; 1779 + goto out2; 1780 1780 1781 1781 if (!S_ISBLK(inode->i_mode)) 1782 - goto out; 1782 + goto out3; 1783 1783 1784 - reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1784 + reg->hr_bdev = I_BDEV(f.file->f_mapping->host); 1785 1785 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); 1786 1786 if (ret) { 1787 1787 reg->hr_bdev = NULL; 1788 - goto out; 1788 + goto out3; 1789 1789 } 1790 1790 inode = NULL; 1791 1791 ··· 1797 1797 "blocksize %u incorrect for device, expected %d", 1798 1798 reg->hr_block_bytes, sectsize); 1799 1799 ret = -EINVAL; 1800 - goto out; 1800 + goto out3; 1801 1801 } 1802 1802 1803 1803 o2hb_init_region_params(reg); ··· 1811 1811 ret = o2hb_map_slot_data(reg); 1812 1812 if (ret) { 1813 1813 mlog_errno(ret); 1814 - goto out; 1814 + goto out3; 1815 1815 } 1816 1816 1817 1817 ret = o2hb_populate_slot_data(reg); 1818 1818 if (ret) { 1819 1819 mlog_errno(ret); 1820 - goto out; 1820 + goto out3; 1821 1821 } 1822 1822 1823 1823 INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout); ··· 1847 1847 if (IS_ERR(hb_task)) { 1848 1848 ret = PTR_ERR(hb_task); 1849 1849 mlog_errno(ret); 1850 - goto out; 1850 + goto out3; 1851 1851 } 1852 1852 1853 1853 spin_lock(&o2hb_live_lock); ··· 1863 1863 
1864 1864 if (reg->hr_aborted_start) { 1865 1865 ret = -EIO; 1866 - goto out; 1866 + goto out3; 1867 1867 } 1868 1868 1869 1869 /* Ok, we were woken. Make sure it wasn't by drop_item() */ ··· 1882 1882 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", 1883 1883 config_item_name(&reg->hr_item), reg->hr_dev_name); 1884 1884 1885 + out3: 1886 + iput(inode); 1887 + out2: 1888 + fdput(f); 1885 1889 out: 1886 - if (filp) 1887 - fput(filp); 1888 - if (inode) 1889 - iput(inode); 1890 1890 if (ret < 0) { 1891 1891 if (reg->hr_bdev) { 1892 1892 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
+5
fs/ocfs2/dlmfs/dlmfs.c
··· 691 691 flush_workqueue(user_dlm_worker); 692 692 destroy_workqueue(user_dlm_worker); 693 693 694 + /* 695 + * Make sure all delayed rcu free inodes are flushed before we 696 + * destroy cache. 697 + */ 698 + rcu_barrier(); 694 699 kmem_cache_destroy(dlmfs_inode_cache); 695 700 696 701 bdi_destroy(&dlmfs_backing_dev_info);
+5
fs/ocfs2/super.c
··· 1818 1818 1819 1819 static void ocfs2_free_mem_caches(void) 1820 1820 { 1821 + /* 1822 + * Make sure all delayed rcu free inodes are flushed before we 1823 + * destroy cache. 1824 + */ 1825 + rcu_barrier(); 1821 1826 if (ocfs2_inode_cachep) 1822 1827 kmem_cache_destroy(ocfs2_inode_cachep); 1823 1828 ocfs2_inode_cachep = NULL;
+30 -100
fs/open.c
··· 132 132 133 133 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 134 134 { 135 - struct inode * inode; 135 + struct inode *inode; 136 136 struct dentry *dentry; 137 - struct file * file; 137 + struct fd f; 138 138 int error; 139 139 140 140 error = -EINVAL; 141 141 if (length < 0) 142 142 goto out; 143 143 error = -EBADF; 144 - file = fget(fd); 145 - if (!file) 144 + f = fdget(fd); 145 + if (!f.file) 146 146 goto out; 147 147 148 148 /* explicitly opened as large or we are on 64-bit box */ 149 - if (file->f_flags & O_LARGEFILE) 149 + if (f.file->f_flags & O_LARGEFILE) 150 150 small = 0; 151 151 152 - dentry = file->f_path.dentry; 152 + dentry = f.file->f_path.dentry; 153 153 inode = dentry->d_inode; 154 154 error = -EINVAL; 155 - if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 155 + if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) 156 156 goto out_putf; 157 157 158 158 error = -EINVAL; ··· 165 165 goto out_putf; 166 166 167 167 sb_start_write(inode->i_sb); 168 - error = locks_verify_truncate(inode, file, length); 168 + error = locks_verify_truncate(inode, f.file, length); 169 169 if (!error) 170 - error = security_path_truncate(&file->f_path); 170 + error = security_path_truncate(&f.file->f_path); 171 171 if (!error) 172 - error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 172 + error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); 173 173 sb_end_write(inode->i_sb); 174 174 out_putf: 175 - fput(file); 175 + fdput(f); 176 176 out: 177 177 return error; 178 178 } ··· 276 276 277 277 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 278 278 { 279 - struct file *file; 279 + struct fd f = fdget(fd); 280 280 int error = -EBADF; 281 281 282 - file = fget(fd); 283 - if (file) { 284 - error = do_fallocate(file, mode, offset, len); 285 - fput(file); 282 + if (f.file) { 283 + error = do_fallocate(f.file, mode, offset, len); 284 + fdput(f); 286 285 } 287 - 288 286 return 
error; 289 287 } 290 288 ··· 398 400 399 401 SYSCALL_DEFINE1(fchdir, unsigned int, fd) 400 402 { 401 - struct file *file; 403 + struct fd f = fdget_raw(fd); 402 404 struct inode *inode; 403 - int error, fput_needed; 405 + int error = -EBADF; 404 406 405 407 error = -EBADF; 406 - file = fget_raw_light(fd, &fput_needed); 407 - if (!file) 408 + if (!f.file) 408 409 goto out; 409 410 410 - inode = file->f_path.dentry->d_inode; 411 + inode = f.file->f_path.dentry->d_inode; 411 412 412 413 error = -ENOTDIR; 413 414 if (!S_ISDIR(inode->i_mode)) ··· 414 417 415 418 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); 416 419 if (!error) 417 - set_fs_pwd(current->fs, &file->f_path); 420 + set_fs_pwd(current->fs, &f.file->f_path); 418 421 out_putf: 419 - fput_light(file, fput_needed); 422 + fdput(f); 420 423 out: 421 424 return error; 422 425 } ··· 579 582 580 583 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) 581 584 { 582 - struct file * file; 585 + struct fd f = fdget(fd); 583 586 int error = -EBADF; 584 - struct dentry * dentry; 585 587 586 - file = fget(fd); 587 - if (!file) 588 + if (!f.file) 588 589 goto out; 589 590 590 - error = mnt_want_write_file(file); 591 + error = mnt_want_write_file(f.file); 591 592 if (error) 592 593 goto out_fput; 593 - dentry = file->f_path.dentry; 594 - audit_inode(NULL, dentry); 595 - error = chown_common(&file->f_path, user, group); 596 - mnt_drop_write_file(file); 594 + audit_inode(NULL, f.file->f_path.dentry); 595 + error = chown_common(&f.file->f_path, user, group); 596 + mnt_drop_write_file(f.file); 597 597 out_fput: 598 - fput(file); 598 + fdput(f); 599 599 out: 600 600 return error; 601 601 } ··· 797 803 } 798 804 EXPORT_SYMBOL(dentry_open); 799 805 800 - static void __put_unused_fd(struct files_struct *files, unsigned int fd) 801 - { 802 - struct fdtable *fdt = files_fdtable(files); 803 - __clear_open_fd(fd, fdt); 804 - if (fd < files->next_fd) 805 - files->next_fd = fd; 806 - } 807 - 808 - void 
put_unused_fd(unsigned int fd) 809 - { 810 - struct files_struct *files = current->files; 811 - spin_lock(&files->file_lock); 812 - __put_unused_fd(files, fd); 813 - spin_unlock(&files->file_lock); 814 - } 815 - 816 - EXPORT_SYMBOL(put_unused_fd); 817 - 818 - /* 819 - * Install a file pointer in the fd array. 820 - * 821 - * The VFS is full of places where we drop the files lock between 822 - * setting the open_fds bitmap and installing the file in the file 823 - * array. At any such point, we are vulnerable to a dup2() race 824 - * installing a file in the array before us. We need to detect this and 825 - * fput() the struct file we are about to overwrite in this case. 826 - * 827 - * It should never happen - if we allow dup2() do it, _really_ bad things 828 - * will follow. 829 - */ 830 - 831 - void fd_install(unsigned int fd, struct file *file) 832 - { 833 - struct files_struct *files = current->files; 834 - struct fdtable *fdt; 835 - spin_lock(&files->file_lock); 836 - fdt = files_fdtable(files); 837 - BUG_ON(fdt->fd[fd] != NULL); 838 - rcu_assign_pointer(fdt->fd[fd], file); 839 - spin_unlock(&files->file_lock); 840 - } 841 - 842 - EXPORT_SYMBOL(fd_install); 843 - 844 806 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) 845 807 { 846 808 int lookup_flags = 0; ··· 808 858 op->mode = 0; 809 859 810 860 /* Must never be set by userspace */ 811 - flags &= ~FMODE_NONOTIFY; 861 + flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; 812 862 813 863 /* 814 864 * O_SYNC is implemented as __O_SYNC|O_DSYNC. 
As many places only ··· 988 1038 */ 989 1039 SYSCALL_DEFINE1(close, unsigned int, fd) 990 1040 { 991 - struct file * filp; 992 - struct files_struct *files = current->files; 993 - struct fdtable *fdt; 994 - int retval; 995 - 996 - spin_lock(&files->file_lock); 997 - fdt = files_fdtable(files); 998 - if (fd >= fdt->max_fds) 999 - goto out_unlock; 1000 - filp = fdt->fd[fd]; 1001 - if (!filp) 1002 - goto out_unlock; 1003 - rcu_assign_pointer(fdt->fd[fd], NULL); 1004 - __clear_close_on_exec(fd, fdt); 1005 - __put_unused_fd(files, fd); 1006 - spin_unlock(&files->file_lock); 1007 - retval = filp_close(filp, files); 1041 + int retval = __close_fd(current->files, fd); 1008 1042 1009 1043 /* can't restart close syscall because file table entry was cleared */ 1010 1044 if (unlikely(retval == -ERESTARTSYS || ··· 998 1064 retval = -EINTR; 999 1065 1000 1066 return retval; 1001 - 1002 - out_unlock: 1003 - spin_unlock(&files->file_lock); 1004 - return -EBADF; 1005 1067 } 1006 1068 EXPORT_SYMBOL(sys_close); 1007 1069
+5
fs/openpromfs/inode.c
··· 463 463 static void __exit exit_openprom_fs(void) 464 464 { 465 465 unregister_filesystem(&openprom_fs_type); 466 + /* 467 + * Make sure all delayed rcu free inodes are flushed before we 468 + * destroy cache. 469 + */ 470 + rcu_barrier(); 466 471 kmem_cache_destroy(op_inode_cachep); 467 472 } 468 473
+22 -9
fs/pipe.c
··· 1064 1064 return err; 1065 1065 } 1066 1066 1067 - int do_pipe_flags(int *fd, int flags) 1067 + static int __do_pipe_flags(int *fd, struct file **files, int flags) 1068 1068 { 1069 - struct file *files[2]; 1070 1069 int error; 1071 1070 int fdw, fdr; 1072 1071 ··· 1087 1088 fdw = error; 1088 1089 1089 1090 audit_fd_pair(fdr, fdw); 1090 - fd_install(fdr, files[0]); 1091 - fd_install(fdw, files[1]); 1092 1091 fd[0] = fdr; 1093 1092 fd[1] = fdw; 1094 - 1095 1093 return 0; 1096 1094 1097 1095 err_fdr: ··· 1099 1103 return error; 1100 1104 } 1101 1105 1106 + int do_pipe_flags(int *fd, int flags) 1107 + { 1108 + struct file *files[2]; 1109 + int error = __do_pipe_flags(fd, files, flags); 1110 + if (!error) { 1111 + fd_install(fd[0], files[0]); 1112 + fd_install(fd[1], files[1]); 1113 + } 1114 + return error; 1115 + } 1116 + 1102 1117 /* 1103 1118 * sys_pipe() is the normal C calling standard for creating 1104 1119 * a pipe. It's not the way Unix traditionally does this, though. 1105 1120 */ 1106 1121 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 1107 1122 { 1123 + struct file *files[2]; 1108 1124 int fd[2]; 1109 1125 int error; 1110 1126 1111 - error = do_pipe_flags(fd, flags); 1127 + error = __do_pipe_flags(fd, files, flags); 1112 1128 if (!error) { 1113 - if (copy_to_user(fildes, fd, sizeof(fd))) { 1114 - sys_close(fd[0]); 1115 - sys_close(fd[1]); 1129 + if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { 1130 + fput(files[0]); 1131 + fput(files[1]); 1132 + put_unused_fd(fd[0]); 1133 + put_unused_fd(fd[1]); 1116 1134 error = -EFAULT; 1135 + } else { 1136 + fd_install(fd[0], files[0]); 1137 + fd_install(fd[1], files[1]); 1117 1138 } 1118 1139 } 1119 1140 return error;
+1 -1
fs/proc/Makefile
··· 8 8 proc-$(CONFIG_MMU) := mmu.o task_mmu.o 9 9 10 10 proc-y += inode.o root.o base.o generic.o array.o \ 11 - proc_tty.o 11 + proc_tty.o fd.o 12 12 proc-y += cmdline.o 13 13 proc-y += consoles.o 14 14 proc-y += cpuinfo.o
+11 -406
fs/proc/base.c
··· 90 90 #endif 91 91 #include <trace/events/oom.h> 92 92 #include "internal.h" 93 + #include "fd.h" 93 94 94 95 /* NOTE: 95 96 * Implementing inode permission operations in /proc is almost ··· 136 135 NOD(NAME, (S_IFREG|(MODE)), \ 137 136 NULL, &proc_single_file_operations, \ 138 137 { .proc_show = show } ) 139 - 140 - static int proc_fd_permission(struct inode *inode, int mask); 141 138 142 139 /* 143 140 * Count the number of hardlinks for the pid_entry table, excluding the . ··· 1499 1500 return error; 1500 1501 } 1501 1502 1502 - static const struct inode_operations proc_pid_link_inode_operations = { 1503 + const struct inode_operations proc_pid_link_inode_operations = { 1503 1504 .readlink = proc_pid_readlink, 1504 1505 .follow_link = proc_pid_follow_link, 1505 1506 .setattr = proc_setattr, ··· 1507 1508 1508 1509 1509 1510 /* building an inode */ 1510 - 1511 - static int task_dumpable(struct task_struct *task) 1512 - { 1513 - int dumpable = 0; 1514 - struct mm_struct *mm; 1515 - 1516 - task_lock(task); 1517 - mm = task->mm; 1518 - if (mm) 1519 - dumpable = get_dumpable(mm); 1520 - task_unlock(task); 1521 - if(dumpable == 1) 1522 - return 1; 1523 - return 0; 1524 - } 1525 1511 1526 1512 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) 1527 1513 { ··· 1633 1649 return 0; 1634 1650 } 1635 1651 1636 - static int pid_delete_dentry(const struct dentry * dentry) 1637 - { 1638 - /* Is the task we represent dead? 1639 - * If so, then don't put the dentry on the lru list, 1640 - * kill it immediately. 
1641 - */ 1642 - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1643 - } 1644 - 1645 1652 const struct dentry_operations pid_dentry_operations = 1646 1653 { 1647 1654 .d_revalidate = pid_revalidate, ··· 1694 1719 ino = 1; 1695 1720 return filldir(dirent, name, len, filp->f_pos, ino, type); 1696 1721 } 1697 - 1698 - static unsigned name_to_int(struct dentry *dentry) 1699 - { 1700 - const char *name = dentry->d_name.name; 1701 - int len = dentry->d_name.len; 1702 - unsigned n = 0; 1703 - 1704 - if (len > 1 && *name == '0') 1705 - goto out; 1706 - while (len-- > 0) { 1707 - unsigned c = *name++ - '0'; 1708 - if (c > 9) 1709 - goto out; 1710 - if (n >= (~0U-9)/10) 1711 - goto out; 1712 - n *= 10; 1713 - n += c; 1714 - } 1715 - return n; 1716 - out: 1717 - return ~0U; 1718 - } 1719 - 1720 - #define PROC_FDINFO_MAX 64 1721 - 1722 - static int proc_fd_info(struct inode *inode, struct path *path, char *info) 1723 - { 1724 - struct task_struct *task = get_proc_task(inode); 1725 - struct files_struct *files = NULL; 1726 - struct file *file; 1727 - int fd = proc_fd(inode); 1728 - 1729 - if (task) { 1730 - files = get_files_struct(task); 1731 - put_task_struct(task); 1732 - } 1733 - if (files) { 1734 - /* 1735 - * We are not taking a ref to the file structure, so we must 1736 - * hold ->file_lock. 
1737 - */ 1738 - spin_lock(&files->file_lock); 1739 - file = fcheck_files(files, fd); 1740 - if (file) { 1741 - unsigned int f_flags; 1742 - struct fdtable *fdt; 1743 - 1744 - fdt = files_fdtable(files); 1745 - f_flags = file->f_flags & ~O_CLOEXEC; 1746 - if (close_on_exec(fd, fdt)) 1747 - f_flags |= O_CLOEXEC; 1748 - 1749 - if (path) { 1750 - *path = file->f_path; 1751 - path_get(&file->f_path); 1752 - } 1753 - if (info) 1754 - snprintf(info, PROC_FDINFO_MAX, 1755 - "pos:\t%lli\n" 1756 - "flags:\t0%o\n", 1757 - (long long) file->f_pos, 1758 - f_flags); 1759 - spin_unlock(&files->file_lock); 1760 - put_files_struct(files); 1761 - return 0; 1762 - } 1763 - spin_unlock(&files->file_lock); 1764 - put_files_struct(files); 1765 - } 1766 - return -ENOENT; 1767 - } 1768 - 1769 - static int proc_fd_link(struct dentry *dentry, struct path *path) 1770 - { 1771 - return proc_fd_info(dentry->d_inode, path, NULL); 1772 - } 1773 - 1774 - static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) 1775 - { 1776 - struct inode *inode; 1777 - struct task_struct *task; 1778 - int fd; 1779 - struct files_struct *files; 1780 - const struct cred *cred; 1781 - 1782 - if (flags & LOOKUP_RCU) 1783 - return -ECHILD; 1784 - 1785 - inode = dentry->d_inode; 1786 - task = get_proc_task(inode); 1787 - fd = proc_fd(inode); 1788 - 1789 - if (task) { 1790 - files = get_files_struct(task); 1791 - if (files) { 1792 - struct file *file; 1793 - rcu_read_lock(); 1794 - file = fcheck_files(files, fd); 1795 - if (file) { 1796 - unsigned f_mode = file->f_mode; 1797 - 1798 - rcu_read_unlock(); 1799 - put_files_struct(files); 1800 - 1801 - if (task_dumpable(task)) { 1802 - rcu_read_lock(); 1803 - cred = __task_cred(task); 1804 - inode->i_uid = cred->euid; 1805 - inode->i_gid = cred->egid; 1806 - rcu_read_unlock(); 1807 - } else { 1808 - inode->i_uid = GLOBAL_ROOT_UID; 1809 - inode->i_gid = GLOBAL_ROOT_GID; 1810 - } 1811 - 1812 - if (S_ISLNK(inode->i_mode)) { 1813 - unsigned i_mode = S_IFLNK; 
1814 - if (f_mode & FMODE_READ) 1815 - i_mode |= S_IRUSR | S_IXUSR; 1816 - if (f_mode & FMODE_WRITE) 1817 - i_mode |= S_IWUSR | S_IXUSR; 1818 - inode->i_mode = i_mode; 1819 - } 1820 - 1821 - security_task_to_inode(task, inode); 1822 - put_task_struct(task); 1823 - return 1; 1824 - } 1825 - rcu_read_unlock(); 1826 - put_files_struct(files); 1827 - } 1828 - put_task_struct(task); 1829 - } 1830 - d_drop(dentry); 1831 - return 0; 1832 - } 1833 - 1834 - static const struct dentry_operations tid_fd_dentry_operations = 1835 - { 1836 - .d_revalidate = tid_fd_revalidate, 1837 - .d_delete = pid_delete_dentry, 1838 - }; 1839 - 1840 - static struct dentry *proc_fd_instantiate(struct inode *dir, 1841 - struct dentry *dentry, struct task_struct *task, const void *ptr) 1842 - { 1843 - unsigned fd = (unsigned long)ptr; 1844 - struct inode *inode; 1845 - struct proc_inode *ei; 1846 - struct dentry *error = ERR_PTR(-ENOENT); 1847 - 1848 - inode = proc_pid_make_inode(dir->i_sb, task); 1849 - if (!inode) 1850 - goto out; 1851 - ei = PROC_I(inode); 1852 - ei->fd = fd; 1853 - 1854 - inode->i_mode = S_IFLNK; 1855 - inode->i_op = &proc_pid_link_inode_operations; 1856 - inode->i_size = 64; 1857 - ei->op.proc_get_link = proc_fd_link; 1858 - d_set_d_op(dentry, &tid_fd_dentry_operations); 1859 - d_add(dentry, inode); 1860 - /* Close the race of the process dying before we return the dentry */ 1861 - if (tid_fd_revalidate(dentry, 0)) 1862 - error = NULL; 1863 - 1864 - out: 1865 - return error; 1866 - } 1867 - 1868 - static struct dentry *proc_lookupfd_common(struct inode *dir, 1869 - struct dentry *dentry, 1870 - instantiate_t instantiate) 1871 - { 1872 - struct task_struct *task = get_proc_task(dir); 1873 - unsigned fd = name_to_int(dentry); 1874 - struct dentry *result = ERR_PTR(-ENOENT); 1875 - 1876 - if (!task) 1877 - goto out_no_task; 1878 - if (fd == ~0U) 1879 - goto out; 1880 - 1881 - result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); 1882 - out: 1883 - 
put_task_struct(task); 1884 - out_no_task: 1885 - return result; 1886 - } 1887 - 1888 - static int proc_readfd_common(struct file * filp, void * dirent, 1889 - filldir_t filldir, instantiate_t instantiate) 1890 - { 1891 - struct dentry *dentry = filp->f_path.dentry; 1892 - struct inode *inode = dentry->d_inode; 1893 - struct task_struct *p = get_proc_task(inode); 1894 - unsigned int fd, ino; 1895 - int retval; 1896 - struct files_struct * files; 1897 - 1898 - retval = -ENOENT; 1899 - if (!p) 1900 - goto out_no_task; 1901 - retval = 0; 1902 - 1903 - fd = filp->f_pos; 1904 - switch (fd) { 1905 - case 0: 1906 - if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 1907 - goto out; 1908 - filp->f_pos++; 1909 - case 1: 1910 - ino = parent_ino(dentry); 1911 - if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1912 - goto out; 1913 - filp->f_pos++; 1914 - default: 1915 - files = get_files_struct(p); 1916 - if (!files) 1917 - goto out; 1918 - rcu_read_lock(); 1919 - for (fd = filp->f_pos-2; 1920 - fd < files_fdtable(files)->max_fds; 1921 - fd++, filp->f_pos++) { 1922 - char name[PROC_NUMBUF]; 1923 - int len; 1924 - int rv; 1925 - 1926 - if (!fcheck_files(files, fd)) 1927 - continue; 1928 - rcu_read_unlock(); 1929 - 1930 - len = snprintf(name, sizeof(name), "%d", fd); 1931 - rv = proc_fill_cache(filp, dirent, filldir, 1932 - name, len, instantiate, p, 1933 - (void *)(unsigned long)fd); 1934 - if (rv < 0) 1935 - goto out_fd_loop; 1936 - rcu_read_lock(); 1937 - } 1938 - rcu_read_unlock(); 1939 - out_fd_loop: 1940 - put_files_struct(files); 1941 - } 1942 - out: 1943 - put_task_struct(p); 1944 - out_no_task: 1945 - return retval; 1946 - } 1947 - 1948 - static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, 1949 - unsigned int flags) 1950 - { 1951 - return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); 1952 - } 1953 - 1954 - static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) 1955 - { 1956 - return 
proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); 1957 - } 1958 - 1959 - static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, 1960 - size_t len, loff_t *ppos) 1961 - { 1962 - char tmp[PROC_FDINFO_MAX]; 1963 - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); 1964 - if (!err) 1965 - err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); 1966 - return err; 1967 - } 1968 - 1969 - static const struct file_operations proc_fdinfo_file_operations = { 1970 - .open = nonseekable_open, 1971 - .read = proc_fdinfo_read, 1972 - .llseek = no_llseek, 1973 - }; 1974 - 1975 - static const struct file_operations proc_fd_operations = { 1976 - .read = generic_read_dir, 1977 - .readdir = proc_readfd, 1978 - .llseek = default_llseek, 1979 - }; 1980 1722 1981 1723 #ifdef CONFIG_CHECKPOINT_RESTORE 1982 1724 ··· 1813 2121 } 1814 2122 1815 2123 struct map_files_info { 1816 - struct file *file; 2124 + fmode_t mode; 1817 2125 unsigned long len; 1818 2126 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ 1819 2127 }; ··· 1822 2130 proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, 1823 2131 struct task_struct *task, const void *ptr) 1824 2132 { 1825 - const struct file *file = ptr; 2133 + fmode_t mode = (fmode_t)(unsigned long)ptr; 1826 2134 struct proc_inode *ei; 1827 2135 struct inode *inode; 1828 - 1829 - if (!file) 1830 - return ERR_PTR(-ENOENT); 1831 2136 1832 2137 inode = proc_pid_make_inode(dir->i_sb, task); 1833 2138 if (!inode) ··· 1837 2148 inode->i_size = 64; 1838 2149 inode->i_mode = S_IFLNK; 1839 2150 1840 - if (file->f_mode & FMODE_READ) 2151 + if (mode & FMODE_READ) 1841 2152 inode->i_mode |= S_IRUSR; 1842 - if (file->f_mode & FMODE_WRITE) 2153 + if (mode & FMODE_WRITE) 1843 2154 inode->i_mode |= S_IWUSR; 1844 2155 1845 2156 d_set_d_op(dentry, &tid_map_files_dentry_operations); ··· 1883 2194 if (!vma) 1884 2195 goto out_no_vma; 1885 2196 1886 - result = proc_map_files_instantiate(dir, dentry, task, 
vma->vm_file); 2197 + result = proc_map_files_instantiate(dir, dentry, task, 2198 + (void *)(unsigned long)vma->vm_file->f_mode); 1887 2199 1888 2200 out_no_vma: 1889 2201 up_read(&mm->mmap_sem); ··· 1985 2295 if (++pos <= filp->f_pos) 1986 2296 continue; 1987 2297 1988 - get_file(vma->vm_file); 1989 - info.file = vma->vm_file; 2298 + info.mode = vma->vm_file->f_mode; 1990 2299 info.len = snprintf(info.name, 1991 2300 sizeof(info.name), "%lx-%lx", 1992 2301 vma->vm_start, vma->vm_end); ··· 2000 2311 ret = proc_fill_cache(filp, dirent, filldir, 2001 2312 p->name, p->len, 2002 2313 proc_map_files_instantiate, 2003 - task, p->file); 2314 + task, 2315 + (void *)(unsigned long)p->mode); 2004 2316 if (ret) 2005 2317 break; 2006 2318 filp->f_pos++; 2007 - fput(p->file); 2008 - } 2009 - for (; i < nr_files; i++) { 2010 - /* 2011 - * In case of error don't forget 2012 - * to put rest of file refs. 2013 - */ 2014 - p = flex_array_get(fa, i); 2015 - fput(p->file); 2016 2319 } 2017 2320 if (fa) 2018 2321 flex_array_free(fa); ··· 2025 2344 }; 2026 2345 2027 2346 #endif /* CONFIG_CHECKPOINT_RESTORE */ 2028 - 2029 - /* 2030 - * /proc/pid/fd needs a special permission handler so that a process can still 2031 - * access /proc/self/fd after it has executed a setuid(). 2032 - */ 2033 - static int proc_fd_permission(struct inode *inode, int mask) 2034 - { 2035 - int rv = generic_permission(inode, mask); 2036 - if (rv == 0) 2037 - return 0; 2038 - if (task_pid(current) == proc_pid(inode)) 2039 - rv = 0; 2040 - return rv; 2041 - } 2042 - 2043 - /* 2044 - * proc directories can do almost nothing.. 
2045 - */ 2046 - static const struct inode_operations proc_fd_inode_operations = { 2047 - .lookup = proc_lookupfd, 2048 - .permission = proc_fd_permission, 2049 - .setattr = proc_setattr, 2050 - }; 2051 - 2052 - static struct dentry *proc_fdinfo_instantiate(struct inode *dir, 2053 - struct dentry *dentry, struct task_struct *task, const void *ptr) 2054 - { 2055 - unsigned fd = (unsigned long)ptr; 2056 - struct inode *inode; 2057 - struct proc_inode *ei; 2058 - struct dentry *error = ERR_PTR(-ENOENT); 2059 - 2060 - inode = proc_pid_make_inode(dir->i_sb, task); 2061 - if (!inode) 2062 - goto out; 2063 - ei = PROC_I(inode); 2064 - ei->fd = fd; 2065 - inode->i_mode = S_IFREG | S_IRUSR; 2066 - inode->i_fop = &proc_fdinfo_file_operations; 2067 - d_set_d_op(dentry, &tid_fd_dentry_operations); 2068 - d_add(dentry, inode); 2069 - /* Close the race of the process dying before we return the dentry */ 2070 - if (tid_fd_revalidate(dentry, 0)) 2071 - error = NULL; 2072 - 2073 - out: 2074 - return error; 2075 - } 2076 - 2077 - static struct dentry *proc_lookupfdinfo(struct inode *dir, 2078 - struct dentry *dentry, 2079 - unsigned int flags) 2080 - { 2081 - return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); 2082 - } 2083 - 2084 - static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) 2085 - { 2086 - return proc_readfd_common(filp, dirent, filldir, 2087 - proc_fdinfo_instantiate); 2088 - } 2089 - 2090 - static const struct file_operations proc_fdinfo_operations = { 2091 - .read = generic_read_dir, 2092 - .readdir = proc_readfdinfo, 2093 - .llseek = default_llseek, 2094 - }; 2095 - 2096 - /* 2097 - * proc directories can do almost nothing.. 
2098 - */ 2099 - static const struct inode_operations proc_fdinfo_inode_operations = { 2100 - .lookup = proc_lookupfdinfo, 2101 - .setattr = proc_setattr, 2102 - }; 2103 - 2104 2347 2105 2348 static struct dentry *proc_pident_instantiate(struct inode *dir, 2106 2349 struct dentry *dentry, struct task_struct *task, const void *ptr)
+367
fs/proc/fd.c
··· 1 + #include <linux/sched.h> 2 + #include <linux/errno.h> 3 + #include <linux/dcache.h> 4 + #include <linux/path.h> 5 + #include <linux/fdtable.h> 6 + #include <linux/namei.h> 7 + #include <linux/pid.h> 8 + #include <linux/security.h> 9 + #include <linux/file.h> 10 + #include <linux/seq_file.h> 11 + 12 + #include <linux/proc_fs.h> 13 + 14 + #include "internal.h" 15 + #include "fd.h" 16 + 17 + static int seq_show(struct seq_file *m, void *v) 18 + { 19 + struct files_struct *files = NULL; 20 + int f_flags = 0, ret = -ENOENT; 21 + struct file *file = NULL; 22 + struct task_struct *task; 23 + 24 + task = get_proc_task(m->private); 25 + if (!task) 26 + return -ENOENT; 27 + 28 + files = get_files_struct(task); 29 + put_task_struct(task); 30 + 31 + if (files) { 32 + int fd = proc_fd(m->private); 33 + 34 + spin_lock(&files->file_lock); 35 + file = fcheck_files(files, fd); 36 + if (file) { 37 + struct fdtable *fdt = files_fdtable(files); 38 + 39 + f_flags = file->f_flags; 40 + if (close_on_exec(fd, fdt)) 41 + f_flags |= O_CLOEXEC; 42 + 43 + get_file(file); 44 + ret = 0; 45 + } 46 + spin_unlock(&files->file_lock); 47 + put_files_struct(files); 48 + } 49 + 50 + if (!ret) { 51 + seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", 52 + (long long)file->f_pos, f_flags); 53 + fput(file); 54 + } 55 + 56 + return ret; 57 + } 58 + 59 + static int seq_fdinfo_open(struct inode *inode, struct file *file) 60 + { 61 + return single_open(file, seq_show, inode); 62 + } 63 + 64 + static const struct file_operations proc_fdinfo_file_operations = { 65 + .open = seq_fdinfo_open, 66 + .read = seq_read, 67 + .llseek = seq_lseek, 68 + .release = single_release, 69 + }; 70 + 71 + static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) 72 + { 73 + struct files_struct *files; 74 + struct task_struct *task; 75 + const struct cred *cred; 76 + struct inode *inode; 77 + int fd; 78 + 79 + if (flags & LOOKUP_RCU) 80 + return -ECHILD; 81 + 82 + inode = dentry->d_inode; 83 + task = 
get_proc_task(inode); 84 + fd = proc_fd(inode); 85 + 86 + if (task) { 87 + files = get_files_struct(task); 88 + if (files) { 89 + struct file *file; 90 + 91 + rcu_read_lock(); 92 + file = fcheck_files(files, fd); 93 + if (file) { 94 + unsigned f_mode = file->f_mode; 95 + 96 + rcu_read_unlock(); 97 + put_files_struct(files); 98 + 99 + if (task_dumpable(task)) { 100 + rcu_read_lock(); 101 + cred = __task_cred(task); 102 + inode->i_uid = cred->euid; 103 + inode->i_gid = cred->egid; 104 + rcu_read_unlock(); 105 + } else { 106 + inode->i_uid = GLOBAL_ROOT_UID; 107 + inode->i_gid = GLOBAL_ROOT_GID; 108 + } 109 + 110 + if (S_ISLNK(inode->i_mode)) { 111 + unsigned i_mode = S_IFLNK; 112 + if (f_mode & FMODE_READ) 113 + i_mode |= S_IRUSR | S_IXUSR; 114 + if (f_mode & FMODE_WRITE) 115 + i_mode |= S_IWUSR | S_IXUSR; 116 + inode->i_mode = i_mode; 117 + } 118 + 119 + security_task_to_inode(task, inode); 120 + put_task_struct(task); 121 + return 1; 122 + } 123 + rcu_read_unlock(); 124 + put_files_struct(files); 125 + } 126 + put_task_struct(task); 127 + } 128 + 129 + d_drop(dentry); 130 + return 0; 131 + } 132 + 133 + static const struct dentry_operations tid_fd_dentry_operations = { 134 + .d_revalidate = tid_fd_revalidate, 135 + .d_delete = pid_delete_dentry, 136 + }; 137 + 138 + static int proc_fd_link(struct dentry *dentry, struct path *path) 139 + { 140 + struct files_struct *files = NULL; 141 + struct task_struct *task; 142 + int ret = -ENOENT; 143 + 144 + task = get_proc_task(dentry->d_inode); 145 + if (task) { 146 + files = get_files_struct(task); 147 + put_task_struct(task); 148 + } 149 + 150 + if (files) { 151 + int fd = proc_fd(dentry->d_inode); 152 + struct file *fd_file; 153 + 154 + spin_lock(&files->file_lock); 155 + fd_file = fcheck_files(files, fd); 156 + if (fd_file) { 157 + *path = fd_file->f_path; 158 + path_get(&fd_file->f_path); 159 + ret = 0; 160 + } 161 + spin_unlock(&files->file_lock); 162 + put_files_struct(files); 163 + } 164 + 165 + return ret; 166 + } 
167 + 168 + static struct dentry * 169 + proc_fd_instantiate(struct inode *dir, struct dentry *dentry, 170 + struct task_struct *task, const void *ptr) 171 + { 172 + struct dentry *error = ERR_PTR(-ENOENT); 173 + unsigned fd = (unsigned long)ptr; 174 + struct proc_inode *ei; 175 + struct inode *inode; 176 + 177 + inode = proc_pid_make_inode(dir->i_sb, task); 178 + if (!inode) 179 + goto out; 180 + 181 + ei = PROC_I(inode); 182 + ei->fd = fd; 183 + 184 + inode->i_mode = S_IFLNK; 185 + inode->i_op = &proc_pid_link_inode_operations; 186 + inode->i_size = 64; 187 + 188 + ei->op.proc_get_link = proc_fd_link; 189 + 190 + d_set_d_op(dentry, &tid_fd_dentry_operations); 191 + d_add(dentry, inode); 192 + 193 + /* Close the race of the process dying before we return the dentry */ 194 + if (tid_fd_revalidate(dentry, 0)) 195 + error = NULL; 196 + out: 197 + return error; 198 + } 199 + 200 + static struct dentry *proc_lookupfd_common(struct inode *dir, 201 + struct dentry *dentry, 202 + instantiate_t instantiate) 203 + { 204 + struct task_struct *task = get_proc_task(dir); 205 + struct dentry *result = ERR_PTR(-ENOENT); 206 + unsigned fd = name_to_int(dentry); 207 + 208 + if (!task) 209 + goto out_no_task; 210 + if (fd == ~0U) 211 + goto out; 212 + 213 + result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); 214 + out: 215 + put_task_struct(task); 216 + out_no_task: 217 + return result; 218 + } 219 + 220 + static int proc_readfd_common(struct file * filp, void * dirent, 221 + filldir_t filldir, instantiate_t instantiate) 222 + { 223 + struct dentry *dentry = filp->f_path.dentry; 224 + struct inode *inode = dentry->d_inode; 225 + struct task_struct *p = get_proc_task(inode); 226 + struct files_struct *files; 227 + unsigned int fd, ino; 228 + int retval; 229 + 230 + retval = -ENOENT; 231 + if (!p) 232 + goto out_no_task; 233 + retval = 0; 234 + 235 + fd = filp->f_pos; 236 + switch (fd) { 237 + case 0: 238 + if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 239 
+ goto out; 240 + filp->f_pos++; 241 + case 1: 242 + ino = parent_ino(dentry); 243 + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 244 + goto out; 245 + filp->f_pos++; 246 + default: 247 + files = get_files_struct(p); 248 + if (!files) 249 + goto out; 250 + rcu_read_lock(); 251 + for (fd = filp->f_pos - 2; 252 + fd < files_fdtable(files)->max_fds; 253 + fd++, filp->f_pos++) { 254 + char name[PROC_NUMBUF]; 255 + int len; 256 + int rv; 257 + 258 + if (!fcheck_files(files, fd)) 259 + continue; 260 + rcu_read_unlock(); 261 + 262 + len = snprintf(name, sizeof(name), "%d", fd); 263 + rv = proc_fill_cache(filp, dirent, filldir, 264 + name, len, instantiate, p, 265 + (void *)(unsigned long)fd); 266 + if (rv < 0) 267 + goto out_fd_loop; 268 + rcu_read_lock(); 269 + } 270 + rcu_read_unlock(); 271 + out_fd_loop: 272 + put_files_struct(files); 273 + } 274 + out: 275 + put_task_struct(p); 276 + out_no_task: 277 + return retval; 278 + } 279 + 280 + static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) 281 + { 282 + return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); 283 + } 284 + 285 + const struct file_operations proc_fd_operations = { 286 + .read = generic_read_dir, 287 + .readdir = proc_readfd, 288 + .llseek = default_llseek, 289 + }; 290 + 291 + static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, 292 + unsigned int flags) 293 + { 294 + return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); 295 + } 296 + 297 + /* 298 + * /proc/pid/fd needs a special permission handler so that a process can still 299 + * access /proc/self/fd after it has executed a setuid(). 
300 + */ 301 + int proc_fd_permission(struct inode *inode, int mask) 302 + { 303 + int rv = generic_permission(inode, mask); 304 + if (rv == 0) 305 + return 0; 306 + if (task_pid(current) == proc_pid(inode)) 307 + rv = 0; 308 + return rv; 309 + } 310 + 311 + const struct inode_operations proc_fd_inode_operations = { 312 + .lookup = proc_lookupfd, 313 + .permission = proc_fd_permission, 314 + .setattr = proc_setattr, 315 + }; 316 + 317 + static struct dentry * 318 + proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, 319 + struct task_struct *task, const void *ptr) 320 + { 321 + struct dentry *error = ERR_PTR(-ENOENT); 322 + unsigned fd = (unsigned long)ptr; 323 + struct proc_inode *ei; 324 + struct inode *inode; 325 + 326 + inode = proc_pid_make_inode(dir->i_sb, task); 327 + if (!inode) 328 + goto out; 329 + 330 + ei = PROC_I(inode); 331 + ei->fd = fd; 332 + 333 + inode->i_mode = S_IFREG | S_IRUSR; 334 + inode->i_fop = &proc_fdinfo_file_operations; 335 + 336 + d_set_d_op(dentry, &tid_fd_dentry_operations); 337 + d_add(dentry, inode); 338 + 339 + /* Close the race of the process dying before we return the dentry */ 340 + if (tid_fd_revalidate(dentry, 0)) 341 + error = NULL; 342 + out: 343 + return error; 344 + } 345 + 346 + static struct dentry * 347 + proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) 348 + { 349 + return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); 350 + } 351 + 352 + static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) 353 + { 354 + return proc_readfd_common(filp, dirent, filldir, 355 + proc_fdinfo_instantiate); 356 + } 357 + 358 + const struct inode_operations proc_fdinfo_inode_operations = { 359 + .lookup = proc_lookupfdinfo, 360 + .setattr = proc_setattr, 361 + }; 362 + 363 + const struct file_operations proc_fdinfo_operations = { 364 + .read = generic_read_dir, 365 + .readdir = proc_readfdinfo, 366 + .llseek = default_llseek, 367 + };
+14
fs/proc/fd.h
··· 1 + #ifndef __PROCFS_FD_H__ 2 + #define __PROCFS_FD_H__ 3 + 4 + #include <linux/fs.h> 5 + 6 + extern const struct file_operations proc_fd_operations; 7 + extern const struct inode_operations proc_fd_inode_operations; 8 + 9 + extern const struct file_operations proc_fdinfo_operations; 10 + extern const struct inode_operations proc_fdinfo_inode_operations; 11 + 12 + extern int proc_fd_permission(struct inode *inode, int mask); 13 + 14 + #endif /* __PROCFS_FD_H__ */
+48
fs/proc/internal.h
··· 9 9 * 2 of the License, or (at your option) any later version. 10 10 */ 11 11 12 + #include <linux/sched.h> 12 13 #include <linux/proc_fs.h> 13 14 struct ctl_table_header; 14 15 ··· 66 65 extern const struct file_operations proc_pagemap_operations; 67 66 extern const struct file_operations proc_net_operations; 68 67 extern const struct inode_operations proc_net_inode_operations; 68 + extern const struct inode_operations proc_pid_link_inode_operations; 69 69 70 70 struct proc_maps_private { 71 71 struct pid *pid; ··· 91 89 static inline int proc_fd(struct inode *inode) 92 90 { 93 91 return PROC_I(inode)->fd; 92 + } 93 + 94 + static inline int task_dumpable(struct task_struct *task) 95 + { 96 + int dumpable = 0; 97 + struct mm_struct *mm; 98 + 99 + task_lock(task); 100 + mm = task->mm; 101 + if (mm) 102 + dumpable = get_dumpable(mm); 103 + task_unlock(task); 104 + if(dumpable == 1) 105 + return 1; 106 + return 0; 107 + } 108 + 109 + static inline int pid_delete_dentry(const struct dentry * dentry) 110 + { 111 + /* Is the task we represent dead? 112 + * If so, then don't put the dentry on the lru list, 113 + * kill it immediately. 114 + */ 115 + return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 116 + } 117 + 118 + static inline unsigned name_to_int(struct dentry *dentry) 119 + { 120 + const char *name = dentry->d_name.name; 121 + int len = dentry->d_name.len; 122 + unsigned n = 0; 123 + 124 + if (len > 1 && *name == '0') 125 + goto out; 126 + while (len-- > 0) { 127 + unsigned c = *name++ - '0'; 128 + if (c > 9) 129 + goto out; 130 + if (n >= (~0U-9)/10) 131 + goto out; 132 + n *= 10; 133 + n += c; 134 + } 135 + return n; 136 + out: 137 + return ~0U; 94 138 } 95 139 96 140 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
+5
fs/qnx4/inode.c
··· 391 391 392 392 static void destroy_inodecache(void) 393 393 { 394 + /* 395 + * Make sure all delayed rcu free inodes are flushed before we 396 + * destroy cache. 397 + */ 398 + rcu_barrier(); 394 399 kmem_cache_destroy(qnx4_inode_cachep); 395 400 } 396 401
+5
fs/qnx6/inode.c
··· 651 651 652 652 static void destroy_inodecache(void) 653 653 { 654 + /* 655 + * Make sure all delayed rcu free inodes are flushed before we 656 + * destroy cache. 657 + */ 658 + rcu_barrier(); 654 659 kmem_cache_destroy(qnx6_inode_cachep); 655 660 } 656 661
+79 -101
fs/read_write.c
··· 232 232 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 233 233 { 234 234 off_t retval; 235 - struct file * file; 236 - int fput_needed; 237 - 238 - retval = -EBADF; 239 - file = fget_light(fd, &fput_needed); 240 - if (!file) 241 - goto bad; 235 + struct fd f = fdget(fd); 236 + if (!f.file) 237 + return -EBADF; 242 238 243 239 retval = -EINVAL; 244 240 if (origin <= SEEK_MAX) { 245 - loff_t res = vfs_llseek(file, offset, origin); 241 + loff_t res = vfs_llseek(f.file, offset, origin); 246 242 retval = res; 247 243 if (res != (loff_t)retval) 248 244 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 249 245 } 250 - fput_light(file, fput_needed); 251 - bad: 246 + fdput(f); 252 247 return retval; 253 248 } 254 249 ··· 253 258 unsigned int, origin) 254 259 { 255 260 int retval; 256 - struct file * file; 261 + struct fd f = fdget(fd); 257 262 loff_t offset; 258 - int fput_needed; 259 263 260 - retval = -EBADF; 261 - file = fget_light(fd, &fput_needed); 262 - if (!file) 263 - goto bad; 264 + if (!f.file) 265 + return -EBADF; 264 266 265 267 retval = -EINVAL; 266 268 if (origin > SEEK_MAX) 267 269 goto out_putf; 268 270 269 - offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 271 + offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 270 272 origin); 271 273 272 274 retval = (int)offset; ··· 273 281 retval = 0; 274 282 } 275 283 out_putf: 276 - fput_light(file, fput_needed); 277 - bad: 284 + fdput(f); 278 285 return retval; 279 286 } 280 287 #endif ··· 452 461 453 462 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 454 463 { 455 - struct file *file; 464 + struct fd f = fdget(fd); 456 465 ssize_t ret = -EBADF; 457 - int fput_needed; 458 466 459 - file = fget_light(fd, &fput_needed); 460 - if (file) { 461 - loff_t pos = file_pos_read(file); 462 - ret = vfs_read(file, buf, count, &pos); 463 - file_pos_write(file, pos); 464 - fput_light(file, fput_needed); 467 + 
if (f.file) { 468 + loff_t pos = file_pos_read(f.file); 469 + ret = vfs_read(f.file, buf, count, &pos); 470 + file_pos_write(f.file, pos); 471 + fdput(f); 465 472 } 466 - 467 473 return ret; 468 474 } 469 475 470 476 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 471 477 size_t, count) 472 478 { 473 - struct file *file; 479 + struct fd f = fdget(fd); 474 480 ssize_t ret = -EBADF; 475 - int fput_needed; 476 481 477 - file = fget_light(fd, &fput_needed); 478 - if (file) { 479 - loff_t pos = file_pos_read(file); 480 - ret = vfs_write(file, buf, count, &pos); 481 - file_pos_write(file, pos); 482 - fput_light(file, fput_needed); 482 + if (f.file) { 483 + loff_t pos = file_pos_read(f.file); 484 + ret = vfs_write(f.file, buf, count, &pos); 485 + file_pos_write(f.file, pos); 486 + fdput(f); 483 487 } 484 488 485 489 return ret; ··· 483 497 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 484 498 size_t count, loff_t pos) 485 499 { 486 - struct file *file; 500 + struct fd f; 487 501 ssize_t ret = -EBADF; 488 - int fput_needed; 489 502 490 503 if (pos < 0) 491 504 return -EINVAL; 492 505 493 - file = fget_light(fd, &fput_needed); 494 - if (file) { 506 + f = fdget(fd); 507 + if (f.file) { 495 508 ret = -ESPIPE; 496 - if (file->f_mode & FMODE_PREAD) 497 - ret = vfs_read(file, buf, count, &pos); 498 - fput_light(file, fput_needed); 509 + if (f.file->f_mode & FMODE_PREAD) 510 + ret = vfs_read(f.file, buf, count, &pos); 511 + fdput(f); 499 512 } 500 513 501 514 return ret; ··· 511 526 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 512 527 size_t count, loff_t pos) 513 528 { 514 - struct file *file; 529 + struct fd f; 515 530 ssize_t ret = -EBADF; 516 - int fput_needed; 517 531 518 532 if (pos < 0) 519 533 return -EINVAL; 520 534 521 - file = fget_light(fd, &fput_needed); 522 - if (file) { 535 + f = fdget(fd); 536 + if (f.file) { 523 537 ret = -ESPIPE; 524 - if (file->f_mode & FMODE_PWRITE) 525 - ret = vfs_write(file, buf, count, 
&pos); 526 - fput_light(file, fput_needed); 538 + if (f.file->f_mode & FMODE_PWRITE) 539 + ret = vfs_write(f.file, buf, count, &pos); 540 + fdput(f); 527 541 } 528 542 529 543 return ret; ··· 773 789 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 774 790 unsigned long, vlen) 775 791 { 776 - struct file *file; 792 + struct fd f = fdget(fd); 777 793 ssize_t ret = -EBADF; 778 - int fput_needed; 779 794 780 - file = fget_light(fd, &fput_needed); 781 - if (file) { 782 - loff_t pos = file_pos_read(file); 783 - ret = vfs_readv(file, vec, vlen, &pos); 784 - file_pos_write(file, pos); 785 - fput_light(file, fput_needed); 795 + if (f.file) { 796 + loff_t pos = file_pos_read(f.file); 797 + ret = vfs_readv(f.file, vec, vlen, &pos); 798 + file_pos_write(f.file, pos); 799 + fdput(f); 786 800 } 787 801 788 802 if (ret > 0) ··· 792 810 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 793 811 unsigned long, vlen) 794 812 { 795 - struct file *file; 813 + struct fd f = fdget(fd); 796 814 ssize_t ret = -EBADF; 797 - int fput_needed; 798 815 799 - file = fget_light(fd, &fput_needed); 800 - if (file) { 801 - loff_t pos = file_pos_read(file); 802 - ret = vfs_writev(file, vec, vlen, &pos); 803 - file_pos_write(file, pos); 804 - fput_light(file, fput_needed); 816 + if (f.file) { 817 + loff_t pos = file_pos_read(f.file); 818 + ret = vfs_writev(f.file, vec, vlen, &pos); 819 + file_pos_write(f.file, pos); 820 + fdput(f); 805 821 } 806 822 807 823 if (ret > 0) ··· 818 838 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 819 839 { 820 840 loff_t pos = pos_from_hilo(pos_h, pos_l); 821 - struct file *file; 841 + struct fd f; 822 842 ssize_t ret = -EBADF; 823 - int fput_needed; 824 843 825 844 if (pos < 0) 826 845 return -EINVAL; 827 846 828 - file = fget_light(fd, &fput_needed); 829 - if (file) { 847 + f = fdget(fd); 848 + if (f.file) { 830 849 ret = -ESPIPE; 831 - if (file->f_mode & FMODE_PREAD) 832 - ret = vfs_readv(file, 
vec, vlen, &pos); 833 - fput_light(file, fput_needed); 850 + if (f.file->f_mode & FMODE_PREAD) 851 + ret = vfs_readv(f.file, vec, vlen, &pos); 852 + fdput(f); 834 853 } 835 854 836 855 if (ret > 0) ··· 842 863 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 843 864 { 844 865 loff_t pos = pos_from_hilo(pos_h, pos_l); 845 - struct file *file; 866 + struct fd f; 846 867 ssize_t ret = -EBADF; 847 - int fput_needed; 848 868 849 869 if (pos < 0) 850 870 return -EINVAL; 851 871 852 - file = fget_light(fd, &fput_needed); 853 - if (file) { 872 + f = fdget(fd); 873 + if (f.file) { 854 874 ret = -ESPIPE; 855 - if (file->f_mode & FMODE_PWRITE) 856 - ret = vfs_writev(file, vec, vlen, &pos); 857 - fput_light(file, fput_needed); 875 + if (f.file->f_mode & FMODE_PWRITE) 876 + ret = vfs_writev(f.file, vec, vlen, &pos); 877 + fdput(f); 858 878 } 859 879 860 880 if (ret > 0) ··· 862 884 return ret; 863 885 } 864 886 865 - static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 866 - size_t count, loff_t max) 887 + ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, 888 + loff_t max) 867 889 { 868 - struct file * in_file, * out_file; 869 - struct inode * in_inode, * out_inode; 890 + struct fd in, out; 891 + struct inode *in_inode, *out_inode; 870 892 loff_t pos; 871 893 ssize_t retval; 872 - int fput_needed_in, fput_needed_out, fl; 894 + int fl; 873 895 874 896 /* 875 897 * Get input file, and verify that it is ok.. 
876 898 */ 877 899 retval = -EBADF; 878 - in_file = fget_light(in_fd, &fput_needed_in); 879 - if (!in_file) 900 + in = fdget(in_fd); 901 + if (!in.file) 880 902 goto out; 881 - if (!(in_file->f_mode & FMODE_READ)) 903 + if (!(in.file->f_mode & FMODE_READ)) 882 904 goto fput_in; 883 905 retval = -ESPIPE; 884 906 if (!ppos) 885 - ppos = &in_file->f_pos; 907 + ppos = &in.file->f_pos; 886 908 else 887 - if (!(in_file->f_mode & FMODE_PREAD)) 909 + if (!(in.file->f_mode & FMODE_PREAD)) 888 910 goto fput_in; 889 - retval = rw_verify_area(READ, in_file, ppos, count); 911 + retval = rw_verify_area(READ, in.file, ppos, count); 890 912 if (retval < 0) 891 913 goto fput_in; 892 914 count = retval; ··· 895 917 * Get output file, and verify that it is ok.. 896 918 */ 897 919 retval = -EBADF; 898 - out_file = fget_light(out_fd, &fput_needed_out); 899 - if (!out_file) 920 + out = fdget(out_fd); 921 + if (!out.file) 900 922 goto fput_in; 901 - if (!(out_file->f_mode & FMODE_WRITE)) 923 + if (!(out.file->f_mode & FMODE_WRITE)) 902 924 goto fput_out; 903 925 retval = -EINVAL; 904 - in_inode = in_file->f_path.dentry->d_inode; 905 - out_inode = out_file->f_path.dentry->d_inode; 906 - retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 926 + in_inode = in.file->f_path.dentry->d_inode; 927 + out_inode = out.file->f_path.dentry->d_inode; 928 + retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); 907 929 if (retval < 0) 908 930 goto fput_out; 909 931 count = retval; ··· 927 949 * and the application is arguably buggy if it doesn't expect 928 950 * EAGAIN on a non-blocking file descriptor. 
929 951 */ 930 - if (in_file->f_flags & O_NONBLOCK) 952 + if (in.file->f_flags & O_NONBLOCK) 931 953 fl = SPLICE_F_NONBLOCK; 932 954 #endif 933 - retval = do_splice_direct(in_file, ppos, out_file, count, fl); 955 + retval = do_splice_direct(in.file, ppos, out.file, count, fl); 934 956 935 957 if (retval > 0) { 936 958 add_rchar(current, retval); ··· 943 965 retval = -EOVERFLOW; 944 966 945 967 fput_out: 946 - fput_light(out_file, fput_needed_out); 968 + fdput(out); 947 969 fput_in: 948 - fput_light(in_file, fput_needed_in); 970 + fdput(in); 949 971 out: 950 972 return retval; 951 973 }
+2
fs/read_write.h
··· 12 12 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); 13 13 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 14 14 unsigned long nr_segs, loff_t *ppos, io_fn_t fn); 15 + ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, 16 + loff_t max);
+16 -20
fs/readdir.c
··· 106 106 struct old_linux_dirent __user *, dirent, unsigned int, count) 107 107 { 108 108 int error; 109 - struct file * file; 109 + struct fd f = fdget(fd); 110 110 struct readdir_callback buf; 111 - int fput_needed; 112 111 113 - file = fget_light(fd, &fput_needed); 114 - if (!file) 112 + if (!f.file) 115 113 return -EBADF; 116 114 117 115 buf.result = 0; 118 116 buf.dirent = dirent; 119 117 120 - error = vfs_readdir(file, fillonedir, &buf); 118 + error = vfs_readdir(f.file, fillonedir, &buf); 121 119 if (buf.result) 122 120 error = buf.result; 123 121 124 - fput_light(file, fput_needed); 122 + fdput(f); 125 123 return error; 126 124 } 127 125 ··· 189 191 SYSCALL_DEFINE3(getdents, unsigned int, fd, 190 192 struct linux_dirent __user *, dirent, unsigned int, count) 191 193 { 192 - struct file * file; 194 + struct fd f; 193 195 struct linux_dirent __user * lastdirent; 194 196 struct getdents_callback buf; 195 - int fput_needed; 196 197 int error; 197 198 198 199 if (!access_ok(VERIFY_WRITE, dirent, count)) 199 200 return -EFAULT; 200 201 201 - file = fget_light(fd, &fput_needed); 202 - if (!file) 202 + f = fdget(fd); 203 + if (!f.file) 203 204 return -EBADF; 204 205 205 206 buf.current_dir = dirent; ··· 206 209 buf.count = count; 207 210 buf.error = 0; 208 211 209 - error = vfs_readdir(file, filldir, &buf); 212 + error = vfs_readdir(f.file, filldir, &buf); 210 213 if (error >= 0) 211 214 error = buf.error; 212 215 lastdirent = buf.previous; 213 216 if (lastdirent) { 214 - if (put_user(file->f_pos, &lastdirent->d_off)) 217 + if (put_user(f.file->f_pos, &lastdirent->d_off)) 215 218 error = -EFAULT; 216 219 else 217 220 error = count - buf.count; 218 221 } 219 - fput_light(file, fput_needed); 222 + fdput(f); 220 223 return error; 221 224 } 222 225 ··· 269 272 SYSCALL_DEFINE3(getdents64, unsigned int, fd, 270 273 struct linux_dirent64 __user *, dirent, unsigned int, count) 271 274 { 272 - struct file * file; 275 + struct fd f; 273 276 struct linux_dirent64 __user * 
lastdirent; 274 277 struct getdents_callback64 buf; 275 - int fput_needed; 276 278 int error; 277 279 278 280 if (!access_ok(VERIFY_WRITE, dirent, count)) 279 281 return -EFAULT; 280 282 281 - file = fget_light(fd, &fput_needed); 282 - if (!file) 283 + f = fdget(fd); 284 + if (!f.file) 283 285 return -EBADF; 284 286 285 287 buf.current_dir = dirent; ··· 286 290 buf.count = count; 287 291 buf.error = 0; 288 292 289 - error = vfs_readdir(file, filldir64, &buf); 293 + error = vfs_readdir(f.file, filldir64, &buf); 290 294 if (error >= 0) 291 295 error = buf.error; 292 296 lastdirent = buf.previous; 293 297 if (lastdirent) { 294 - typeof(lastdirent->d_off) d_off = file->f_pos; 298 + typeof(lastdirent->d_off) d_off = f.file->f_pos; 295 299 if (__put_user(d_off, &lastdirent->d_off)) 296 300 error = -EFAULT; 297 301 else 298 302 error = count - buf.count; 299 303 } 300 - fput_light(file, fput_needed); 304 + fdput(f); 301 305 return error; 302 306 }
+5
fs/reiserfs/super.c
··· 608 608 609 609 static void destroy_inodecache(void) 610 610 { 611 + /* 612 + * Make sure all delayed rcu free inodes are flushed before we 613 + * destroy cache. 614 + */ 615 + rcu_barrier(); 611 616 kmem_cache_destroy(reiserfs_inode_cachep); 612 617 } 613 618
+5
fs/romfs/super.c
··· 648 648 static void __exit exit_romfs_fs(void) 649 649 { 650 650 unregister_filesystem(&romfs_fs_type); 651 + /* 652 + * Make sure all delayed rcu free inodes are flushed before we 653 + * destroy cache. 654 + */ 655 + rcu_barrier(); 651 656 kmem_cache_destroy(romfs_inode_cachep); 652 657 } 653 658
+13 -18
fs/select.c
··· 220 220 struct poll_table_entry *entry = poll_get_entry(pwq); 221 221 if (!entry) 222 222 return; 223 - get_file(filp); 224 - entry->filp = filp; 223 + entry->filp = get_file(filp); 225 224 entry->wait_address = wait_address; 226 225 entry->key = p->_key; 227 226 init_waitqueue_func_entry(&entry->wait, pollwake); ··· 428 429 for (i = 0; i < n; ++rinp, ++routp, ++rexp) { 429 430 unsigned long in, out, ex, all_bits, bit = 1, mask, j; 430 431 unsigned long res_in = 0, res_out = 0, res_ex = 0; 431 - const struct file_operations *f_op = NULL; 432 - struct file *file = NULL; 433 432 434 433 in = *inp++; out = *outp++; ex = *exp++; 435 434 all_bits = in | out | ex; ··· 437 440 } 438 441 439 442 for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { 440 - int fput_needed; 443 + struct fd f; 441 444 if (i >= n) 442 445 break; 443 446 if (!(bit & all_bits)) 444 447 continue; 445 - file = fget_light(i, &fput_needed); 446 - if (file) { 447 - f_op = file->f_op; 448 + f = fdget(i); 449 + if (f.file) { 450 + const struct file_operations *f_op; 451 + f_op = f.file->f_op; 448 452 mask = DEFAULT_POLLMASK; 449 453 if (f_op && f_op->poll) { 450 454 wait_key_set(wait, in, out, bit); 451 - mask = (*f_op->poll)(file, wait); 455 + mask = (*f_op->poll)(f.file, wait); 452 456 } 453 - fput_light(file, fput_needed); 457 + fdput(f); 454 458 if ((mask & POLLIN_SET) && (in & bit)) { 455 459 res_in |= bit; 456 460 retval++; ··· 724 726 mask = 0; 725 727 fd = pollfd->fd; 726 728 if (fd >= 0) { 727 - int fput_needed; 728 - struct file * file; 729 - 730 - file = fget_light(fd, &fput_needed); 729 + struct fd f = fdget(fd); 731 730 mask = POLLNVAL; 732 - if (file != NULL) { 731 + if (f.file) { 733 732 mask = DEFAULT_POLLMASK; 734 - if (file->f_op && file->f_op->poll) { 733 + if (f.file->f_op && f.file->f_op->poll) { 735 734 pwait->_key = pollfd->events|POLLERR|POLLHUP; 736 - mask = file->f_op->poll(file, pwait); 735 + mask = f.file->f_op->poll(f.file, pwait); 737 736 } 738 737 /* Mask out unneeded 
events. */ 739 738 mask &= pollfd->events | POLLERR | POLLHUP; 740 - fput_light(file, fput_needed); 739 + fdput(f); 741 740 } 742 741 } 743 742 pollfd->revents = mask;
+6 -7
fs/signalfd.c
··· 269 269 if (ufd < 0) 270 270 kfree(ctx); 271 271 } else { 272 - int fput_needed; 273 - struct file *file = fget_light(ufd, &fput_needed); 274 - if (!file) 272 + struct fd f = fdget(ufd); 273 + if (!f.file) 275 274 return -EBADF; 276 - ctx = file->private_data; 277 - if (file->f_op != &signalfd_fops) { 278 - fput_light(file, fput_needed); 275 + ctx = f.file->private_data; 276 + if (f.file->f_op != &signalfd_fops) { 277 + fdput(f); 279 278 return -EINVAL; 280 279 } 281 280 spin_lock_irq(&current->sighand->siglock); ··· 282 283 spin_unlock_irq(&current->sighand->siglock); 283 284 284 285 wake_up(&current->sighand->signalfd_wqh); 285 - fput_light(file, fput_needed); 286 + fdput(f); 286 287 } 287 288 288 289 return ufd;
+31 -36
fs/splice.c
··· 1666 1666 SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, 1667 1667 unsigned long, nr_segs, unsigned int, flags) 1668 1668 { 1669 - struct file *file; 1669 + struct fd f; 1670 1670 long error; 1671 - int fput; 1672 1671 1673 1672 if (unlikely(nr_segs > UIO_MAXIOV)) 1674 1673 return -EINVAL; ··· 1675 1676 return 0; 1676 1677 1677 1678 error = -EBADF; 1678 - file = fget_light(fd, &fput); 1679 - if (file) { 1680 - if (file->f_mode & FMODE_WRITE) 1681 - error = vmsplice_to_pipe(file, iov, nr_segs, flags); 1682 - else if (file->f_mode & FMODE_READ) 1683 - error = vmsplice_to_user(file, iov, nr_segs, flags); 1679 + f = fdget(fd); 1680 + if (f.file) { 1681 + if (f.file->f_mode & FMODE_WRITE) 1682 + error = vmsplice_to_pipe(f.file, iov, nr_segs, flags); 1683 + else if (f.file->f_mode & FMODE_READ) 1684 + error = vmsplice_to_user(f.file, iov, nr_segs, flags); 1684 1685 1685 - fput_light(file, fput); 1686 + fdput(f); 1686 1687 } 1687 1688 1688 1689 return error; ··· 1692 1693 int, fd_out, loff_t __user *, off_out, 1693 1694 size_t, len, unsigned int, flags) 1694 1695 { 1696 + struct fd in, out; 1695 1697 long error; 1696 - struct file *in, *out; 1697 - int fput_in, fput_out; 1698 1698 1699 1699 if (unlikely(!len)) 1700 1700 return 0; 1701 1701 1702 1702 error = -EBADF; 1703 - in = fget_light(fd_in, &fput_in); 1704 - if (in) { 1705 - if (in->f_mode & FMODE_READ) { 1706 - out = fget_light(fd_out, &fput_out); 1707 - if (out) { 1708 - if (out->f_mode & FMODE_WRITE) 1709 - error = do_splice(in, off_in, 1710 - out, off_out, 1703 + in = fdget(fd_in); 1704 + if (in.file) { 1705 + if (in.file->f_mode & FMODE_READ) { 1706 + out = fdget(fd_out); 1707 + if (out.file) { 1708 + if (out.file->f_mode & FMODE_WRITE) 1709 + error = do_splice(in.file, off_in, 1710 + out.file, off_out, 1711 1711 len, flags); 1712 - fput_light(out, fput_out); 1712 + fdput(out); 1713 1713 } 1714 1714 } 1715 - 1716 - fput_light(in, fput_in); 1715 + fdput(in); 1717 1716 } 1718 - 1719 1717 
return error; 1720 1718 } 1721 1719 ··· 2023 2027 2024 2028 SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) 2025 2029 { 2026 - struct file *in; 2027 - int error, fput_in; 2030 + struct fd in; 2031 + int error; 2028 2032 2029 2033 if (unlikely(!len)) 2030 2034 return 0; 2031 2035 2032 2036 error = -EBADF; 2033 - in = fget_light(fdin, &fput_in); 2034 - if (in) { 2035 - if (in->f_mode & FMODE_READ) { 2036 - int fput_out; 2037 - struct file *out = fget_light(fdout, &fput_out); 2038 - 2039 - if (out) { 2040 - if (out->f_mode & FMODE_WRITE) 2041 - error = do_tee(in, out, len, flags); 2042 - fput_light(out, fput_out); 2037 + in = fdget(fdin); 2038 + if (in.file) { 2039 + if (in.file->f_mode & FMODE_READ) { 2040 + struct fd out = fdget(fdout); 2041 + if (out.file) { 2042 + if (out.file->f_mode & FMODE_WRITE) 2043 + error = do_tee(in.file, out.file, 2044 + len, flags); 2045 + fdput(out); 2043 2046 } 2044 2047 } 2045 - fput_light(in, fput_in); 2048 + fdput(in); 2046 2049 } 2047 2050 2048 2051 return error;
+5
fs/squashfs/super.c
··· 425 425 426 426 static void destroy_inodecache(void) 427 427 { 428 + /* 429 + * Make sure all delayed rcu free inodes are flushed before we 430 + * destroy cache. 431 + */ 432 + rcu_barrier(); 428 433 kmem_cache_destroy(squashfs_inode_cachep); 429 434 } 430 435
+5 -5
fs/stat.c
··· 57 57 58 58 int vfs_fstat(unsigned int fd, struct kstat *stat) 59 59 { 60 - int fput_needed; 61 - struct file *f = fget_raw_light(fd, &fput_needed); 60 + struct fd f = fdget_raw(fd); 62 61 int error = -EBADF; 63 62 64 - if (f) { 65 - error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); 66 - fput_light(f, fput_needed); 63 + if (f.file) { 64 + error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry, 65 + stat); 66 + fdput(f); 67 67 } 68 68 return error; 69 69 }
+4 -5
fs/statfs.c
··· 87 87 88 88 int fd_statfs(int fd, struct kstatfs *st) 89 89 { 90 - int fput_needed; 91 - struct file *file = fget_light(fd, &fput_needed); 90 + struct fd f = fdget(fd); 92 91 int error = -EBADF; 93 - if (file) { 94 - error = vfs_statfs(&file->f_path, st); 95 - fput_light(file, fput_needed); 92 + if (f.file) { 93 + error = vfs_statfs(&f.file->f_path, st); 94 + fdput(f); 96 95 } 97 96 return error; 98 97 }
-6
fs/super.c
··· 307 307 308 308 /* caches are now gone, we can safely kill the shrinker now */ 309 309 unregister_shrinker(&s->s_shrink); 310 - 311 - /* 312 - * We need to call rcu_barrier so all the delayed rcu free 313 - * inodes are flushed before we release the fs module. 314 - */ 315 - rcu_barrier(); 316 310 put_filesystem(fs); 317 311 put_super(s); 318 312 } else {
+14 -19
fs/sync.c
··· 148 148 */ 149 149 SYSCALL_DEFINE1(syncfs, int, fd) 150 150 { 151 - struct file *file; 151 + struct fd f = fdget(fd); 152 152 struct super_block *sb; 153 153 int ret; 154 - int fput_needed; 155 154 156 - file = fget_light(fd, &fput_needed); 157 - if (!file) 155 + if (!f.file) 158 156 return -EBADF; 159 - sb = file->f_dentry->d_sb; 157 + sb = f.file->f_dentry->d_sb; 160 158 161 159 down_read(&sb->s_umount); 162 160 ret = sync_filesystem(sb); 163 161 up_read(&sb->s_umount); 164 162 165 - fput_light(file, fput_needed); 163 + fdput(f); 166 164 return ret; 167 165 } 168 166 ··· 199 201 200 202 static int do_fsync(unsigned int fd, int datasync) 201 203 { 202 - struct file *file; 204 + struct fd f = fdget(fd); 203 205 int ret = -EBADF; 204 - int fput_needed; 205 206 206 - file = fget_light(fd, &fput_needed); 207 - if (file) { 208 - ret = vfs_fsync(file, datasync); 209 - fput_light(file, fput_needed); 207 + if (f.file) { 208 + ret = vfs_fsync(f.file, datasync); 209 + fdput(f); 210 210 } 211 211 return ret; 212 212 } ··· 287 291 unsigned int flags) 288 292 { 289 293 int ret; 290 - struct file *file; 294 + struct fd f; 291 295 struct address_space *mapping; 292 296 loff_t endbyte; /* inclusive */ 293 - int fput_needed; 294 297 umode_t i_mode; 295 298 296 299 ret = -EINVAL; ··· 328 333 endbyte--; /* inclusive */ 329 334 330 335 ret = -EBADF; 331 - file = fget_light(fd, &fput_needed); 332 - if (!file) 336 + f = fdget(fd); 337 + if (!f.file) 333 338 goto out; 334 339 335 - i_mode = file->f_path.dentry->d_inode->i_mode; 340 + i_mode = f.file->f_path.dentry->d_inode->i_mode; 336 341 ret = -ESPIPE; 337 342 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && 338 343 !S_ISLNK(i_mode)) 339 344 goto out_put; 340 345 341 - mapping = file->f_mapping; 346 + mapping = f.file->f_mapping; 342 347 if (!mapping) { 343 348 ret = -EINVAL; 344 349 goto out_put; ··· 361 366 ret = filemap_fdatawait_range(mapping, offset, endbyte); 362 367 363 368 out_put: 364 - fput_light(file, 
fput_needed); 369 + fdput(f); 365 370 out: 366 371 return ret; 367 372 }
+5
fs/sysv/inode.c
··· 360 360 361 361 void sysv_destroy_icache(void) 362 362 { 363 + /* 364 + * Make sure all delayed rcu free inodes are flushed before we 365 + * destroy cache. 366 + */ 367 + rcu_barrier(); 363 368 kmem_cache_destroy(sysv_inode_cachep); 364 369 }
+21 -24
fs/timerfd.c
··· 234 234 .llseek = noop_llseek, 235 235 }; 236 236 237 - static struct file *timerfd_fget(int fd) 237 + static int timerfd_fget(int fd, struct fd *p) 238 238 { 239 - struct file *file; 240 - 241 - file = fget(fd); 242 - if (!file) 243 - return ERR_PTR(-EBADF); 244 - if (file->f_op != &timerfd_fops) { 245 - fput(file); 246 - return ERR_PTR(-EINVAL); 239 + struct fd f = fdget(fd); 240 + if (!f.file) 241 + return -EBADF; 242 + if (f.file->f_op != &timerfd_fops) { 243 + fdput(f); 244 + return -EINVAL; 247 245 } 248 - 249 - return file; 246 + *p = f; 247 + return 0; 250 248 } 251 249 252 250 SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ··· 282 284 const struct itimerspec __user *, utmr, 283 285 struct itimerspec __user *, otmr) 284 286 { 285 - struct file *file; 287 + struct fd f; 286 288 struct timerfd_ctx *ctx; 287 289 struct itimerspec ktmr, kotmr; 288 290 int ret; ··· 295 297 !timespec_valid(&ktmr.it_interval)) 296 298 return -EINVAL; 297 299 298 - file = timerfd_fget(ufd); 299 - if (IS_ERR(file)) 300 - return PTR_ERR(file); 301 - ctx = file->private_data; 300 + ret = timerfd_fget(ufd, &f); 301 + if (ret) 302 + return ret; 303 + ctx = f.file->private_data; 302 304 303 305 timerfd_setup_cancel(ctx, flags); 304 306 ··· 332 334 ret = timerfd_setup(ctx, flags, &ktmr); 333 335 334 336 spin_unlock_irq(&ctx->wqh.lock); 335 - fput(file); 337 + fdput(f); 336 338 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) 337 339 return -EFAULT; 338 340 ··· 341 343 342 344 SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 343 345 { 344 - struct file *file; 346 + struct fd f; 345 347 struct timerfd_ctx *ctx; 346 348 struct itimerspec kotmr; 347 - 348 - file = timerfd_fget(ufd); 349 - if (IS_ERR(file)) 350 - return PTR_ERR(file); 351 - ctx = file->private_data; 349 + int ret = timerfd_fget(ufd, &f); 350 + if (ret) 351 + return ret; 352 + ctx = f.file->private_data; 352 353 353 354 spin_lock_irq(&ctx->wqh.lock); 354 355 if (ctx->expired && 
ctx->tintv.tv64) { ··· 359 362 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 360 363 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 361 364 spin_unlock_irq(&ctx->wqh.lock); 362 - fput(file); 365 + fdput(f); 363 366 364 367 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 365 368 }
+6
fs/ubifs/super.c
··· 2298 2298 dbg_debugfs_exit(); 2299 2299 ubifs_compressors_exit(); 2300 2300 unregister_shrinker(&ubifs_shrinker_info); 2301 + 2302 + /* 2303 + * Make sure all delayed rcu free inodes are flushed before we 2304 + * destroy cache. 2305 + */ 2306 + rcu_barrier(); 2301 2307 kmem_cache_destroy(ubifs_inode_slab); 2302 2308 unregister_filesystem(&ubifs_fs_type); 2303 2309 }
+5
fs/udf/super.c
··· 171 171 172 172 static void destroy_inodecache(void) 173 173 { 174 + /* 175 + * Make sure all delayed rcu free inodes are flushed before we 176 + * destroy cache. 177 + */ 178 + rcu_barrier(); 174 179 kmem_cache_destroy(udf_inode_cachep); 175 180 } 176 181
+5
fs/ufs/super.c
··· 1466 1466 1467 1467 static void destroy_inodecache(void) 1468 1468 { 1469 + /* 1470 + * Make sure all delayed rcu free inodes are flushed before we 1471 + * destroy cache. 1472 + */ 1473 + rcu_barrier(); 1469 1474 kmem_cache_destroy(ufs_inode_cachep); 1470 1475 } 1471 1476
+5 -6
fs/utimes.c
··· 140 140 goto out; 141 141 142 142 if (filename == NULL && dfd != AT_FDCWD) { 143 - int fput_needed; 144 - struct file *file; 143 + struct fd f; 145 144 146 145 if (flags & AT_SYMLINK_NOFOLLOW) 147 146 goto out; 148 147 149 - file = fget_light(dfd, &fput_needed); 148 + f = fdget(dfd); 150 149 error = -EBADF; 151 - if (!file) 150 + if (!f.file) 152 151 goto out; 153 152 154 - error = utimes_common(&file->f_path, times); 155 - fput_light(file, fput_needed); 153 + error = utimes_common(&f.file->f_path, times); 154 + fdput(f); 156 155 } else { 157 156 struct path path; 158 157 int lookup_flags = 0;
+22 -30
fs/xattr.c
··· 403 403 SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, 404 404 const void __user *,value, size_t, size, int, flags) 405 405 { 406 - int fput_needed; 407 - struct file *f; 406 + struct fd f = fdget(fd); 408 407 struct dentry *dentry; 409 408 int error = -EBADF; 410 409 411 - f = fget_light(fd, &fput_needed); 412 - if (!f) 410 + if (!f.file) 413 411 return error; 414 - dentry = f->f_path.dentry; 412 + dentry = f.file->f_path.dentry; 415 413 audit_inode(NULL, dentry); 416 - error = mnt_want_write_file(f); 414 + error = mnt_want_write_file(f.file); 417 415 if (!error) { 418 416 error = setxattr(dentry, name, value, size, flags); 419 - mnt_drop_write_file(f); 417 + mnt_drop_write_file(f.file); 420 418 } 421 - fput_light(f, fput_needed); 419 + fdput(f); 422 420 return error; 423 421 } 424 422 ··· 500 502 SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, 501 503 void __user *, value, size_t, size) 502 504 { 503 - int fput_needed; 504 - struct file *f; 505 + struct fd f = fdget(fd); 505 506 ssize_t error = -EBADF; 506 507 507 - f = fget_light(fd, &fput_needed); 508 - if (!f) 508 + if (!f.file) 509 509 return error; 510 - audit_inode(NULL, f->f_path.dentry); 511 - error = getxattr(f->f_path.dentry, name, value, size); 512 - fput_light(f, fput_needed); 510 + audit_inode(NULL, f.file->f_path.dentry); 511 + error = getxattr(f.file->f_path.dentry, name, value, size); 512 + fdput(f); 513 513 return error; 514 514 } 515 515 ··· 579 583 580 584 SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) 581 585 { 582 - int fput_needed; 583 - struct file *f; 586 + struct fd f = fdget(fd); 584 587 ssize_t error = -EBADF; 585 588 586 - f = fget_light(fd, &fput_needed); 587 - if (!f) 589 + if (!f.file) 588 590 return error; 589 - audit_inode(NULL, f->f_path.dentry); 590 - error = listxattr(f->f_path.dentry, list, size); 591 - fput_light(f, fput_needed); 591 + audit_inode(NULL, f.file->f_path.dentry); 592 + error = 
listxattr(f.file->f_path.dentry, list, size); 593 + fdput(f); 592 594 return error; 593 595 } 594 596 ··· 646 652 647 653 SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) 648 654 { 649 - int fput_needed; 650 - struct file *f; 655 + struct fd f = fdget(fd); 651 656 struct dentry *dentry; 652 657 int error = -EBADF; 653 658 654 - f = fget_light(fd, &fput_needed); 655 - if (!f) 659 + if (!f.file) 656 660 return error; 657 - dentry = f->f_path.dentry; 661 + dentry = f.file->f_path.dentry; 658 662 audit_inode(NULL, dentry); 659 - error = mnt_want_write_file(f); 663 + error = mnt_want_write_file(f.file); 660 664 if (!error) { 661 665 error = removexattr(dentry, name); 662 - mnt_drop_write_file(f); 666 + mnt_drop_write_file(f.file); 663 667 } 664 - fput_light(f, fput_needed); 668 + fdput(f); 665 669 return error; 666 670 } 667 671
+17 -17
fs/xfs/xfs_dfrag.c
··· 48 48 xfs_swapext_t *sxp) 49 49 { 50 50 xfs_inode_t *ip, *tip; 51 - struct file *file, *tmp_file; 51 + struct fd f, tmp; 52 52 int error = 0; 53 53 54 54 /* Pull information for the target fd */ 55 - file = fget((int)sxp->sx_fdtarget); 56 - if (!file) { 55 + f = fdget((int)sxp->sx_fdtarget); 56 + if (!f.file) { 57 57 error = XFS_ERROR(EINVAL); 58 58 goto out; 59 59 } 60 60 61 - if (!(file->f_mode & FMODE_WRITE) || 62 - !(file->f_mode & FMODE_READ) || 63 - (file->f_flags & O_APPEND)) { 61 + if (!(f.file->f_mode & FMODE_WRITE) || 62 + !(f.file->f_mode & FMODE_READ) || 63 + (f.file->f_flags & O_APPEND)) { 64 64 error = XFS_ERROR(EBADF); 65 65 goto out_put_file; 66 66 } 67 67 68 - tmp_file = fget((int)sxp->sx_fdtmp); 69 - if (!tmp_file) { 68 + tmp = fdget((int)sxp->sx_fdtmp); 69 + if (!tmp.file) { 70 70 error = XFS_ERROR(EINVAL); 71 71 goto out_put_file; 72 72 } 73 73 74 - if (!(tmp_file->f_mode & FMODE_WRITE) || 75 - !(tmp_file->f_mode & FMODE_READ) || 76 - (tmp_file->f_flags & O_APPEND)) { 74 + if (!(tmp.file->f_mode & FMODE_WRITE) || 75 + !(tmp.file->f_mode & FMODE_READ) || 76 + (tmp.file->f_flags & O_APPEND)) { 77 77 error = XFS_ERROR(EBADF); 78 78 goto out_put_tmp_file; 79 79 } 80 80 81 - if (IS_SWAPFILE(file->f_path.dentry->d_inode) || 82 - IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { 81 + if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) || 82 + IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) { 83 83 error = XFS_ERROR(EINVAL); 84 84 goto out_put_tmp_file; 85 85 } 86 86 87 - ip = XFS_I(file->f_path.dentry->d_inode); 88 - tip = XFS_I(tmp_file->f_path.dentry->d_inode); 87 + ip = XFS_I(f.file->f_path.dentry->d_inode); 88 + tip = XFS_I(tmp.file->f_path.dentry->d_inode); 89 89 90 90 if (ip->i_mount != tip->i_mount) { 91 91 error = XFS_ERROR(EINVAL); ··· 105 105 error = xfs_swap_extents(ip, tip, sxp); 106 106 107 107 out_put_tmp_file: 108 - fput(tmp_file); 108 + fdput(tmp); 109 109 out_put_file: 110 - fput(file); 110 + fdput(f); 111 111 out: 112 112 return error; 
113 113 }
+5 -5
fs/xfs/xfs_ioctl.c
··· 70 70 int hsize; 71 71 xfs_handle_t handle; 72 72 struct inode *inode; 73 - struct file *file = NULL; 73 + struct fd f; 74 74 struct path path; 75 75 int error; 76 76 struct xfs_inode *ip; 77 77 78 78 if (cmd == XFS_IOC_FD_TO_HANDLE) { 79 - file = fget(hreq->fd); 80 - if (!file) 79 + f = fdget(hreq->fd); 80 + if (!f.file) 81 81 return -EBADF; 82 - inode = file->f_path.dentry->d_inode; 82 + inode = f.file->f_path.dentry->d_inode; 83 83 } else { 84 84 error = user_lpath((const char __user *)hreq->path, &path); 85 85 if (error) ··· 134 134 135 135 out_put: 136 136 if (cmd == XFS_IOC_FD_TO_HANDLE) 137 - fput(file); 137 + fdput(f); 138 138 else 139 139 path_put(&path); 140 140 return error;
+5
fs/xfs/xfs_super.c
··· 1506 1506 STATIC void 1507 1507 xfs_destroy_zones(void) 1508 1508 { 1509 + /* 1510 + * Make sure all delayed rcu free are flushed before we 1511 + * destroy caches. 1512 + */ 1513 + rcu_barrier(); 1509 1514 kmem_zone_destroy(xfs_ili_zone); 1510 1515 kmem_zone_destroy(xfs_inode_zone); 1511 1516 kmem_zone_destroy(xfs_efi_zone);
+3
include/linux/compat.h
··· 590 590 unsigned long liovcnt, const struct compat_iovec __user *rvec, 591 591 unsigned long riovcnt, unsigned long flags); 592 592 593 + asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, 594 + compat_off_t __user *offset, compat_size_t count); 595 + 593 596 #else 594 597 595 598 #define is_compat_task() (0)
+12 -27
include/linux/fdtable.h
··· 30 30 struct fdtable *next; 31 31 }; 32 32 33 - static inline void __set_close_on_exec(int fd, struct fdtable *fdt) 34 - { 35 - __set_bit(fd, fdt->close_on_exec); 36 - } 37 - 38 - static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) 39 - { 40 - __clear_bit(fd, fdt->close_on_exec); 41 - } 42 - 43 33 static inline bool close_on_exec(int fd, const struct fdtable *fdt) 44 34 { 45 35 return test_bit(fd, fdt->close_on_exec); 46 - } 47 - 48 - static inline void __set_open_fd(int fd, struct fdtable *fdt) 49 - { 50 - __set_bit(fd, fdt->open_fds); 51 - } 52 - 53 - static inline void __clear_open_fd(int fd, struct fdtable *fdt) 54 - { 55 - __clear_bit(fd, fdt->open_fds); 56 36 } 57 37 58 38 static inline bool fd_is_open(int fd, const struct fdtable *fdt) ··· 73 93 struct vfsmount; 74 94 struct dentry; 75 95 76 - extern int expand_files(struct files_struct *, int nr); 77 - extern void free_fdtable_rcu(struct rcu_head *rcu); 78 96 extern void __init files_defer_init(void); 79 - 80 - static inline void free_fdtable(struct fdtable *fdt) 81 - { 82 - call_rcu(&fdt->rcu, free_fdtable_rcu); 83 - } 84 97 85 98 static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) 86 99 { ··· 95 122 struct files_struct *get_files_struct(struct task_struct *); 96 123 void put_files_struct(struct files_struct *fs); 97 124 void reset_files_struct(struct files_struct *); 125 + void daemonize_descriptors(void); 98 126 int unshare_files(struct files_struct **); 99 127 struct files_struct *dup_fd(struct files_struct *, int *); 128 + void do_close_on_exec(struct files_struct *); 129 + int iterate_fd(struct files_struct *, unsigned, 130 + int (*)(const void *, struct file *, unsigned), 131 + const void *); 132 + 133 + extern int __alloc_fd(struct files_struct *files, 134 + unsigned start, unsigned end, unsigned flags); 135 + extern void __fd_install(struct files_struct *files, 136 + unsigned int fd, struct file *file); 137 + extern int __close_fd(struct 
files_struct *files, 138 + unsigned int fd); 100 139 101 140 extern struct kmem_cache *files_cachep; 102 141
+32 -3
include/linux/file.h
··· 26 26 fput(file); 27 27 } 28 28 29 + struct fd { 30 + struct file *file; 31 + int need_put; 32 + }; 33 + 34 + static inline void fdput(struct fd fd) 35 + { 36 + if (fd.need_put) 37 + fput(fd.file); 38 + } 39 + 29 40 extern struct file *fget(unsigned int fd); 30 41 extern struct file *fget_light(unsigned int fd, int *fput_needed); 42 + 43 + static inline struct fd fdget(unsigned int fd) 44 + { 45 + int b; 46 + struct file *f = fget_light(fd, &b); 47 + return (struct fd){f,b}; 48 + } 49 + 31 50 extern struct file *fget_raw(unsigned int fd); 32 51 extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); 52 + 53 + static inline struct fd fdget_raw(unsigned int fd) 54 + { 55 + int b; 56 + struct file *f = fget_raw_light(fd, &b); 57 + return (struct fd){f,b}; 58 + } 59 + 60 + extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); 61 + extern int replace_fd(unsigned fd, struct file *file, unsigned flags); 33 62 extern void set_close_on_exec(unsigned int fd, int flag); 63 + extern bool get_close_on_exec(unsigned int fd); 34 64 extern void put_filp(struct file *); 35 - extern int alloc_fd(unsigned start, unsigned flags); 36 - extern int get_unused_fd(void); 37 - #define get_unused_fd_flags(flags) alloc_fd(0, (flags)) 65 + extern int get_unused_fd_flags(unsigned flags); 66 + #define get_unused_fd() get_unused_fd_flags(0) 38 67 extern void put_unused_fd(unsigned int fd); 39 68 40 69 extern void fd_install(unsigned int fd, struct file *file);
+7 -3
include/linux/fs.h
··· 1074 1074 unsigned char f_handle[0]; 1075 1075 }; 1076 1076 1077 - #define get_file(x) atomic_long_inc(&(x)->f_count) 1077 + static inline struct file *get_file(struct file *f) 1078 + { 1079 + atomic_long_inc(&f->f_count); 1080 + return f; 1081 + } 1078 1082 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 1079 1083 #define file_count(x) atomic_long_read(&(x)->f_count) 1080 1084 ··· 1130 1126 /* Page cache limit. The filesystems should put that into their s_maxbytes 1131 1127 limits, otherwise bad things can happen in VM. */ 1132 1128 #if BITS_PER_LONG==32 1133 - #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 1129 + #define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 1134 1130 #elif BITS_PER_LONG==64 1135 - #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL 1131 + #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffff) 1136 1132 #endif 1137 1133 1138 1134 #define FL_POSIX 1
+2 -1
include/linux/net.h
··· 65 65 struct poll_table_struct; 66 66 struct pipe_inode_info; 67 67 struct inode; 68 + struct file; 68 69 struct net; 69 70 70 71 #define SOCK_ASYNC_NOSPACE 0 ··· 247 246 size_t len); 248 247 extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, 249 248 size_t size, int flags); 250 - extern int sock_map_fd(struct socket *sock, int flags); 249 + extern struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); 251 250 extern struct socket *sockfd_lookup(int fd, int *err); 252 251 extern struct socket *sock_from_file(struct file *file, int *err); 253 252 #define sockfd_put(sock) fput(sock->file)
+1
include/linux/sched.h
··· 405 405 406 406 extern void set_dumpable(struct mm_struct *mm, int value); 407 407 extern int get_dumpable(struct mm_struct *mm); 408 + extern int __get_dumpable(unsigned long mm_flags); 408 409 409 410 /* get/set_dumpable() values */ 410 411 #define SUID_DUMPABLE_DISABLED 0
+39 -39
ipc/mqueue.c
··· 944 944 size_t, msg_len, unsigned int, msg_prio, 945 945 const struct timespec __user *, u_abs_timeout) 946 946 { 947 - struct file *filp; 947 + struct fd f; 948 948 struct inode *inode; 949 949 struct ext_wait_queue wait; 950 950 struct ext_wait_queue *receiver; ··· 967 967 968 968 audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL); 969 969 970 - filp = fget(mqdes); 971 - if (unlikely(!filp)) { 970 + f = fdget(mqdes); 971 + if (unlikely(!f.file)) { 972 972 ret = -EBADF; 973 973 goto out; 974 974 } 975 975 976 - inode = filp->f_path.dentry->d_inode; 977 - if (unlikely(filp->f_op != &mqueue_file_operations)) { 976 + inode = f.file->f_path.dentry->d_inode; 977 + if (unlikely(f.file->f_op != &mqueue_file_operations)) { 978 978 ret = -EBADF; 979 979 goto out_fput; 980 980 } 981 981 info = MQUEUE_I(inode); 982 - audit_inode(NULL, filp->f_path.dentry); 982 + audit_inode(NULL, f.file->f_path.dentry); 983 983 984 - if (unlikely(!(filp->f_mode & FMODE_WRITE))) { 984 + if (unlikely(!(f.file->f_mode & FMODE_WRITE))) { 985 985 ret = -EBADF; 986 986 goto out_fput; 987 987 } ··· 1023 1023 } 1024 1024 1025 1025 if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { 1026 - if (filp->f_flags & O_NONBLOCK) { 1026 + if (f.file->f_flags & O_NONBLOCK) { 1027 1027 ret = -EAGAIN; 1028 1028 } else { 1029 1029 wait.task = current; ··· 1056 1056 if (ret) 1057 1057 free_msg(msg_ptr); 1058 1058 out_fput: 1059 - fput(filp); 1059 + fdput(f); 1060 1060 out: 1061 1061 return ret; 1062 1062 } ··· 1067 1067 { 1068 1068 ssize_t ret; 1069 1069 struct msg_msg *msg_ptr; 1070 - struct file *filp; 1070 + struct fd f; 1071 1071 struct inode *inode; 1072 1072 struct mqueue_inode_info *info; 1073 1073 struct ext_wait_queue wait; ··· 1084 1084 1085 1085 audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? 
&ts : NULL); 1086 1086 1087 - filp = fget(mqdes); 1088 - if (unlikely(!filp)) { 1087 + f = fdget(mqdes); 1088 + if (unlikely(!f.file)) { 1089 1089 ret = -EBADF; 1090 1090 goto out; 1091 1091 } 1092 1092 1093 - inode = filp->f_path.dentry->d_inode; 1094 - if (unlikely(filp->f_op != &mqueue_file_operations)) { 1093 + inode = f.file->f_path.dentry->d_inode; 1094 + if (unlikely(f.file->f_op != &mqueue_file_operations)) { 1095 1095 ret = -EBADF; 1096 1096 goto out_fput; 1097 1097 } 1098 1098 info = MQUEUE_I(inode); 1099 - audit_inode(NULL, filp->f_path.dentry); 1099 + audit_inode(NULL, f.file->f_path.dentry); 1100 1100 1101 - if (unlikely(!(filp->f_mode & FMODE_READ))) { 1101 + if (unlikely(!(f.file->f_mode & FMODE_READ))) { 1102 1102 ret = -EBADF; 1103 1103 goto out_fput; 1104 1104 } ··· 1130 1130 } 1131 1131 1132 1132 if (info->attr.mq_curmsgs == 0) { 1133 - if (filp->f_flags & O_NONBLOCK) { 1133 + if (f.file->f_flags & O_NONBLOCK) { 1134 1134 spin_unlock(&info->lock); 1135 1135 ret = -EAGAIN; 1136 1136 } else { ··· 1160 1160 free_msg(msg_ptr); 1161 1161 } 1162 1162 out_fput: 1163 - fput(filp); 1163 + fdput(f); 1164 1164 out: 1165 1165 return ret; 1166 1166 } ··· 1174 1174 const struct sigevent __user *, u_notification) 1175 1175 { 1176 1176 int ret; 1177 - struct file *filp; 1177 + struct fd f; 1178 1178 struct sock *sock; 1179 1179 struct inode *inode; 1180 1180 struct sigevent notification; ··· 1220 1220 skb_put(nc, NOTIFY_COOKIE_LEN); 1221 1221 /* and attach it to the socket */ 1222 1222 retry: 1223 - filp = fget(notification.sigev_signo); 1224 - if (!filp) { 1223 + f = fdget(notification.sigev_signo); 1224 + if (!f.file) { 1225 1225 ret = -EBADF; 1226 1226 goto out; 1227 1227 } 1228 - sock = netlink_getsockbyfilp(filp); 1229 - fput(filp); 1228 + sock = netlink_getsockbyfilp(f.file); 1229 + fdput(f); 1230 1230 if (IS_ERR(sock)) { 1231 1231 ret = PTR_ERR(sock); 1232 1232 sock = NULL; ··· 1245 1245 } 1246 1246 } 1247 1247 1248 - filp = fget(mqdes); 1249 - if (!filp) 
{ 1248 + f = fdget(mqdes); 1249 + if (!f.file) { 1250 1250 ret = -EBADF; 1251 1251 goto out; 1252 1252 } 1253 1253 1254 - inode = filp->f_path.dentry->d_inode; 1255 - if (unlikely(filp->f_op != &mqueue_file_operations)) { 1254 + inode = f.file->f_path.dentry->d_inode; 1255 + if (unlikely(f.file->f_op != &mqueue_file_operations)) { 1256 1256 ret = -EBADF; 1257 1257 goto out_fput; 1258 1258 } ··· 1292 1292 } 1293 1293 spin_unlock(&info->lock); 1294 1294 out_fput: 1295 - fput(filp); 1295 + fdput(f); 1296 1296 out: 1297 1297 if (sock) { 1298 1298 netlink_detachskb(sock, nc); ··· 1308 1308 { 1309 1309 int ret; 1310 1310 struct mq_attr mqstat, omqstat; 1311 - struct file *filp; 1311 + struct fd f; 1312 1312 struct inode *inode; 1313 1313 struct mqueue_inode_info *info; 1314 1314 ··· 1319 1319 return -EINVAL; 1320 1320 } 1321 1321 1322 - filp = fget(mqdes); 1323 - if (!filp) { 1322 + f = fdget(mqdes); 1323 + if (!f.file) { 1324 1324 ret = -EBADF; 1325 1325 goto out; 1326 1326 } 1327 1327 1328 - inode = filp->f_path.dentry->d_inode; 1329 - if (unlikely(filp->f_op != &mqueue_file_operations)) { 1328 + inode = f.file->f_path.dentry->d_inode; 1329 + if (unlikely(f.file->f_op != &mqueue_file_operations)) { 1330 1330 ret = -EBADF; 1331 1331 goto out_fput; 1332 1332 } ··· 1335 1335 spin_lock(&info->lock); 1336 1336 1337 1337 omqstat = info->attr; 1338 - omqstat.mq_flags = filp->f_flags & O_NONBLOCK; 1338 + omqstat.mq_flags = f.file->f_flags & O_NONBLOCK; 1339 1339 if (u_mqstat) { 1340 1340 audit_mq_getsetattr(mqdes, &mqstat); 1341 - spin_lock(&filp->f_lock); 1341 + spin_lock(&f.file->f_lock); 1342 1342 if (mqstat.mq_flags & O_NONBLOCK) 1343 - filp->f_flags |= O_NONBLOCK; 1343 + f.file->f_flags |= O_NONBLOCK; 1344 1344 else 1345 - filp->f_flags &= ~O_NONBLOCK; 1346 - spin_unlock(&filp->f_lock); 1345 + f.file->f_flags &= ~O_NONBLOCK; 1346 + spin_unlock(&f.file->f_lock); 1347 1347 1348 1348 inode->i_atime = inode->i_ctime = CURRENT_TIME; 1349 1349 } ··· 1356 1356 ret = -EFAULT; 
1357 1357 1358 1358 out_fput: 1359 - fput(filp); 1359 + fdput(f); 1360 1360 out: 1361 1361 return ret; 1362 1362 }
+31 -41
kernel/events/core.c
··· 468 468 { 469 469 struct perf_cgroup *cgrp; 470 470 struct cgroup_subsys_state *css; 471 - struct file *file; 472 - int ret = 0, fput_needed; 471 + struct fd f = fdget(fd); 472 + int ret = 0; 473 473 474 - file = fget_light(fd, &fput_needed); 475 - if (!file) 474 + if (!f.file) 476 475 return -EBADF; 477 476 478 - css = cgroup_css_from_dir(file, perf_subsys_id); 477 + css = cgroup_css_from_dir(f.file, perf_subsys_id); 479 478 if (IS_ERR(css)) { 480 479 ret = PTR_ERR(css); 481 480 goto out; ··· 500 501 ret = -EINVAL; 501 502 } 502 503 out: 503 - fput_light(file, fput_needed); 504 + fdput(f); 504 505 return ret; 505 506 } 506 507 ··· 3233 3234 3234 3235 static const struct file_operations perf_fops; 3235 3236 3236 - static struct file *perf_fget_light(int fd, int *fput_needed) 3237 + static inline int perf_fget_light(int fd, struct fd *p) 3237 3238 { 3238 - struct file *file; 3239 + struct fd f = fdget(fd); 3240 + if (!f.file) 3241 + return -EBADF; 3239 3242 3240 - file = fget_light(fd, fput_needed); 3241 - if (!file) 3242 - return ERR_PTR(-EBADF); 3243 - 3244 - if (file->f_op != &perf_fops) { 3245 - fput_light(file, *fput_needed); 3246 - *fput_needed = 0; 3247 - return ERR_PTR(-EBADF); 3243 + if (f.file->f_op != &perf_fops) { 3244 + fdput(f); 3245 + return -EBADF; 3248 3246 } 3249 - 3250 - return file; 3247 + *p = f; 3248 + return 0; 3251 3249 } 3252 3250 3253 3251 static int perf_event_set_output(struct perf_event *event, ··· 3276 3280 3277 3281 case PERF_EVENT_IOC_SET_OUTPUT: 3278 3282 { 3279 - struct file *output_file = NULL; 3280 - struct perf_event *output_event = NULL; 3281 - int fput_needed = 0; 3282 3283 int ret; 3283 - 3284 3284 if (arg != -1) { 3285 - output_file = perf_fget_light(arg, &fput_needed); 3286 - if (IS_ERR(output_file)) 3287 - return PTR_ERR(output_file); 3288 - output_event = output_file->private_data; 3285 + struct perf_event *output_event; 3286 + struct fd output; 3287 + ret = perf_fget_light(arg, &output); 3288 + if (ret) 3289 + return 
ret; 3290 + output_event = output.file->private_data; 3291 + ret = perf_event_set_output(event, output_event); 3292 + fdput(output); 3293 + } else { 3294 + ret = perf_event_set_output(event, NULL); 3289 3295 } 3290 - 3291 - ret = perf_event_set_output(event, output_event); 3292 - if (output_event) 3293 - fput_light(output_file, fput_needed); 3294 - 3295 3296 return ret; 3296 3297 } 3297 3298 ··· 6436 6443 struct perf_event_attr attr; 6437 6444 struct perf_event_context *ctx; 6438 6445 struct file *event_file = NULL; 6439 - struct file *group_file = NULL; 6446 + struct fd group = {NULL, 0}; 6440 6447 struct task_struct *task = NULL; 6441 6448 struct pmu *pmu; 6442 6449 int event_fd; 6443 6450 int move_group = 0; 6444 - int fput_needed = 0; 6445 6451 int err; 6446 6452 6447 6453 /* for future expandability... */ ··· 6470 6478 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) 6471 6479 return -EINVAL; 6472 6480 6473 - event_fd = get_unused_fd_flags(O_RDWR); 6481 + event_fd = get_unused_fd(); 6474 6482 if (event_fd < 0) 6475 6483 return event_fd; 6476 6484 6477 6485 if (group_fd != -1) { 6478 - group_file = perf_fget_light(group_fd, &fput_needed); 6479 - if (IS_ERR(group_file)) { 6480 - err = PTR_ERR(group_file); 6486 + err = perf_fget_light(group_fd, &group); 6487 + if (err) 6481 6488 goto err_fd; 6482 - } 6483 - group_leader = group_file->private_data; 6489 + group_leader = group.file->private_data; 6484 6490 if (flags & PERF_FLAG_FD_OUTPUT) 6485 6491 output_event = group_leader; 6486 6492 if (flags & PERF_FLAG_FD_NO_GROUP) ··· 6654 6664 * of the group leader will find the pointer to itself in 6655 6665 * perf_group_detach(). 
6656 6666 */ 6657 - fput_light(group_file, fput_needed); 6667 + fdput(group); 6658 6668 fd_install(event_fd, event_file); 6659 6669 return event_fd; 6660 6670 ··· 6668 6678 if (task) 6669 6679 put_task_struct(task); 6670 6680 err_group_fd: 6671 - fput_light(group_file, fput_needed); 6681 + fdput(group); 6672 6682 err_fd: 6673 6683 put_unused_fd(event_fd); 6674 6684 return err;
+1 -96
kernel/exit.c
··· 457 457 /* Become as one with the init task */ 458 458 459 459 daemonize_fs_struct(); 460 - exit_files(current); 461 - current->files = init_task.files; 462 - atomic_inc(&current->files->count); 460 + daemonize_descriptors(); 463 461 464 462 reparent_to_kthreadd(); 465 463 } 466 464 467 465 EXPORT_SYMBOL(daemonize); 468 - 469 - static void close_files(struct files_struct * files) 470 - { 471 - int i, j; 472 - struct fdtable *fdt; 473 - 474 - j = 0; 475 - 476 - /* 477 - * It is safe to dereference the fd table without RCU or 478 - * ->file_lock because this is the last reference to the 479 - * files structure. But use RCU to shut RCU-lockdep up. 480 - */ 481 - rcu_read_lock(); 482 - fdt = files_fdtable(files); 483 - rcu_read_unlock(); 484 - for (;;) { 485 - unsigned long set; 486 - i = j * BITS_PER_LONG; 487 - if (i >= fdt->max_fds) 488 - break; 489 - set = fdt->open_fds[j++]; 490 - while (set) { 491 - if (set & 1) { 492 - struct file * file = xchg(&fdt->fd[i], NULL); 493 - if (file) { 494 - filp_close(file, files); 495 - cond_resched(); 496 - } 497 - } 498 - i++; 499 - set >>= 1; 500 - } 501 - } 502 - } 503 - 504 - struct files_struct *get_files_struct(struct task_struct *task) 505 - { 506 - struct files_struct *files; 507 - 508 - task_lock(task); 509 - files = task->files; 510 - if (files) 511 - atomic_inc(&files->count); 512 - task_unlock(task); 513 - 514 - return files; 515 - } 516 - 517 - void put_files_struct(struct files_struct *files) 518 - { 519 - struct fdtable *fdt; 520 - 521 - if (atomic_dec_and_test(&files->count)) { 522 - close_files(files); 523 - /* 524 - * Free the fd and fdset arrays if we expanded them. 525 - * If the fdtable was embedded, pass files for freeing 526 - * at the end of the RCU grace period. Otherwise, 527 - * you can free files immediately. 
528 - */ 529 - rcu_read_lock(); 530 - fdt = files_fdtable(files); 531 - if (fdt != &files->fdtab) 532 - kmem_cache_free(files_cachep, files); 533 - free_fdtable(fdt); 534 - rcu_read_unlock(); 535 - } 536 - } 537 - 538 - void reset_files_struct(struct files_struct *files) 539 - { 540 - struct task_struct *tsk = current; 541 - struct files_struct *old; 542 - 543 - old = tsk->files; 544 - task_lock(tsk); 545 - tsk->files = files; 546 - task_unlock(tsk); 547 - put_files_struct(old); 548 - } 549 - 550 - void exit_files(struct task_struct *tsk) 551 - { 552 - struct files_struct * files = tsk->files; 553 - 554 - if (files) { 555 - task_lock(tsk); 556 - tsk->files = NULL; 557 - task_unlock(tsk); 558 - put_files_struct(files); 559 - } 560 - } 561 466 562 467 #ifdef CONFIG_MM_OWNER 563 468 /*
+7 -7
kernel/sys.c
··· 1788 1788 #ifdef CONFIG_CHECKPOINT_RESTORE 1789 1789 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1790 1790 { 1791 - struct file *exe_file; 1791 + struct fd exe; 1792 1792 struct dentry *dentry; 1793 1793 int err; 1794 1794 1795 - exe_file = fget(fd); 1796 - if (!exe_file) 1795 + exe = fdget(fd); 1796 + if (!exe.file) 1797 1797 return -EBADF; 1798 1798 1799 - dentry = exe_file->f_path.dentry; 1799 + dentry = exe.file->f_path.dentry; 1800 1800 1801 1801 /* 1802 1802 * Because the original mm->exe_file points to executable file, make ··· 1805 1805 */ 1806 1806 err = -EACCES; 1807 1807 if (!S_ISREG(dentry->d_inode->i_mode) || 1808 - exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1808 + exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1809 1809 goto exit; 1810 1810 1811 1811 err = inode_permission(dentry->d_inode, MAY_EXEC); ··· 1839 1839 goto exit_unlock; 1840 1840 1841 1841 err = 0; 1842 - set_mm_exe_file(mm, exe_file); 1842 + set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ 1843 1843 exit_unlock: 1844 1844 up_write(&mm->mmap_sem); 1845 1845 1846 1846 exit: 1847 - fput(exe_file); 1847 + fdput(exe); 1848 1848 return err; 1849 1849 } 1850 1850
+5 -6
kernel/taskstats.c
··· 424 424 struct nlattr *na; 425 425 size_t size; 426 426 u32 fd; 427 - struct file *file; 428 - int fput_needed; 427 + struct fd f; 429 428 430 429 na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; 431 430 if (!na) 432 431 return -EINVAL; 433 432 434 433 fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); 435 - file = fget_light(fd, &fput_needed); 436 - if (!file) 434 + f = fdget(fd); 435 + if (!f.file) 437 436 return 0; 438 437 439 438 size = nla_total_size(sizeof(struct cgroupstats)); ··· 452 453 stats = nla_data(na); 453 454 memset(stats, 0, sizeof(*stats)); 454 455 455 - rc = cgroupstats_build(stats, file->f_dentry); 456 + rc = cgroupstats_build(stats, f.file->f_dentry); 456 457 if (rc < 0) { 457 458 nlmsg_free(rep_skb); 458 459 goto err; ··· 461 462 rc = send_reply(rep_skb, info); 462 463 463 464 err: 464 - fput_light(file, fput_needed); 465 + fdput(f); 465 466 return rc; 466 467 } 467 468
+17 -17
mm/fadvise.c
··· 26 26 */ 27 27 SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) 28 28 { 29 - struct file *file = fget(fd); 29 + struct fd f = fdget(fd); 30 30 struct address_space *mapping; 31 31 struct backing_dev_info *bdi; 32 32 loff_t endbyte; /* inclusive */ ··· 35 35 unsigned long nrpages; 36 36 int ret = 0; 37 37 38 - if (!file) 38 + if (!f.file) 39 39 return -EBADF; 40 40 41 - if (S_ISFIFO(file->f_path.dentry->d_inode->i_mode)) { 41 + if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) { 42 42 ret = -ESPIPE; 43 43 goto out; 44 44 } 45 45 46 - mapping = file->f_mapping; 46 + mapping = f.file->f_mapping; 47 47 if (!mapping || len < 0) { 48 48 ret = -EINVAL; 49 49 goto out; ··· 76 76 77 77 switch (advice) { 78 78 case POSIX_FADV_NORMAL: 79 - file->f_ra.ra_pages = bdi->ra_pages; 80 - spin_lock(&file->f_lock); 81 - file->f_mode &= ~FMODE_RANDOM; 82 - spin_unlock(&file->f_lock); 79 + f.file->f_ra.ra_pages = bdi->ra_pages; 80 + spin_lock(&f.file->f_lock); 81 + f.file->f_mode &= ~FMODE_RANDOM; 82 + spin_unlock(&f.file->f_lock); 83 83 break; 84 84 case POSIX_FADV_RANDOM: 85 - spin_lock(&file->f_lock); 86 - file->f_mode |= FMODE_RANDOM; 87 - spin_unlock(&file->f_lock); 85 + spin_lock(&f.file->f_lock); 86 + f.file->f_mode |= FMODE_RANDOM; 87 + spin_unlock(&f.file->f_lock); 88 88 break; 89 89 case POSIX_FADV_SEQUENTIAL: 90 - file->f_ra.ra_pages = bdi->ra_pages * 2; 91 - spin_lock(&file->f_lock); 92 - file->f_mode &= ~FMODE_RANDOM; 93 - spin_unlock(&file->f_lock); 90 + f.file->f_ra.ra_pages = bdi->ra_pages * 2; 91 + spin_lock(&f.file->f_lock); 92 + f.file->f_mode &= ~FMODE_RANDOM; 93 + spin_unlock(&f.file->f_lock); 94 94 break; 95 95 case POSIX_FADV_WILLNEED: 96 96 /* First and last PARTIAL page! 
*/ ··· 106 106 * Ignore return value because fadvise() shall return 107 107 * success even if filesystem can't retrieve a hint, 108 108 */ 109 - force_page_cache_readahead(mapping, file, start_index, 109 + force_page_cache_readahead(mapping, f.file, start_index, 110 110 nrpages); 111 111 break; 112 112 case POSIX_FADV_NOREUSE: ··· 128 128 ret = -EINVAL; 129 129 } 130 130 out: 131 - fput(file); 131 + fdput(f); 132 132 return ret; 133 133 } 134 134 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+1 -2
mm/fremap.c
··· 195 195 */ 196 196 if (mapping_cap_account_dirty(mapping)) { 197 197 unsigned long addr; 198 - struct file *file = vma->vm_file; 198 + struct file *file = get_file(vma->vm_file); 199 199 200 200 flags &= MAP_NONBLOCK; 201 - get_file(file); 202 201 addr = mmap_region(file, start, size, 203 202 flags, vma->vm_flags, pgoff); 204 203 fput(file);
+1 -2
mm/mmap.c
··· 1301 1301 goto free_vma; 1302 1302 correct_wcount = 1; 1303 1303 } 1304 - vma->vm_file = file; 1305 - get_file(file); 1304 + vma->vm_file = get_file(file); 1306 1305 error = file->f_op->mmap(file, vma); 1307 1306 if (error) 1308 1307 goto unmap_and_free_vma;
+2 -4
mm/nommu.c
··· 1282 1282 vma->vm_pgoff = pgoff; 1283 1283 1284 1284 if (file) { 1285 - region->vm_file = file; 1286 - get_file(file); 1287 - vma->vm_file = file; 1288 - get_file(file); 1285 + region->vm_file = get_file(file); 1286 + vma->vm_file = get_file(file); 1289 1287 if (vm_flags & VM_EXECUTABLE) { 1290 1288 added_exe_file_vma(current->mm); 1291 1289 vma->vm_mm = current->mm;
+7 -7
mm/readahead.c
··· 579 579 SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) 580 580 { 581 581 ssize_t ret; 582 - struct file *file; 582 + struct fd f; 583 583 584 584 ret = -EBADF; 585 - file = fget(fd); 586 - if (file) { 587 - if (file->f_mode & FMODE_READ) { 588 - struct address_space *mapping = file->f_mapping; 585 + f = fdget(fd); 586 + if (f.file) { 587 + if (f.file->f_mode & FMODE_READ) { 588 + struct address_space *mapping = f.file->f_mapping; 589 589 pgoff_t start = offset >> PAGE_CACHE_SHIFT; 590 590 pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; 591 591 unsigned long len = end - start + 1; 592 - ret = do_readahead(mapping, file, start, len); 592 + ret = do_readahead(mapping, f.file, start, len); 593 593 } 594 - fput(file); 594 + fdput(f); 595 595 } 596 596 return ret; 597 597 }
+7 -9
net/9p/trans_fd.c
··· 793 793 static int p9_socket_open(struct p9_client *client, struct socket *csocket) 794 794 { 795 795 struct p9_trans_fd *p; 796 - int ret, fd; 796 + struct file *file; 797 + int ret; 797 798 798 799 p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); 799 800 if (!p) 800 801 return -ENOMEM; 801 802 802 803 csocket->sk->sk_allocation = GFP_NOIO; 803 - fd = sock_map_fd(csocket, 0); 804 - if (fd < 0) { 804 + file = sock_alloc_file(csocket, 0, NULL); 805 + if (IS_ERR(file)) { 805 806 pr_err("%s (%d): failed to map fd\n", 806 807 __func__, task_pid_nr(current)); 807 808 sock_release(csocket); 808 809 kfree(p); 809 - return fd; 810 + return PTR_ERR(file); 810 811 } 811 812 812 - get_file(csocket->file); 813 - get_file(csocket->file); 814 - p->wr = p->rd = csocket->file; 813 + get_file(file); 814 + p->wr = p->rd = file; 815 815 client->trans = p; 816 816 client->status = Connected; 817 - 818 - sys_close(fd); /* still racy */ 819 817 820 818 p->rd->f_flags |= O_NONBLOCK; 821 819
+1 -2
net/compat.c
··· 301 301 break; 302 302 } 303 303 /* Bump the usage count and install the file. */ 304 - get_file(fp[i]); 305 - fd_install(new_fd, fp[i]); 304 + fd_install(new_fd, get_file(fp[i])); 306 305 } 307 306 308 307 if (i > 0) {
+12 -26
net/core/netprio_cgroup.c
··· 239 239 return ret; 240 240 } 241 241 242 + static int update_netprio(const void *v, struct file *file, unsigned n) 243 + { 244 + int err; 245 + struct socket *sock = sock_from_file(file, &err); 246 + if (sock) 247 + sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; 248 + return 0; 249 + } 250 + 242 251 void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 243 252 { 244 253 struct task_struct *p; 254 + void *v; 245 255 246 256 cgroup_taskset_for_each(p, cgrp, tset) { 247 - unsigned int fd; 248 - struct fdtable *fdt; 249 - struct files_struct *files; 250 - 251 257 task_lock(p); 252 - files = p->files; 253 - if (!files) { 254 - task_unlock(p); 255 - continue; 256 - } 257 - 258 - spin_lock(&files->file_lock); 259 - fdt = files_fdtable(files); 260 - for (fd = 0; fd < fdt->max_fds; fd++) { 261 - struct file *file; 262 - struct socket *sock; 263 - int err; 264 - 265 - file = fcheck_files(files, fd); 266 - if (!file) 267 - continue; 268 - 269 - sock = sock_from_file(file, &err); 270 - if (sock) 271 - sock_update_netprioidx(sock->sk, p); 272 - } 273 - spin_unlock(&files->file_lock); 258 + v = (void *)(unsigned long)task_netprioidx(p); 259 + iterate_fd(p->files, 0, update_netprio, v); 274 260 task_unlock(p); 275 261 } 276 262 }
+1 -2
net/core/scm.c
··· 301 301 break; 302 302 } 303 303 /* Bump the usage count and install the file. */ 304 - get_file(fp[i]); 305 304 sock = sock_from_file(fp[i], &err); 306 305 if (sock) 307 306 sock_update_netprioidx(sock->sk, current); 308 - fd_install(new_fd, fp[i]); 307 + fd_install(new_fd, get_file(fp[i])); 309 308 } 310 309 311 310 if (i > 0)
+21 -6
net/sctp/socket.c
··· 70 70 #include <linux/init.h> 71 71 #include <linux/crypto.h> 72 72 #include <linux/slab.h> 73 + #include <linux/file.h> 73 74 74 75 #include <net/ip.h> 75 76 #include <net/icmp.h> ··· 4293 4292 { 4294 4293 sctp_peeloff_arg_t peeloff; 4295 4294 struct socket *newsock; 4295 + struct file *newfile; 4296 4296 int retval = 0; 4297 4297 4298 4298 if (len < sizeof(sctp_peeloff_arg_t)) ··· 4307 4305 goto out; 4308 4306 4309 4307 /* Map the socket to an unused fd that can be returned to the user. */ 4310 - retval = sock_map_fd(newsock, 0); 4308 + retval = get_unused_fd(); 4311 4309 if (retval < 0) { 4312 4310 sock_release(newsock); 4313 4311 goto out; 4312 + } 4313 + 4314 + newfile = sock_alloc_file(newsock, 0, NULL); 4315 + if (unlikely(IS_ERR(newfile))) { 4316 + put_unused_fd(retval); 4317 + sock_release(newsock); 4318 + return PTR_ERR(newfile); 4314 4319 } 4315 4320 4316 4321 SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n", 4317 4322 __func__, sk, newsock->sk, retval); 4318 4323 4319 4324 /* Return the fd mapped to the new socket. */ 4320 - peeloff.sd = retval; 4321 - if (put_user(len, optlen)) 4325 + if (put_user(len, optlen)) { 4326 + fput(newfile); 4327 + put_unused_fd(retval); 4322 4328 return -EFAULT; 4323 - if (copy_to_user(optval, &peeloff, len)) 4324 - retval = -EFAULT; 4325 - 4329 + } 4330 + peeloff.sd = retval; 4331 + if (copy_to_user(optval, &peeloff, len)) { 4332 + fput(newfile); 4333 + put_unused_fd(retval); 4334 + return -EFAULT; 4335 + } 4336 + fd_install(retval, newfile); 4326 4337 out: 4327 4338 return retval; 4328 4339 }
+42 -26
net/socket.c
··· 347 347 * but we take care of internal coherence yet. 348 348 */ 349 349 350 - static int sock_alloc_file(struct socket *sock, struct file **f, int flags, 351 - const char *dname) 350 + struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) 352 351 { 353 352 struct qstr name = { .name = "" }; 354 353 struct path path; 355 354 struct file *file; 356 - int fd; 357 - 358 - fd = get_unused_fd_flags(flags); 359 - if (unlikely(fd < 0)) 360 - return fd; 361 355 362 356 if (dname) { 363 357 name.name = dname; ··· 361 367 name.len = strlen(name.name); 362 368 } 363 369 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); 364 - if (unlikely(!path.dentry)) { 365 - put_unused_fd(fd); 366 - return -ENOMEM; 367 - } 370 + if (unlikely(!path.dentry)) 371 + return ERR_PTR(-ENOMEM); 368 372 path.mnt = mntget(sock_mnt); 369 373 370 374 d_instantiate(path.dentry, SOCK_INODE(sock)); ··· 374 382 /* drop dentry, keep inode */ 375 383 ihold(path.dentry->d_inode); 376 384 path_put(&path); 377 - put_unused_fd(fd); 378 - return -ENFILE; 385 + return ERR_PTR(-ENFILE); 379 386 } 380 387 381 388 sock->file = file; 382 389 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 383 390 file->f_pos = 0; 384 391 file->private_data = sock; 385 - 386 - *f = file; 387 - return fd; 392 + return file; 388 393 } 394 + EXPORT_SYMBOL(sock_alloc_file); 389 395 390 - int sock_map_fd(struct socket *sock, int flags) 396 + static int sock_map_fd(struct socket *sock, int flags) 391 397 { 392 398 struct file *newfile; 393 - int fd = sock_alloc_file(sock, &newfile, flags, NULL); 399 + int fd = get_unused_fd_flags(flags); 400 + if (unlikely(fd < 0)) 401 + return fd; 394 402 395 - if (likely(fd >= 0)) 403 + newfile = sock_alloc_file(sock, flags, NULL); 404 + if (likely(!IS_ERR(newfile))) { 396 405 fd_install(fd, newfile); 406 + return fd; 407 + } 397 408 398 - return fd; 409 + put_unused_fd(fd); 410 + return PTR_ERR(newfile); 399 411 } 400 - EXPORT_SYMBOL(sock_map_fd); 401 412 402 413 struct 
socket *sock_from_file(struct file *file, int *err) 403 414 { ··· 1461 1466 if (err < 0) 1462 1467 goto out_release_both; 1463 1468 1464 - fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL); 1469 + fd1 = get_unused_fd_flags(flags); 1465 1470 if (unlikely(fd1 < 0)) { 1466 1471 err = fd1; 1467 1472 goto out_release_both; 1468 1473 } 1469 - 1470 - fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL); 1474 + fd2 = get_unused_fd_flags(flags); 1471 1475 if (unlikely(fd2 < 0)) { 1472 1476 err = fd2; 1477 + put_unused_fd(fd1); 1478 + goto out_release_both; 1479 + } 1480 + 1481 + newfile1 = sock_alloc_file(sock1, flags, NULL); 1482 + if (unlikely(IS_ERR(newfile1))) { 1483 + err = PTR_ERR(newfile1); 1484 + put_unused_fd(fd1); 1485 + put_unused_fd(fd2); 1486 + goto out_release_both; 1487 + } 1488 + 1489 + newfile2 = sock_alloc_file(sock2, flags, NULL); 1490 + if (IS_ERR(newfile2)) { 1491 + err = PTR_ERR(newfile2); 1473 1492 fput(newfile1); 1474 1493 put_unused_fd(fd1); 1494 + put_unused_fd(fd2); 1475 1495 sock_release(sock2); 1476 1496 goto out; 1477 1497 } ··· 1618 1608 */ 1619 1609 __module_get(newsock->ops->owner); 1620 1610 1621 - newfd = sock_alloc_file(newsock, &newfile, flags, 1622 - sock->sk->sk_prot_creator->name); 1611 + newfd = get_unused_fd_flags(flags); 1623 1612 if (unlikely(newfd < 0)) { 1624 1613 err = newfd; 1614 + sock_release(newsock); 1615 + goto out_put; 1616 + } 1617 + newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); 1618 + if (unlikely(IS_ERR(newfile))) { 1619 + err = PTR_ERR(newfile); 1620 + put_unused_fd(newfd); 1625 1621 sock_release(newsock); 1626 1622 goto out_put; 1627 1623 }
+21 -52
security/selinux/hooks.c
··· 2088 2088 return (atsecure || cap_bprm_secureexec(bprm)); 2089 2089 } 2090 2090 2091 + static int match_file(const void *p, struct file *file, unsigned fd) 2092 + { 2093 + return file_has_perm(p, file, file_to_av(file)) ? fd + 1 : 0; 2094 + } 2095 + 2091 2096 /* Derived from fs/exec.c:flush_old_files. */ 2092 2097 static inline void flush_unauthorized_files(const struct cred *cred, 2093 2098 struct files_struct *files) 2094 2099 { 2095 2100 struct file *file, *devnull = NULL; 2096 2101 struct tty_struct *tty; 2097 - struct fdtable *fdt; 2098 - long j = -1; 2099 2102 int drop_tty = 0; 2103 + unsigned n; 2100 2104 2101 2105 tty = get_current_tty(); 2102 2106 if (tty) { ··· 2127 2123 no_tty(); 2128 2124 2129 2125 /* Revalidate access to inherited open files. */ 2130 - spin_lock(&files->file_lock); 2131 - for (;;) { 2132 - unsigned long set, i; 2133 - int fd; 2126 + n = iterate_fd(files, 0, match_file, cred); 2127 + if (!n) /* none found? */ 2128 + return; 2134 2129 2135 - j++; 2136 - i = j * BITS_PER_LONG; 2137 - fdt = files_fdtable(files); 2138 - if (i >= fdt->max_fds) 2139 - break; 2140 - set = fdt->open_fds[j]; 2141 - if (!set) 2142 - continue; 2143 - spin_unlock(&files->file_lock); 2144 - for ( ; set ; i++, set >>= 1) { 2145 - if (set & 1) { 2146 - file = fget(i); 2147 - if (!file) 2148 - continue; 2149 - if (file_has_perm(cred, 2150 - file, 2151 - file_to_av(file))) { 2152 - sys_close(i); 2153 - fd = get_unused_fd(); 2154 - if (fd != i) { 2155 - if (fd >= 0) 2156 - put_unused_fd(fd); 2157 - fput(file); 2158 - continue; 2159 - } 2160 - if (devnull) { 2161 - get_file(devnull); 2162 - } else { 2163 - devnull = dentry_open( 2164 - &selinux_null, 2165 - O_RDWR, cred); 2166 - if (IS_ERR(devnull)) { 2167 - devnull = NULL; 2168 - put_unused_fd(fd); 2169 - fput(file); 2170 - continue; 2171 - } 2172 - } 2173 - fd_install(fd, devnull); 2174 - } 2175 - fput(file); 2176 - } 2177 - } 2178 - spin_lock(&files->file_lock); 2179 - 2130 + devnull = dentry_open(&selinux_null, 
O_RDWR, cred); 2131 + if (!IS_ERR(devnull)) { 2132 + /* replace all the matching ones with this */ 2133 + do { 2134 + replace_fd(n - 1, get_file(devnull), 0); 2135 + } while ((n = iterate_fd(files, n, match_file, cred)) != 0); 2136 + fput(devnull); 2137 + } else { 2138 + /* just close all the matching ones */ 2139 + do { 2140 + replace_fd(n - 1, NULL, 0); 2141 + } while ((n = iterate_fd(files, n, match_file, cred)) != 0); 2180 2142 } 2181 - spin_unlock(&files->file_lock); 2182 2143 } 2183 2144 2184 2145 /*
+7 -6
sound/core/pcm_native.c
··· 1563 1563 1564 1564 1565 1565 /* WARNING: Don't forget to fput back the file */ 1566 - static struct file *snd_pcm_file_fd(int fd) 1566 + static struct file *snd_pcm_file_fd(int fd, int *fput_needed) 1567 1567 { 1568 1568 struct file *file; 1569 1569 struct inode *inode; 1570 1570 unsigned int minor; 1571 1571 1572 - file = fget(fd); 1572 + file = fget_light(fd, fput_needed); 1573 1573 if (!file) 1574 1574 return NULL; 1575 1575 inode = file->f_path.dentry->d_inode; 1576 1576 if (!S_ISCHR(inode->i_mode) || 1577 1577 imajor(inode) != snd_major) { 1578 - fput(file); 1578 + fput_light(file, *fput_needed); 1579 1579 return NULL; 1580 1580 } 1581 1581 minor = iminor(inode); 1582 1582 if (!snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_PLAYBACK) && 1583 1583 !snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_CAPTURE)) { 1584 - fput(file); 1584 + fput_light(file, *fput_needed); 1585 1585 return NULL; 1586 1586 } 1587 1587 return file; ··· 1597 1597 struct snd_pcm_file *pcm_file; 1598 1598 struct snd_pcm_substream *substream1; 1599 1599 struct snd_pcm_group *group; 1600 + int fput_needed; 1600 1601 1601 - file = snd_pcm_file_fd(fd); 1602 + file = snd_pcm_file_fd(fd, &fput_needed); 1602 1603 if (!file) 1603 1604 return -EBADFD; 1604 1605 pcm_file = file->private_data; ··· 1634 1633 write_unlock_irq(&snd_pcm_link_rwlock); 1635 1634 up_write(&snd_pcm_link_rwsem); 1636 1635 _nolock: 1637 - fput(file); 1636 + fput_light(file, fput_needed); 1638 1637 if (res < 0) 1639 1638 kfree(group); 1640 1639 return res;