Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (57 commits)
tidy the trailing symlinks traversal up
Turn resolution of trailing symlinks iterative everywhere
simplify link_path_walk() tail
Make trailing symlink resolution in path_lookupat() iterative
update nd->inode in __do_follow_link() instead of after do_follow_link()
pull handling of one pathname component into a helper
fs: allow AT_EMPTY_PATH in linkat(), limit that to CAP_DAC_READ_SEARCH
Allow passing O_PATH descriptors via SCM_RIGHTS datagrams
readlinkat(), fchownat() and fstatat() with empty relative pathnames
Allow O_PATH for symlinks
New kind of open files - "location only".
ext4: Copy fs UUID to superblock
ext3: Copy fs UUID to superblock.
vfs: Export file system uuid via /proc/<pid>/mountinfo
unistd.h: Add new syscalls numbers to asm-generic
x86: Add new syscalls for x86_64
x86: Add new syscalls for x86_32
fs: Remove i_nlink check from file system link callback
fs: Don't allow to create hardlink for deleted file
vfs: Add open by file handle support
...

+1508 -1278
+8 -28
arch/alpha/kernel/osf_sys.c
··· 230 230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0; 231 231 } 232 232 233 - static int 234 - do_osf_statfs(struct path *path, struct osf_statfs __user *buffer, 235 - unsigned long bufsiz) 233 + SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname, 234 + struct osf_statfs __user *, buffer, unsigned long, bufsiz) 236 235 { 237 236 struct kstatfs linux_stat; 238 - int error = vfs_statfs(path, &linux_stat); 237 + int error = user_statfs(pathname, &linux_stat); 239 238 if (!error) 240 239 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); 241 240 return error; 242 241 } 243 242 244 - SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname, 245 - struct osf_statfs __user *, buffer, unsigned long, bufsiz) 246 - { 247 - struct path path; 248 - int retval; 249 - 250 - retval = user_path(pathname, &path); 251 - if (!retval) { 252 - retval = do_osf_statfs(&path, buffer, bufsiz); 253 - path_put(&path); 254 - } 255 - return retval; 256 - } 257 - 258 243 SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, 259 244 struct osf_statfs __user *, buffer, unsigned long, bufsiz) 260 245 { 261 - struct file *file; 262 - int retval; 263 - 264 - retval = -EBADF; 265 - file = fget(fd); 266 - if (file) { 267 - retval = do_osf_statfs(&file->f_path, buffer, bufsiz); 268 - fput(file); 269 - } 270 - return retval; 246 + struct kstatfs linux_stat; 247 + int error = fd_statfs(fd, &linux_stat); 248 + if (!error) 249 + error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); 250 + return error; 271 251 } 272 252 273 253 /*
+22 -43
arch/parisc/hpux/sys_hpux.c
··· 185 185 int16_t f_pad; 186 186 }; 187 187 188 - static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf) 188 + static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p) 189 189 { 190 - struct kstatfs st; 191 - int retval; 192 - 193 - retval = vfs_statfs(path, &st); 194 - if (retval) 195 - return retval; 196 - 197 - memset(buf, 0, sizeof(*buf)); 198 - buf->f_type = st.f_type; 199 - buf->f_bsize = st.f_bsize; 200 - buf->f_blocks = st.f_blocks; 201 - buf->f_bfree = st.f_bfree; 202 - buf->f_bavail = st.f_bavail; 203 - buf->f_files = st.f_files; 204 - buf->f_ffree = st.f_ffree; 205 - buf->f_fsid[0] = st.f_fsid.val[0]; 206 - buf->f_fsid[1] = st.f_fsid.val[1]; 207 - 190 + struct hpux_statfs buf; 191 + memset(&buf, 0, sizeof(buf)); 192 + buf.f_type = st->f_type; 193 + buf.f_bsize = st->f_bsize; 194 + buf.f_blocks = st->f_blocks; 195 + buf.f_bfree = st->f_bfree; 196 + buf.f_bavail = st->f_bavail; 197 + buf.f_files = st->f_files; 198 + buf.f_ffree = st->f_ffree; 199 + buf.f_fsid[0] = st->f_fsid.val[0]; 200 + buf.f_fsid[1] = st->f_fsid.val[1]; 201 + if (copy_to_user(p, &buf, sizeof(buf))) 202 + return -EFAULT; 208 203 return 0; 209 204 } 210 205 ··· 207 212 asmlinkage long hpux_statfs(const char __user *pathname, 208 213 struct hpux_statfs __user *buf) 209 214 { 210 - struct path path; 211 - int error; 212 - 213 - error = user_path(pathname, &path); 214 - if (!error) { 215 - struct hpux_statfs tmp; 216 - error = do_statfs_hpux(&path, &tmp); 217 - if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 218 - error = -EFAULT; 219 - path_put(&path); 220 - } 215 + struct kstatfs st; 216 + int error = user_statfs(pathname, &st); 217 + if (!error) 218 + error = do_statfs_hpux(&st, buf); 221 219 return error; 222 220 } 223 221 224 222 asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf) 225 223 { 226 - struct file *file; 227 - struct hpux_statfs tmp; 228 - int error; 229 - 230 - error = -EBADF; 231 - file = fget(fd); 232 - if (!file) 233 - goto out; 234 - error = do_statfs_hpux(&file->f_path, &tmp); 235 - if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 236 - error = -EFAULT; 237 - fput(file); 238 - out: 224 + struct kstatfs st; 225 + int error = fd_statfs(fd, &st); 226 + if (!error) 227 + error = do_statfs_hpux(&st, buf); 239 228 return error; 240 229 } 241 230
+1 -1
arch/powerpc/platforms/cell/spufs/syscalls.c
··· 70 70 if (!IS_ERR(tmp)) { 71 71 struct nameidata nd; 72 72 73 - ret = path_lookup(tmp, LOOKUP_PARENT, &nd); 73 + ret = kern_path_parent(tmp, &nd); 74 74 if (!ret) { 75 75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE; 76 76 ret = spufs_create(&nd, flags, mode, neighbor);
+2 -19
arch/um/drivers/mconsole_kern.c
··· 124 124 #if 0 125 125 void mconsole_proc(struct mc_request *req) 126 126 { 127 - struct nameidata nd; 128 127 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 129 128 struct file *file; 130 - int n, err; 129 + int n; 131 130 char *ptr = req->request.data, *buf; 132 131 mm_segment_t old_fs = get_fs(); 133 132 134 133 ptr += strlen("proc"); 135 134 ptr = skip_spaces(ptr); 136 135 137 - err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd); 138 - if (err) { 139 - mconsole_reply(req, "Failed to look up file", 1, 0); 140 - goto out; 141 - } 142 - 143 - err = may_open(&nd.path, MAY_READ, O_RDONLY); 144 - if (result) { 145 - mconsole_reply(req, "Failed to open file", 1, 0); 146 - path_put(&nd.path); 147 - goto out; 148 - } 149 - 150 - file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY, 151 - current_cred()); 152 - err = PTR_ERR(file); 136 + file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); 153 137 if (IS_ERR(file)) { 154 138 mconsole_reply(req, "Failed to open file", 1, 0); 155 - path_put(&nd.path); 156 139 goto out; 157 140 } 158 141
+2
arch/x86/ia32/ia32entry.S
··· 851 851 .quad sys_fanotify_init 852 852 .quad sys32_fanotify_mark 853 853 .quad sys_prlimit64 /* 340 */ 854 + .quad sys_name_to_handle_at 855 + .quad compat_sys_open_by_handle_at 854 856 ia32_syscall_end:
+3 -1
arch/x86/include/asm/unistd_32.h
··· 346 346 #define __NR_fanotify_init 338 347 347 #define __NR_fanotify_mark 339 348 348 #define __NR_prlimit64 340 349 + #define __NR_name_to_handle_at 341 350 + #define __NR_open_by_handle_at 342 349 351 350 352 #ifdef __KERNEL__ 351 353 352 - #define NR_syscalls 341 354 + #define NR_syscalls 343 353 355 354 356 #define __ARCH_WANT_IPC_PARSE_VERSION 355 357 #define __ARCH_WANT_OLD_READDIR
+4
arch/x86/include/asm/unistd_64.h
··· 669 669 __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 670 670 #define __NR_prlimit64 302 671 671 __SYSCALL(__NR_prlimit64, sys_prlimit64) 672 + #define __NR_name_to_handle_at 303 673 + __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) 674 + #define __NR_open_by_handle_at 304 675 + __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) 672 676 673 677 #ifndef __NO_STUBS 674 678 #define __ARCH_WANT_OLD_READDIR
+2
arch/x86/kernel/syscall_table_32.S
··· 340 340 .long sys_fanotify_init 341 341 .long sys_fanotify_mark 342 342 .long sys_prlimit64 /* 340 */ 343 + .long sys_name_to_handle_at 344 + .long sys_open_by_handle_at
+1 -1
fs/Kconfig
··· 47 47 def_bool n 48 48 49 49 config EXPORTFS 50 - tristate 50 + bool 51 51 52 52 config FILE_LOCKING 53 53 bool "Enable POSIX file locking API" if EXPERT
+2
fs/Makefile
··· 48 48 obj-$(CONFIG_NFS_COMMON) += nfs_common/ 49 49 obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 50 50 51 + obj-$(CONFIG_FHANDLE) += fhandle.o 52 + 51 53 obj-y += quota/ 52 54 53 55 obj-$(CONFIG_PROC_FS) += proc/
+6 -2
fs/btrfs/export.c
··· 21 21 int len = *max_len; 22 22 int type; 23 23 24 - if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 25 - (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 24 + if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { 25 + *max_len = BTRFS_FID_SIZE_CONNECTABLE; 26 26 return 255; 27 + } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { 28 + *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE; 29 + return 255; 30 + } 27 31 28 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 29 33 type = FILEID_BTRFS_WITHOUT_PARENT;
-3
fs/btrfs/inode.c
··· 4806 4806 int err; 4807 4807 int drop_inode = 0; 4808 4808 4809 - if (inode->i_nlink == 0) 4810 - return -ENOENT; 4811 - 4812 4809 /* do not allow sys_link's with other subvols of the same device */ 4813 4810 if (root->objectid != BTRFS_I(inode)->root->objectid) 4814 4811 return -EPERM;
+33 -48
fs/compat.c
··· 262 262 */ 263 263 asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) 264 264 { 265 - struct path path; 266 - int error; 267 - 268 - error = user_path(pathname, &path); 269 - if (!error) { 270 - struct kstatfs tmp; 271 - error = vfs_statfs(&path, &tmp); 272 - if (!error) 273 - error = put_compat_statfs(buf, &tmp); 274 - path_put(&path); 275 - } 265 + struct kstatfs tmp; 266 + int error = user_statfs(pathname, &tmp); 267 + if (!error) 268 + error = put_compat_statfs(buf, &tmp); 276 269 return error; 277 270 } 278 271 279 272 asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) 280 273 { 281 - struct file * file; 282 274 struct kstatfs tmp; 283 - int error; 284 - 285 - error = -EBADF; 286 - file = fget(fd); 287 - if (!file) 288 - goto out; 289 - error = vfs_statfs(&file->f_path, &tmp); 275 + int error = fd_statfs(fd, &tmp); 290 276 if (!error) 291 277 error = put_compat_statfs(buf, &tmp); 292 - fput(file); 293 - out: 294 278 return error; 295 279 } 296 280 ··· 313 329 314 330 asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) 315 331 { 316 - struct path path; 317 - int error; 318 - 319 - if (sz != sizeof(*buf)) 320 - return -EINVAL; 321 - 322 - error = user_path(pathname, &path); 323 - if (!error) { 324 - struct kstatfs tmp; 325 - error = vfs_statfs(&path, &tmp); 326 - if (!error) 327 - error = put_compat_statfs64(buf, &tmp); 328 - path_put(&path); 329 - } 330 - return error; 331 - } 332 - 333 - asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 334 - { 335 - struct file * file; 336 332 struct kstatfs tmp; 337 333 int error; 338 334 339 335 if (sz != sizeof(*buf)) 340 336 return -EINVAL; 341 337 342 - error = -EBADF; 343 - file = fget(fd); 344 - if (!file) 345 - goto out; 346 - error = vfs_statfs(&file->f_path, &tmp); 338 + error = user_statfs(pathname, &tmp); 347 339 if (!error) 348 340 error = put_compat_statfs64(buf, &tmp); 349 - fput(file); 350 - out: 341 + return error; 342 + } 343 + 344 + asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 345 + { 346 + struct kstatfs tmp; 347 + int error; 348 + 349 + if (sz != sizeof(*buf)) 350 + return -EINVAL; 351 + 352 + error = fd_statfs(fd, &tmp); 353 + if (!error) 354 + error = put_compat_statfs64(buf, &tmp); 351 355 return error; 352 356 } 353 357 ··· 2284 2312 } 2285 2313 2286 2314 #endif /* CONFIG_TIMERFD */ 2315 + 2316 + #ifdef CONFIG_FHANDLE 2317 + /* 2318 + * Exactly like fs/open.c:sys_open_by_handle_at(), except that it 2319 + * doesn't set the O_LARGEFILE flag. 2320 + */ 2321 + asmlinkage long 2322 + compat_sys_open_by_handle_at(int mountdirfd, 2323 + struct file_handle __user *handle, int flags) 2324 + { 2325 + return do_handle_open(mountdirfd, handle, flags); 2326 + } 2327 + #endif
+12 -6
fs/exec.c
··· 115 115 struct file *file; 116 116 char *tmp = getname(library); 117 117 int error = PTR_ERR(tmp); 118 + static const struct open_flags uselib_flags = { 119 + .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 120 + .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, 121 + .intent = LOOKUP_OPEN 122 + }; 118 123 119 124 if (IS_ERR(tmp)) 120 125 goto out; 121 126 122 - file = do_filp_open(AT_FDCWD, tmp, 123 - O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, 124 - MAY_READ | MAY_EXEC | MAY_OPEN); 127 + file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW); 125 128 putname(tmp); 126 129 error = PTR_ERR(file); 127 130 if (IS_ERR(file)) ··· 724 721 { 725 722 struct file *file; 726 723 int err; 724 + static const struct open_flags open_exec_flags = { 725 + .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 726 + .acc_mode = MAY_EXEC | MAY_OPEN, 727 + .intent = LOOKUP_OPEN 728 + }; 727 729 728 - file = do_filp_open(AT_FDCWD, name, 729 - O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, 730 - MAY_EXEC | MAY_OPEN); 730 + file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW); 731 731 if (IS_ERR(file)) 732 732 goto out; 733 733
+9 -2
fs/exportfs/expfs.c
··· 320 320 struct inode * inode = dentry->d_inode; 321 321 int len = *max_len; 322 322 int type = FILEID_INO32_GEN; 323 - 324 - if (len < 2 || (connectable && len < 4)) 323 + 324 + if (connectable && (len < 4)) { 325 + *max_len = 4; 325 326 return 255; 327 + } else if (len < 2) { 328 + *max_len = 2; 329 + return 255; 330 + } 326 331 327 332 len = 2; 328 333 fid->i32.ino = inode->i_ino; ··· 374 369 /* 375 370 * Try to get any dentry for the given file handle from the filesystem. 376 371 */ 372 + if (!nop || !nop->fh_to_dentry) 373 + return ERR_PTR(-ESTALE); 377 374 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 378 375 if (!result) 379 376 result = ERR_PTR(-ESTALE);
-7
fs/ext3/namei.c
··· 2253 2253 2254 2254 dquot_initialize(dir); 2255 2255 2256 - /* 2257 - * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing 2258 - * otherwise has the potential to corrupt the orphan inode list. 2259 - */ 2260 - if (inode->i_nlink == 0) 2261 - return -ENOENT; 2262 - 2263 2256 retry: 2264 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2265 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
+1
fs/ext3/super.c
··· 1936 1936 sb->s_qcop = &ext3_qctl_operations; 1937 1937 sb->dq_op = &ext3_quota_operations; 1938 1938 #endif 1939 + memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 1939 1940 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1940 1941 mutex_init(&sbi->s_orphan_lock); 1941 1942 mutex_init(&sbi->s_resize_lock);
-7
fs/ext4/namei.c
··· 2304 2304 2305 2305 dquot_initialize(dir); 2306 2306 2307 - /* 2308 - * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing 2309 - * otherwise has the potential to corrupt the orphan inode list. 2310 - */ 2311 - if (inode->i_nlink == 0) 2312 - return -ENOENT; 2313 - 2314 2307 retry: 2315 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2316 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
+2
fs/ext4/super.c
··· 3415 3415 sb->s_qcop = &ext4_qctl_operations; 3416 3416 sb->dq_op = &ext4_quota_operations; 3417 3417 #endif 3418 + memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 3419 + 3418 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3419 3421 mutex_init(&sbi->s_orphan_lock); 3420 3422 mutex_init(&sbi->s_resize_lock);
+3 -1
fs/fat/inode.c
··· 757 757 struct inode *inode = de->d_inode; 758 758 u32 ipos_h, ipos_m, ipos_l; 759 759 760 - if (len < 5) 760 + if (len < 5) { 761 + *lenp = 5; 761 762 return 255; /* no room */ 763 + } 762 764 763 765 ipos_h = MSDOS_I(inode)->i_pos >> 8; 764 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
+32 -5
fs/fcntl.c
··· 131 131 SYSCALL_DEFINE1(dup, unsigned int, fildes) 132 132 { 133 133 int ret = -EBADF; 134 - struct file *file = fget(fildes); 134 + struct file *file = fget_raw(fildes); 135 135 136 136 if (file) { 137 137 ret = get_unused_fd(); ··· 426 426 return err; 427 427 } 428 428 429 + static int check_fcntl_cmd(unsigned cmd) 430 + { 431 + switch (cmd) { 432 + case F_DUPFD: 433 + case F_DUPFD_CLOEXEC: 434 + case F_GETFD: 435 + case F_SETFD: 436 + case F_GETFL: 437 + return 1; 438 + } 439 + return 0; 440 + } 441 + 429 442 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 430 443 { 431 444 struct file *filp; 432 445 long err = -EBADF; 433 446 434 - filp = fget(fd); 447 + filp = fget_raw(fd); 435 448 if (!filp) 436 449 goto out; 450 + 451 + if (unlikely(filp->f_mode & FMODE_PATH)) { 452 + if (!check_fcntl_cmd(cmd)) { 453 + fput(filp); 454 + goto out; 455 + } 456 + } 437 457 438 458 err = security_file_fcntl(filp, cmd, arg); 439 459 if (err) { ··· 476 456 long err; 477 457 478 458 err = -EBADF; 479 - filp = fget(fd); 459 + filp = fget_raw(fd); 480 460 if (!filp) 481 461 goto out; 462 + 463 + if (unlikely(filp->f_mode & FMODE_PATH)) { 464 + if (!check_fcntl_cmd(cmd)) { 465 + fput(filp); 466 + goto out; 467 + } 468 + } 482 469 483 470 err = security_file_fcntl(filp, cmd, arg); 484 471 if (err) { ··· 835 808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 836 809 * is defined as O_NONBLOCK on some platforms and not on others. 837 810 */ 838 - BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 811 + BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 839 812 O_RDONLY | O_WRONLY | O_RDWR | 840 813 O_CREAT | O_EXCL | O_NOCTTY | 841 814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 842 815 __O_SYNC | O_DSYNC | FASYNC | 843 816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 844 817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 845 - __FMODE_EXEC 818 + __FMODE_EXEC | O_PATH 846 819 )); 847 820 848 821 fasync_cache = kmem_cache_create("fasync_cache",
+265
fs/fhandle.c
··· 1 + #include <linux/syscalls.h> 2 + #include <linux/slab.h> 3 + #include <linux/fs.h> 4 + #include <linux/file.h> 5 + #include <linux/mount.h> 6 + #include <linux/namei.h> 7 + #include <linux/exportfs.h> 8 + #include <linux/fs_struct.h> 9 + #include <linux/fsnotify.h> 10 + #include <asm/uaccess.h> 11 + #include "internal.h" 12 + 13 + static long do_sys_name_to_handle(struct path *path, 14 + struct file_handle __user *ufh, 15 + int __user *mnt_id) 16 + { 17 + long retval; 18 + struct file_handle f_handle; 19 + int handle_dwords, handle_bytes; 20 + struct file_handle *handle = NULL; 21 + 22 + /* 23 + * We need t make sure wether the file system 24 + * support decoding of the file handle 25 + */ 26 + if (!path->mnt->mnt_sb->s_export_op || 27 + !path->mnt->mnt_sb->s_export_op->fh_to_dentry) 28 + return -EOPNOTSUPP; 29 + 30 + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) 31 + return -EFAULT; 32 + 33 + if (f_handle.handle_bytes > MAX_HANDLE_SZ) 34 + return -EINVAL; 35 + 36 + handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, 37 + GFP_KERNEL); 38 + if (!handle) 39 + return -ENOMEM; 40 + 41 + /* convert handle size to multiple of sizeof(u32) */ 42 + handle_dwords = f_handle.handle_bytes >> 2; 43 + 44 + /* we ask for a non connected handle */ 45 + retval = exportfs_encode_fh(path->dentry, 46 + (struct fid *)handle->f_handle, 47 + &handle_dwords, 0); 48 + handle->handle_type = retval; 49 + /* convert handle size to bytes */ 50 + handle_bytes = handle_dwords * sizeof(u32); 51 + handle->handle_bytes = handle_bytes; 52 + if ((handle->handle_bytes > f_handle.handle_bytes) || 53 + (retval == 255) || (retval == -ENOSPC)) { 54 + /* As per old exportfs_encode_fh documentation 55 + * we could return ENOSPC to indicate overflow 56 + * But file system returned 255 always. So handle 57 + * both the values 58 + */ 59 + /* 60 + * set the handle size to zero so we copy only 61 + * non variable part of the file_handle 62 + */ 63 + handle_bytes = 0; 64 + retval = -EOVERFLOW; 65 + } else 66 + retval = 0; 67 + /* copy the mount id */ 68 + if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) || 69 + copy_to_user(ufh, handle, 70 + sizeof(struct file_handle) + handle_bytes)) 71 + retval = -EFAULT; 72 + kfree(handle); 73 + return retval; 74 + } 75 + 76 + /** 77 + * sys_name_to_handle_at: convert name to handle 78 + * @dfd: directory relative to which name is interpreted if not absolute 79 + * @name: name that should be converted to handle. 80 + * @handle: resulting file handle 81 + * @mnt_id: mount id of the file system containing the file 82 + * @flag: flag value to indicate whether to follow symlink or not 83 + * 84 + * @handle->handle_size indicate the space available to store the 85 + * variable part of the file handle in bytes. If there is not 86 + * enough space, the field is updated to return the minimum 87 + * value required. 88 + */ 89 + SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, 90 + struct file_handle __user *, handle, int __user *, mnt_id, 91 + int, flag) 92 + { 93 + struct path path; 94 + int lookup_flags; 95 + int err; 96 + 97 + if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) 98 + return -EINVAL; 99 + 100 + lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0; 101 + if (flag & AT_EMPTY_PATH) 102 + lookup_flags |= LOOKUP_EMPTY; 103 + err = user_path_at(dfd, name, lookup_flags, &path); 104 + if (!err) { 105 + err = do_sys_name_to_handle(&path, handle, mnt_id); 106 + path_put(&path); 107 + } 108 + return err; 109 + } 110 + 111 + static struct vfsmount *get_vfsmount_from_fd(int fd) 112 + { 113 + struct path path; 114 + 115 + if (fd == AT_FDCWD) { 116 + struct fs_struct *fs = current->fs; 117 + spin_lock(&fs->lock); 118 + path = fs->pwd; 119 + mntget(path.mnt); 120 + spin_unlock(&fs->lock); 121 + } else { 122 + int fput_needed; 123 + struct file *file = fget_light(fd, &fput_needed); 124 + if (!file) 125 + return ERR_PTR(-EBADF); 126 + path = file->f_path; 127 + mntget(path.mnt); 128 + fput_light(file, fput_needed); 129 + } 130 + return path.mnt; 131 + } 132 + 133 + static int vfs_dentry_acceptable(void *context, struct dentry *dentry) 134 + { 135 + return 1; 136 + } 137 + 138 + static int do_handle_to_path(int mountdirfd, struct file_handle *handle, 139 + struct path *path) 140 + { 141 + int retval = 0; 142 + int handle_dwords; 143 + 144 + path->mnt = get_vfsmount_from_fd(mountdirfd); 145 + if (IS_ERR(path->mnt)) { 146 + retval = PTR_ERR(path->mnt); 147 + goto out_err; 148 + } 149 + /* change the handle size to multiple of sizeof(u32) */ 150 + handle_dwords = handle->handle_bytes >> 2; 151 + path->dentry = exportfs_decode_fh(path->mnt, 152 + (struct fid *)handle->f_handle, 153 + handle_dwords, handle->handle_type, 154 + vfs_dentry_acceptable, NULL); 155 + if (IS_ERR(path->dentry)) { 156 + retval = PTR_ERR(path->dentry); 157 + goto out_mnt; 158 + } 159 + return 0; 160 + out_mnt: 161 + mntput(path->mnt); 162 + out_err: 163 + return retval; 164 + } 165 + 166 + static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, 167 + struct path *path) 168 + { 169 + int retval = 0; 170 + struct file_handle f_handle; 171 + struct file_handle *handle = NULL; 172 + 173 + /* 174 + * With handle we don't look at the execute bit on the 175 + * the directory. Ideally we would like CAP_DAC_SEARCH. 176 + * But we don't have that 177 + */ 178 + if (!capable(CAP_DAC_READ_SEARCH)) { 179 + retval = -EPERM; 180 + goto out_err; 181 + } 182 + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { 183 + retval = -EFAULT; 184 + goto out_err; 185 + } 186 + if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || 187 + (f_handle.handle_bytes == 0)) { 188 + retval = -EINVAL; 189 + goto out_err; 190 + } 191 + handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, 192 + GFP_KERNEL); 193 + if (!handle) { 194 + retval = -ENOMEM; 195 + goto out_err; 196 + } 197 + /* copy the full handle */ 198 + if (copy_from_user(handle, ufh, 199 + sizeof(struct file_handle) + 200 + f_handle.handle_bytes)) { 201 + retval = -EFAULT; 202 + goto out_handle; 203 + } 204 + 205 + retval = do_handle_to_path(mountdirfd, handle, path); 206 + 207 + out_handle: 208 + kfree(handle); 209 + out_err: 210 + return retval; 211 + } 212 + 213 + long do_handle_open(int mountdirfd, 214 + struct file_handle __user *ufh, int open_flag) 215 + { 216 + long retval = 0; 217 + struct path path; 218 + struct file *file; 219 + int fd; 220 + 221 + retval = handle_to_path(mountdirfd, ufh, &path); 222 + if (retval) 223 + return retval; 224 + 225 + fd = get_unused_fd_flags(open_flag); 226 + if (fd < 0) { 227 + path_put(&path); 228 + return fd; 229 + } 230 + file = file_open_root(path.dentry, path.mnt, "", open_flag); 231 + if (IS_ERR(file)) { 232 + put_unused_fd(fd); 233 + retval = PTR_ERR(file); 234 + } else { 235 + retval = fd; 236 + fsnotify_open(file); 237 + fd_install(fd, file); 238 + } 239 + path_put(&path); 240 + return retval; 241 + } 242 + 243 + /** 244 + * sys_open_by_handle_at: Open the file handle 245 + * @mountdirfd: directory file descriptor 246 + * @handle: file handle to be opened 247 + * @flag: open flags. 248 + * 249 + * @mountdirfd indicate the directory file descriptor 250 + * of the mount point. file handle is decoded relative 251 + * to the vfsmount pointed by the @mountdirfd. @flags 252 + * value is same as the open(2) flags. 253 + */ 254 + SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, 255 + struct file_handle __user *, handle, 256 + int, flags) 257 + { 258 + long ret; 259 + 260 + if (force_o_largefile()) 261 + flags |= O_LARGEFILE; 262 + 263 + ret = do_handle_open(mountdirfd, handle, flags); 264 + return ret; 265 + }
+50 -5
fs/file_table.c
··· 276 276 rcu_read_lock(); 277 277 file = fcheck_files(files, fd); 278 278 if (file) { 279 - if (!atomic_long_inc_not_zero(&file->f_count)) { 280 - /* File object ref couldn't be taken */ 281 - rcu_read_unlock(); 282 - return NULL; 283 - } 279 + /* File object ref couldn't be taken */ 280 + if (file->f_mode & FMODE_PATH || 281 + !atomic_long_inc_not_zero(&file->f_count)) 282 + file = NULL; 284 283 } 285 284 rcu_read_unlock(); 286 285 ··· 287 288 } 288 289 289 290 EXPORT_SYMBOL(fget); 291 + 292 + struct file *fget_raw(unsigned int fd) 293 + { 294 + struct file *file; 295 + struct files_struct *files = current->files; 296 + 297 + rcu_read_lock(); 298 + file = fcheck_files(files, fd); 299 + if (file) { 300 + /* File object ref couldn't be taken */ 301 + if (!atomic_long_inc_not_zero(&file->f_count)) 302 + file = NULL; 303 + } 304 + rcu_read_unlock(); 305 + 306 + return file; 307 + } 308 + 309 + EXPORT_SYMBOL(fget_raw); 290 310 291 311 /* 292 312 * Lightweight file lookup - no refcnt increment if fd table isn't shared. ··· 324 306 * corresponding fput_light. 325 307 */ 326 308 struct file *fget_light(unsigned int fd, int *fput_needed) 309 + { 310 + struct file *file; 311 + struct files_struct *files = current->files; 312 + 313 + *fput_needed = 0; 314 + if (atomic_read(&files->count) == 1) { 315 + file = fcheck_files(files, fd); 316 + if (file && (file->f_mode & FMODE_PATH)) 317 + file = NULL; 318 + } else { 319 + rcu_read_lock(); 320 + file = fcheck_files(files, fd); 321 + if (file) { 322 + if (!(file->f_mode & FMODE_PATH) && 323 + atomic_long_inc_not_zero(&file->f_count)) 324 + *fput_needed = 1; 325 + else 326 + /* Didn't get the reference, someone's freed */ 327 + file = NULL; 328 + } 329 + rcu_read_unlock(); 330 + } 331 + 332 + return file; 333 + } 334 + 335 + struct file *fget_raw_light(unsigned int fd, int *fput_needed) 327 336 { 328 337 struct file *file; 329 338 struct files_struct *files = current->files;
+3 -1
fs/fuse/inode.c
··· 637 637 u64 nodeid; 638 638 u32 generation; 639 639 640 - if (*max_len < len) 640 + if (*max_len < len) { 641 + *max_len = len; 641 642 return 255; 643 + } 642 644 643 645 nodeid = get_fuse_inode(inode)->nodeid; 644 646 generation = inode->i_generation;
+6 -2
fs/gfs2/export.c
··· 36 36 struct super_block *sb = inode->i_sb; 37 37 struct gfs2_inode *ip = GFS2_I(inode); 38 38 39 - if (*len < GFS2_SMALL_FH_SIZE || 40 - (connectable && *len < GFS2_LARGE_FH_SIZE)) 39 + if (connectable && (*len < GFS2_LARGE_FH_SIZE)) { 40 + *len = GFS2_LARGE_FH_SIZE; 41 41 return 255; 42 + } else if (*len < GFS2_SMALL_FH_SIZE) { 43 + *len = GFS2_SMALL_FH_SIZE; 44 + return 255; 45 + } 42 46 43 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 44 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+13
fs/internal.h
··· 106 106 struct nameidata; 107 107 extern struct file *nameidata_to_filp(struct nameidata *); 108 108 extern void release_open_intent(struct nameidata *); 109 + struct open_flags { 110 + int open_flag; 111 + int mode; 112 + int acc_mode; 113 + int intent; 114 + }; 115 + extern struct file *do_filp_open(int dfd, const char *pathname, 116 + const struct open_flags *op, int lookup_flags); 117 + extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, 118 + const char *, const struct open_flags *, int lookup_flags); 119 + 120 + extern long do_handle_open(int mountdirfd, 121 + struct file_handle __user *ufh, int open_flag); 109 122 110 123 /* 111 124 * inode.c
+6 -2
fs/isofs/export.c
··· 124 124 * offset of the inode and the upper 16 bits of fh32[1] to 125 125 * hold the offset of the parent. 126 126 */ 127 - 128 - if (len < 3 || (connectable && len < 5)) 127 + if (connectable && (len < 5)) { 128 + *max_len = 5; 129 129 return 255; 130 + } else if (len < 3) { 131 + *max_len = 3; 132 + return 255; 133 + } 130 134 131 135 len = 3; 132 136 fh32[0] = ei->i_iget5_block;
-3
fs/jfs/namei.c
··· 809 809 if (ip->i_nlink == JFS_LINK_MAX) 810 810 return -EMLINK; 811 811 812 - if (ip->i_nlink == 0) 813 - return -ENOENT; 814 - 815 812 dquot_initialize(dir); 816 813 817 814 tid = txBegin(ip->i_sb, 0);
+671 -855
fs/namei.c
··· 136 136 return retval; 137 137 } 138 138 139 - char * getname(const char __user * filename) 139 + static char *getname_flags(const char __user * filename, int flags) 140 140 { 141 141 char *tmp, *result; 142 142 ··· 147 147 148 148 result = tmp; 149 149 if (retval < 0) { 150 - __putname(tmp); 151 - result = ERR_PTR(retval); 150 + if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { 151 + __putname(tmp); 152 + result = ERR_PTR(retval); 153 + } 152 154 } 153 155 } 154 156 audit_getname(result); 155 157 return result; 158 + } 159 + 160 + char *getname(const char __user * filename) 161 + { 162 + return getname_flags(filename, 0); 156 163 } 157 164 158 165 #ifdef CONFIG_AUDITSYSCALL ··· 408 401 { 409 402 struct fs_struct *fs = current->fs; 410 403 struct dentry *dentry = nd->path.dentry; 404 + int want_root = 0; 411 405 412 406 BUG_ON(!(nd->flags & LOOKUP_RCU)); 413 - if (nd->root.mnt) { 407 + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 408 + want_root = 1; 414 409 spin_lock(&fs->lock); 415 410 if (nd->root.mnt != fs->root.mnt || 416 411 nd->root.dentry != fs->root.dentry) ··· 423 414 goto err; 424 415 BUG_ON(nd->inode != dentry->d_inode); 425 416 spin_unlock(&dentry->d_lock); 426 - if (nd->root.mnt) { 417 + if (want_root) { 427 418 path_get(&nd->root); 428 419 spin_unlock(&fs->lock); 429 420 } ··· 436 427 err: 437 428 spin_unlock(&dentry->d_lock); 438 429 err_root: 439 - if (nd->root.mnt) 430 + if (want_root) 440 431 spin_unlock(&fs->lock); 441 432 return -ECHILD; 442 433 } ··· 463 454 { 464 455 struct fs_struct *fs = current->fs; 465 456 struct dentry *parent = nd->path.dentry; 457 + int want_root = 0; 466 458 467 459 BUG_ON(!(nd->flags & LOOKUP_RCU)); 468 - if (nd->root.mnt) { 460 + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 461 + want_root = 1; 469 462 spin_lock(&fs->lock); 470 463 if (nd->root.mnt != fs->root.mnt || 471 464 nd->root.dentry != fs->root.dentry) ··· 487 476 parent->d_count++; 488 477 spin_unlock(&dentry->d_lock); 489 478 spin_unlock(&parent->d_lock); 490 - if (nd->root.mnt) { 479 + if (want_root) { 491 480 path_get(&nd->root); 492 481 spin_unlock(&fs->lock); 493 482 } ··· 501 490 spin_unlock(&dentry->d_lock); 502 491 spin_unlock(&parent->d_lock); 503 492 err_root: 504 - if (nd->root.mnt) 493 + if (want_root) 505 494 spin_unlock(&fs->lock); 506 495 return -ECHILD; 507 496 } ··· 509 498 /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 510 499 static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 511 500 { 512 - if (nd->flags & LOOKUP_RCU) 513 - return nameidata_dentry_drop_rcu(nd, dentry); 501 + if (nd->flags & LOOKUP_RCU) { 502 + if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) { 503 + nd->flags &= ~LOOKUP_RCU; 504 + if (!(nd->flags & LOOKUP_ROOT)) 505 + nd->root.mnt = NULL; 506 + rcu_read_unlock(); 507 + br_read_unlock(vfsmount_lock); 508 + return -ECHILD; 509 + } 510 + } 514 511 return 0; 515 512 } 516 513 ··· 537 518 538 519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 539 520 nd->flags &= ~LOOKUP_RCU; 540 - nd->root.mnt = NULL; 521 + if (!(nd->flags & LOOKUP_ROOT)) 522 + nd->root.mnt = NULL; 541 523 spin_lock(&dentry->d_lock); 542 524 if (!__d_rcu_to_refcount(dentry, nd->seq)) 543 525 goto err_unlock; ··· 557 537 rcu_read_unlock(); 558 538 br_read_unlock(vfsmount_lock); 559 539 return -ECHILD; 560 - } 561 - 562 - /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 563 - static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) 564 - { 565 - if (likely(nd->flags & LOOKUP_RCU)) 566 - return nameidata_drop_rcu_last(nd); 567 - return 0; 568 540 } 569 541 570 542 /** ··· 602 590 return dentry; 603 591 } 604 592 605 - static inline struct dentry * 606 - do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd) 607 - { 608 - int status = d_revalidate(dentry, nd); 609 - if (likely(status > 0)) 610 - return dentry; 611 - if (status == -ECHILD) { 612 - if (nameidata_dentry_drop_rcu(nd, dentry)) 613 - return ERR_PTR(-ECHILD); 614 - return do_revalidate(dentry, nd); 615 - } 616 - if (status < 0) 617 - return ERR_PTR(status); 618 - /* Don't d_invalidate in rcu-walk mode */ 619 - if (nameidata_dentry_drop_rcu(nd, dentry)) 620 - return ERR_PTR(-ECHILD); 621 - if (!d_invalidate(dentry)) { 622 - dput(dentry); 623 - dentry = NULL; 624 - } 625 - return dentry; 626 - } 627 - 628 - static inline int need_reval_dot(struct dentry *dentry) 629 - { 630 - if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 631 - return 0; 632 - 633 - if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) 634 - return 0; 635 - 636 - return 1; 637 - } 638 - 639 593 /* 640 - * force_reval_path - force revalidation of a dentry 594 + * handle_reval_path - force revalidation of a dentry 641 595 * 642 596 * In some situations the path walking code will trust dentries without 643 597 * revalidating them. This causes problems for filesystems that depend on ··· 617 639 * invalidate the dentry. It's up to the caller to handle putting references 618 640 * to the path if necessary. 619 641 */ 620 - static int 621 - force_reval_path(struct path *path, struct nameidata *nd) 642 + static inline int handle_reval_path(struct nameidata *nd) 622 643 { 644 + struct dentry *dentry = nd->path.dentry; 623 645 int status; 624 - struct dentry *dentry = path->dentry; 625 646 626 - /* 627 - * only check on filesystems where it's possible for the dentry to 628 - * become stale. 629 - */ 630 - if (!need_reval_dot(dentry)) 647 + if (likely(!(nd->flags & LOOKUP_JUMPED))) 631 648 return 0; 632 649 650 + if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 651 + return 0; 652 + 653 + if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) 654 + return 0; 655 + 656 + /* Note: we do not d_invalidate() */ 633 657 status = d_revalidate(dentry, nd); 634 658 if (status > 0) 635 659 return 0; 636 660 637 - if (!status) { 638 - d_invalidate(dentry); 661 + if (!status) 639 662 status = -ESTALE; 640 - } 663 + 641 664 return status; 642 665 } 643 666 ··· 707 728 path_put(&nd->path); 708 729 nd->path = nd->root; 709 730 path_get(&nd->root); 731 + nd->flags |= LOOKUP_JUMPED; 710 732 } 711 733 nd->inode = nd->path.dentry->d_inode; 712 734 ··· 737 757 nd->path.dentry = path->dentry; 738 758 } 739 759 760 + static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) 761 + { 762 + struct inode *inode = link->dentry->d_inode; 763 + if (!IS_ERR(cookie) && inode->i_op->put_link) 764 + inode->i_op->put_link(link->dentry, nd, cookie); 765 + path_put(link); 766 + } 767 + 740 768 static __always_inline int 741 - __do_follow_link(const struct path *link, struct nameidata *nd, void **p) 769 + follow_link(struct path *link, struct nameidata *nd, void **p) 742 770 { 743 771 int error; 744 772 struct dentry *dentry = link->dentry; 745 773 746 774 BUG_ON(nd->flags & LOOKUP_RCU); 747 775 776 + if (unlikely(current->total_link_count >= 40)) { 777 + *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */ 778 + path_put_conditional(link, nd); 779 + path_put(&nd->path); 780 + return -ELOOP; 781 + } 782 + cond_resched(); 783 + current->total_link_count++; 784 + 748 785 touch_atime(link->mnt, dentry); 749 786 nd_set_link(nd, NULL); 750 787 751 788 if (link->mnt == nd->path.mnt) 752 789 mntget(link->mnt); 790 + 791 + error = security_inode_follow_link(link->dentry, nd); 792 + if (error) { 793 + *p = ERR_PTR(error); /* no ->put_link(), please */ 794 + path_put(&nd->path); 795 + return error; 796 + } 753 797 754 798 nd->last_type = LAST_BIND; 755 799 *p = dentry->d_inode->i_op->follow_link(dentry, nd); ··· 784 780 if (s) 785 781 error = __vfs_follow_link(nd, s); 786 782 else if (nd->last_type == LAST_BIND) { 787 - error = force_reval_path(&nd->path, nd); 788 - if (error) 783 + nd->flags |= LOOKUP_JUMPED; 784 + nd->inode = nd->path.dentry->d_inode; 785 + if (nd->inode->i_op->follow_link) { 786 + /* stepped on a _really_ weird one */ 789 787 path_put(&nd->path); 788 + error = -ELOOP; 789 + } 790 790 } 791 791 } 792 792 return error; 793 - } 794 - 795 - /* 796 - * This limits recursive symlink follows to 8, while 797 - * limiting consecutive symlinks to 40. 798 - * 799 - * Without that kind of total limit, nasty chains of consecutive 800 - * symlinks can cause almost arbitrarily long lookups. 801 - */ 802 - static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd) 803 - { 804 - void *cookie; 805 - int err = -ELOOP; 806 - 807 - /* We drop rcu-walk here */ 808 - if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) 809 - return -ECHILD; 810 - BUG_ON(inode != path->dentry->d_inode); 811 - 812 - if (current->link_count >= MAX_NESTED_LINKS) 813 - goto loop; 814 - if (current->total_link_count >= 40) 815 - goto loop; 816 - BUG_ON(nd->depth >= MAX_NESTED_LINKS); 817 - cond_resched(); 818 - err = security_inode_follow_link(path->dentry, nd); 819 - if (err) 820 - goto loop; 821 - current->link_count++; 822 - current->total_link_count++; 823 - nd->depth++; 824 - err = __do_follow_link(path, nd, &cookie); 825 - if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link) 826 - path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie); 827 - path_put(path); 828 - current->link_count--; 829 - nd->depth--; 830 - return err; 831 - loop: 832 - path_put_conditional(path, nd); 833 - path_put(&nd->path); 834 - return err; 835 793 } 836 794 837 795 static int follow_up_rcu(struct path *path) ··· 1034 1068 1035 1069 seq = read_seqcount_begin(&parent->d_seq); 1036 1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1037 - return -ECHILD; 1071 + goto failed; 1038 1072 inode = parent->d_inode; 1039 1073 nd->path.dentry = parent; 1040 1074 nd->seq = seq; ··· 1047 1081 } 1048 1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1049 1083 nd->inode = inode; 1050 - 1051 1084 return 0; 1085 + 1086 + failed: 1087 + nd->flags &= ~LOOKUP_RCU; 1088 + if (!(nd->flags & LOOKUP_ROOT)) 1089 + nd->root.mnt = NULL; 1090 + rcu_read_unlock(); 1091 + br_read_unlock(vfsmount_lock); 1092 + return -ECHILD; 1052 1093 } 1053 1094 1054 1095 /* ··· 1189 1216 { 1190 1217 struct vfsmount *mnt = nd->path.mnt; 1191 1218 struct dentry *dentry, *parent = nd->path.dentry; 1192 - struct inode *dir; 1219 + int need_reval = 1; 1220 + int status = 1; 1193 1221 int err; 1194 - 1195 - /* 1196 - * See if the low-level filesystem might want 1197 - * to use its own hash.. 1198 - */ 1199 - if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { 1200 - err = parent->d_op->d_hash(parent, nd->inode, name); 1201 - if (err < 0) 1202 - return err; 1203 - } 1204 1222 1205 1223 /* 1206 1224 * Rename seqlock is not required here because in the off chance ··· 1200 1236 */ 1201 1237 if (nd->flags & LOOKUP_RCU) { 1202 1238 unsigned seq; 1203 - 1204 1239 *inode = nd->inode; 1205 1240 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1206 - if (!dentry) { 1207 - if (nameidata_drop_rcu(nd)) 1208 - return -ECHILD; 1209 - goto need_lookup; 1210 - } 1241 + if (!dentry) 1242 + goto unlazy; 1243 + 1211 1244 /* Memory barrier in read_seqcount_begin of child is enough */ 1212 1245 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1213 1246 return -ECHILD; 1214 - 1215 1247 nd->seq = seq; 1248 + 1216 1249 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1217 - dentry = do_revalidate_rcu(dentry, nd); 1218 - if (!dentry) 1219 - goto need_lookup; 1220 - if (IS_ERR(dentry)) 1221 - goto fail; 1222 - if (!(nd->flags & LOOKUP_RCU)) 1223 - goto done; 1250 + status = d_revalidate(dentry, nd); 1251 + if (unlikely(status <= 0)) { 1252 + if (status != -ECHILD) 1253 + need_reval = 0; 1254 + goto unlazy; 1255 + } 1224 1256 } 1225 1257 path->mnt = mnt; 1226 1258 path->dentry = dentry; 1227 1259 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1228 1260 return 0; 1229 - if (nameidata_drop_rcu(nd)) 1230 - return -ECHILD; 1231 - /* fallthru */ 1261 + unlazy: 1262 + if (dentry) { 1263 + if (nameidata_dentry_drop_rcu(nd, dentry)) 1264 + return -ECHILD; 1265 + } else { 1266 + if (nameidata_drop_rcu(nd)) 1267 + return -ECHILD; 1268 + } 1269 + } else { 1270 + dentry = __d_lookup(parent, name); 1232 1271 } 1233 - dentry = __d_lookup(parent, name); 1234 - if (!dentry) 1235 - goto need_lookup; 1236 - found: 1237 - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1238 - dentry = do_revalidate(dentry, nd); 1239 - if (!dentry) 1240 - goto need_lookup; 1241 - if (IS_ERR(dentry)) 1242 - goto fail; 1272 + 1273 + retry: 1274 + if (unlikely(!dentry)) { 1275 + struct inode *dir = parent->d_inode; 1276 + BUG_ON(nd->inode != dir); 1277 + 1278 + mutex_lock(&dir->i_mutex); 1279 + dentry = d_lookup(parent, name); 1280 + if (likely(!dentry)) { 1281 + dentry = d_alloc_and_lookup(parent, name, nd); 1282 + if (IS_ERR(dentry)) { 1283 + mutex_unlock(&dir->i_mutex); 1284 + return PTR_ERR(dentry); 1285 + } 1286 + /* known good */ 1287 + need_reval = 0; 1288 + status = 1; 1289 + } 1290 + mutex_unlock(&dir->i_mutex); 1243 1291 } 1244 - done: 1292 + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) 1293 + status = d_revalidate(dentry, nd); 1294 + if (unlikely(status <= 0)) { 1295 + if (status < 0) { 1296 + dput(dentry); 1297 + return status; 1298 + } 1299 + if (!d_invalidate(dentry)) { 1300 + dput(dentry); 1301 + dentry = NULL; 1302 + need_reval = 1; 1303 + goto retry; 1304 + } 1305 + } 1306 + 1245 1307 path->mnt = mnt; 1246 1308 path->dentry = dentry; 1247 1309 err = follow_managed(path, nd->flags); ··· 1277 1287 } 1278 1288 *inode = path->dentry->d_inode; 1279 1289 return 0; 1290 + } 1280 1291 1281 - need_lookup: 1282 - dir = parent->d_inode; 1283 - BUG_ON(nd->inode != dir); 1284 - 1285 - mutex_lock(&dir->i_mutex); 1286 - /* 1287 - * First re-do the cached lookup just in case it was created 1288 - * while we waited for the directory semaphore, or the first 1289 - * lookup failed due to an unrelated rename. 1290 - * 1291 - * This could use version numbering or similar to avoid unnecessary 1292 - * cache lookups, but then we'd have to do the first lookup in the 1293 - * non-racy way. However in the common case here, everything should 1294 - * be hot in cache, so would it be a big win? 1295 - */ 1296 - dentry = d_lookup(parent, name); 1297 - if (likely(!dentry)) { 1298 - dentry = d_alloc_and_lookup(parent, name, nd); 1299 - mutex_unlock(&dir->i_mutex); 1300 - if (IS_ERR(dentry)) 1301 - goto fail; 1302 - goto done; 1292 + static inline int may_lookup(struct nameidata *nd) 1293 + { 1294 + if (nd->flags & LOOKUP_RCU) { 1295 + int err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1296 + if (err != -ECHILD) 1297 + return err; 1298 + if (nameidata_drop_rcu(nd)) 1299 + return -ECHILD; 1303 1300 } 1304 - /* 1305 - * Uhhuh! Nasty case: the cache was re-populated while 1306 - * we waited on the semaphore. Need to revalidate. 1307 - */ 1308 - mutex_unlock(&dir->i_mutex); 1309 - goto found; 1301 + return exec_permission(nd->inode, 0); 1302 + } 1310 1303 1311 - fail: 1312 - return PTR_ERR(dentry); 1304 + static inline int handle_dots(struct nameidata *nd, int type) 1305 + { 1306 + if (type == LAST_DOTDOT) { 1307 + if (nd->flags & LOOKUP_RCU) { 1308 + if (follow_dotdot_rcu(nd)) 1309 + return -ECHILD; 1310 + } else 1311 + follow_dotdot(nd); 1312 + } 1313 + return 0; 1314 + } 1315 + 1316 + static void terminate_walk(struct nameidata *nd) 1317 + { 1318 + if (!(nd->flags & LOOKUP_RCU)) { 1319 + path_put(&nd->path); 1320 + } else { 1321 + nd->flags &= ~LOOKUP_RCU; 1322 + if (!(nd->flags & LOOKUP_ROOT)) 1323 + nd->root.mnt = NULL; 1324 + rcu_read_unlock(); 1325 + br_read_unlock(vfsmount_lock); 1326 + } 1327 + } 1328 + 1329 + static inline int walk_component(struct nameidata *nd, struct path *path, 1330 + struct qstr *name, int type, int follow) 1331 + { 1332 + struct inode *inode; 1333 + int err; 1334 + /* 1335 + * "." and ".." are special - ".." especially so because it has 1336 + * to be able to know about the current root directory and 1337 + * parent relationships. 1338 + */ 1339 + if (unlikely(type != LAST_NORM)) 1340 + return handle_dots(nd, type); 1341 + err = do_lookup(nd, name, path, &inode); 1342 + if (unlikely(err)) { 1343 + terminate_walk(nd); 1344 + return err; 1345 + } 1346 + if (!inode) { 1347 + path_to_nameidata(path, nd); 1348 + terminate_walk(nd); 1349 + return -ENOENT; 1350 + } 1351 + if (unlikely(inode->i_op->follow_link) && follow) { 1352 + if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) 1353 + return -ECHILD; 1354 + BUG_ON(inode != path->dentry->d_inode); 1355 + return 1; 1356 + } 1357 + path_to_nameidata(path, nd); 1358 + nd->inode = inode; 1359 + return 0; 1360 + } 1361 + 1362 + /* 1363 + * This limits recursive symlink follows to 8, while 1364 + * limiting consecutive symlinks to 40. 1365 + * 1366 + * Without that kind of total limit, nasty chains of consecutive 1367 + * symlinks can cause almost arbitrarily long lookups. 1368 + */ 1369 + static inline int nested_symlink(struct path *path, struct nameidata *nd) 1370 + { 1371 + int res; 1372 + 1373 + BUG_ON(nd->depth >= MAX_NESTED_LINKS); 1374 + if (unlikely(current->link_count >= MAX_NESTED_LINKS)) { 1375 + path_put_conditional(path, nd); 1376 + path_put(&nd->path); 1377 + return -ELOOP; 1378 + } 1379 + 1380 + nd->depth++; 1381 + current->link_count++; 1382 + 1383 + do { 1384 + struct path link = *path; 1385 + void *cookie; 1386 + 1387 + res = follow_link(&link, nd, &cookie); 1388 + if (!res) 1389 + res = walk_component(nd, path, &nd->last, 1390 + nd->last_type, LOOKUP_FOLLOW); 1391 + put_link(nd, &link, cookie); 1392 + } while (res > 0); 1393 + 1394 + current->link_count--; 1395 + nd->depth--; 1396 + return res; 1313 1397 } 1314 1398 1315 1399 /* ··· 1403 1339 while (*name=='/') 1404 1340 name++; 1405 1341 if (!*name) 1406 - goto return_reval; 1407 - 1408 - if (nd->depth) 1409 - lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 1342 + return 0; 1410 1343 1411 1344 /* At this point we know we have a real path component. */ 1412 1345 for(;;) { 1413 - struct inode *inode; 1414 1346 unsigned long hash; 1415 1347 struct qstr this; 1416 1348 unsigned int c; 1349 + int type; 1417 1350 1418 1351 nd->flags |= LOOKUP_CONTINUE; 1419 - if (nd->flags & LOOKUP_RCU) { 1420 - err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1421 - if (err == -ECHILD) { 1422 - if (nameidata_drop_rcu(nd)) 1423 - return -ECHILD; 1424 - goto exec_again; 1425 - } 1426 - } else { 1427 - exec_again: 1428 - err = exec_permission(nd->inode, 0); 1429 - } 1352 + 1353 + err = may_lookup(nd); 1430 1354 if (err) 1431 1355 break; 1432 1356 ··· 1430 1378 this.len = name - (const char *) this.name; 1431 1379 this.hash = end_name_hash(hash); 1432 1380 1381 + type = LAST_NORM; 1382 + if (this.name[0] == '.') switch (this.len) { 1383 + case 2: 1384 + if (this.name[1] == '.') { 1385 + type = LAST_DOTDOT; 1386 + nd->flags |= LOOKUP_JUMPED; 1387 + } 1388 + break; 1389 + case 1: 1390 + type = LAST_DOT; 1391 + } 1392 + if (likely(type == LAST_NORM)) { 1393 + struct dentry *parent = nd->path.dentry; 1394 + nd->flags &= ~LOOKUP_JUMPED; 1395 + if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { 1396 + err = parent->d_op->d_hash(parent, nd->inode, 1397 + &this); 1398 + if (err < 0) 1399 + break; 1400 + } 1401 + } 1402 + 1433 1403 /* remove trailing slashes? */ 1434 1404 if (!c) 1435 1405 goto last_component; 1436 1406 while (*++name == '/'); 1437 1407 if (!*name) 1438 - goto last_with_slashes; 1408 + goto last_component; 1439 1409 1440 - /* 1441 - * "." and ".." are special - ".." especially so because it has 1442 - * to be able to know about the current root directory and 1443 - * parent relationships. 1444 - */ 1445 - if (this.name[0] == '.') switch (this.len) { 1446 - default: 1447 - break; 1448 - case 2: 1449 - if (this.name[1] != '.') 1450 - break; 1451 - if (nd->flags & LOOKUP_RCU) { 1452 - if (follow_dotdot_rcu(nd)) 1453 - return -ECHILD; 1454 - } else 1455 - follow_dotdot(nd); 1456 - /* fallthrough */ 1457 - case 1: 1458 - continue; 1459 - } 1460 - /* This does the actual lookups.. */ 1461 - err = do_lookup(nd, &this, &next, &inode); 1462 - if (err) 1463 - break; 1464 - err = -ENOENT; 1465 - if (!inode) 1466 - goto out_dput; 1410 + err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); 1411 + if (err < 0) 1412 + return err; 1467 1413 1468 - if (inode->i_op->follow_link) { 1469 - err = do_follow_link(inode, &next, nd); 1414 + if (err) { 1415 + err = nested_symlink(&next, nd); 1470 1416 if (err) 1471 - goto return_err; 1472 - nd->inode = nd->path.dentry->d_inode; 1473 - err = -ENOENT; 1474 - if (!nd->inode) 1475 - break; 1476 - } else { 1477 - path_to_nameidata(&next, nd); 1478 - nd->inode = inode; 1417 + return err; 1479 1418 } 1480 1419 err = -ENOTDIR; 1481 1420 if (!nd->inode->i_op->lookup) ··· 1474 1431 continue; 1475 1432 /* here ends the main loop */ 1476 1433 1477 - last_with_slashes: 1478 - lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 1479 1434 last_component: 1480 1435 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1481 1436 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1482 - if (lookup_flags & LOOKUP_PARENT) 1483 - goto lookup_parent; 1484 - if (this.name[0] == '.') switch (this.len) { 1485 - default: 1486 - break; 1487 - case 2: 1488 - if (this.name[1] != '.') 1489 - break; 1490 - if (nd->flags & LOOKUP_RCU) { 1491 - if (follow_dotdot_rcu(nd)) 1492 - return -ECHILD; 1493 - } else 1494 - follow_dotdot(nd); 1495 - /* fallthrough */ 1496 - case 1: 1497 - goto return_reval; 1498 - } 1499 - err = do_lookup(nd, &this, &next, &inode); 1500 - if (err) 1501 - break; 1502 - if (inode && unlikely(inode->i_op->follow_link) && 1503 - (lookup_flags & LOOKUP_FOLLOW)) { 1504 - err = do_follow_link(inode, &next, nd); 1505 - if (err) 1506 - goto return_err; 1507 - nd->inode = nd->path.dentry->d_inode; 1508 - } else { 1509 - path_to_nameidata(&next, nd); 1510 - nd->inode = inode; 1511 - } 1512 - err = -ENOENT; 1513 - if (!nd->inode) 1514 - break; 1515 - if (lookup_flags & LOOKUP_DIRECTORY) { 1516 - err = -ENOTDIR; 1517 - if (!nd->inode->i_op->lookup) 1518 - break; 1519 - } 1520 - goto return_base; 1521 - lookup_parent: 1522 1437 nd->last = this; 1523 - nd->last_type = LAST_NORM; 1524 - if (this.name[0] != '.') 1525 - goto return_base; 1526 - if (this.len == 1) 1527 - nd->last_type = LAST_DOT; 1528 - else if (this.len == 2 && this.name[1] == '.') 1529 - nd->last_type = LAST_DOTDOT; 1530 - else 1531 - goto return_base; 1532 - return_reval: 1533 - /* 1534 - * We bypassed the ordinary revalidation routines. 1535 - * We may need to check the cached dentry for staleness. 1536 - */ 1537 - if (need_reval_dot(nd->path.dentry)) { 1538 - if (nameidata_drop_rcu_last_maybe(nd)) 1539 - return -ECHILD; 1540 - /* Note: we do not d_invalidate() */ 1541 - err = d_revalidate(nd->path.dentry, nd); 1542 - if (!err) 1543 - err = -ESTALE; 1544 - if (err < 0) 1545 - break; 1546 - return 0; 1547 - } 1548 - return_base: 1549 - if (nameidata_drop_rcu_last_maybe(nd)) 1550 - return -ECHILD; 1438 + nd->last_type = type; 1551 1439 return 0; 1552 - out_dput: 1553 - if (!(nd->flags & LOOKUP_RCU)) 1554 - path_put_conditional(&next, nd); 1555 - break; 1556 1440 } 1557 - if (!(nd->flags & LOOKUP_RCU)) 1558 - path_put(&nd->path); 1559 - return_err: 1441 + terminate_walk(nd); 1560 1442 return err; 1561 1443 } 1562 1444 1563 - static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1564 - { 1565 - current->total_link_count = 0; 1566 - 1567 - return link_path_walk(name, nd); 1568 - } 1569 - 1570 - static inline int path_walk_simple(const char *name, struct nameidata *nd) 1571 - { 1572 - current->total_link_count = 0; 1573 - 1574 - return link_path_walk(name, nd); 1575 - } 1576 - 1577 - static int path_walk(const char *name, struct nameidata *nd) 1578 - { 1579 - struct path save = nd->path; 1580 - int result; 1581 - 1582 - current->total_link_count = 0; 1583 - 1584 - /* make sure the stuff we saved doesn't go away */ 1585 - path_get(&save); 1586 - 1587 - result = link_path_walk(name, nd); 1588 - if (result == -ESTALE) { 1589 - /* nd->path had been dropped */ 1590 - current->total_link_count = 0; 1591 - nd->path = save; 1592 - nd->inode = save.dentry->d_inode; 1593 - path_get(&nd->path); 1594 - nd->flags |= LOOKUP_REVAL; 1595 - result = link_path_walk(name, nd); 1596 - } 1597 - 1598 - path_put(&save); 1599 - 1600 - return result; 1601 - } 1602 - 1603 - static void path_finish_rcu(struct nameidata *nd) 1604 - { 1605 - if (nd->flags & LOOKUP_RCU) { 1606 - /* RCU dangling. Cancel it. */ 1607 - nd->flags &= ~LOOKUP_RCU; 1608 - nd->root.mnt = NULL; 1609 - rcu_read_unlock(); 1610 - br_read_unlock(vfsmount_lock); 1611 - } 1612 - if (nd->file) 1613 - fput(nd->file); 1614 - } 1615 - 1616 - static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1445 + static int path_init(int dfd, const char *name, unsigned int flags, 1446 + struct nameidata *nd, struct file **fp) 1617 1447 { 1618 1448 int retval = 0; 1619 1449 int fput_needed; 1620 1450 struct file *file; 1621 1451 1622 1452 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1623 - nd->flags = flags | LOOKUP_RCU; 1453 + nd->flags = flags | LOOKUP_JUMPED; 1624 1454 nd->depth = 0; 1625 - nd->root.mnt = NULL; 1626 - nd->file = NULL; 1627 - 1628 - if (*name=='/') { 1629 - struct fs_struct *fs = current->fs; 1630 - unsigned seq; 1631 - 1632 - br_read_lock(vfsmount_lock); 1633 - rcu_read_lock(); 1634 - 1635 - do { 1636 - seq = read_seqcount_begin(&fs->seq); 1637 - nd->root = fs->root; 1638 - nd->path = nd->root; 1639 - nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1640 - } while (read_seqcount_retry(&fs->seq, seq)); 1641 - 1642 - } else if (dfd == AT_FDCWD) { 1643 - struct fs_struct *fs = current->fs; 1644 - unsigned seq; 1645 - 1646 - br_read_lock(vfsmount_lock); 1647 - rcu_read_lock(); 1648 - 1649 - do { 1650 - seq = read_seqcount_begin(&fs->seq); 1651 - nd->path = fs->pwd; 1652 - nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1653 - } while (read_seqcount_retry(&fs->seq, seq)); 1654 - 1655 - } else { 1656 - struct dentry *dentry; 1657 - 1658 - file = fget_light(dfd, &fput_needed); 1659 - retval = -EBADF; 1660 - if (!file) 1661 - goto out_fail; 1662 - 1663 - dentry = file->f_path.dentry; 1664 - 1665 - retval = -ENOTDIR; 1666 - if (!S_ISDIR(dentry->d_inode->i_mode)) 1667 - goto fput_fail; 1668 - 1669 - retval = file_permission(file, MAY_EXEC); 1670 - if (retval) 1671 - goto fput_fail; 1672 - 1673 - nd->path = file->f_path; 1674 - if (fput_needed) 1675 - nd->file = file; 1676 - 1677 - nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1678 - br_read_lock(vfsmount_lock); 1679 - rcu_read_lock(); 1680 - } 1681 - nd->inode = nd->path.dentry->d_inode; 1682 - return 0; 1683 - 1684 - fput_fail: 1685 - fput_light(file, fput_needed); 1686 - out_fail: 1687 - return retval; 1688 - } 1689 - 1690 - static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1691 - { 1692 - int retval = 0; 1693 - int fput_needed; 1694 - struct file *file; 1695 - 1696 - nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1697 - nd->flags = flags; 1698 - nd->depth = 0; 1699 - nd->root.mnt = NULL; 1700 - 1701 - if (*name=='/') { 1702 - set_root(nd); 1455 + if (flags & LOOKUP_ROOT) { 1456 + struct inode *inode = nd->root.dentry->d_inode; 1457 + if (*name) { 1458 + if (!inode->i_op->lookup) 1459 + return -ENOTDIR; 1460 + retval = inode_permission(inode, MAY_EXEC); 1461 + if (retval) 1462 + return retval; 1463 + } 1703 1464 nd->path = nd->root; 1704 - path_get(&nd->root); 1465 + nd->inode = inode; 1466 + if (flags & LOOKUP_RCU) { 1467 + br_read_lock(vfsmount_lock); 1468 + rcu_read_lock(); 1469 + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1470 + } else { 1471 + path_get(&nd->path); 1472 + } 1473 + return 0; 1474 + } 1475 + 1476 + nd->root.mnt = NULL; 1477 + 1478 + if (*name=='/') { 1479 + if (flags & LOOKUP_RCU) { 1480 + br_read_lock(vfsmount_lock); 1481 + rcu_read_lock(); 1482 + set_root_rcu(nd); 1483 + } else { 1484 + set_root(nd); 1485 + path_get(&nd->root); 1486 + } 1487 + nd->path = nd->root; 1705 1488 } else if (dfd == AT_FDCWD) { 1706 - get_fs_pwd(current->fs, &nd->path); 1489 + if (flags & LOOKUP_RCU) { 1490 + struct fs_struct *fs = current->fs; 1491 + unsigned seq; 1492 + 1493 + br_read_lock(vfsmount_lock); 1494 + rcu_read_lock(); 1495 + 1496 + do { 1497 + seq = read_seqcount_begin(&fs->seq); 1498 + nd->path = fs->pwd; 1499 + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1500 + } while (read_seqcount_retry(&fs->seq, seq)); 1501 + } else { 1502 + get_fs_pwd(current->fs, &nd->path); 1503 + } 1707 1504 } else { 1708 1505 struct dentry *dentry; 1709 1506 1710 - file = fget_light(dfd, &fput_needed); 1507 + file = fget_raw_light(dfd, &fput_needed); 1711 1508 retval = -EBADF; 1712 1509 if (!file) 1713 1510 goto out_fail; 1714 1511 1715 1512 dentry = file->f_path.dentry; 1716 1513 1717 - retval = -ENOTDIR; 1718 - if (!S_ISDIR(dentry->d_inode->i_mode)) 1719 - goto fput_fail; 1514 + if (*name) { 1515 + retval = -ENOTDIR; 1516 + if (!S_ISDIR(dentry->d_inode->i_mode)) 1517 + goto fput_fail; 1720 1518 1721 - retval = file_permission(file, MAY_EXEC); 1722 - if (retval) 1723 - goto fput_fail; 1519 + retval = file_permission(file, MAY_EXEC); 1520 + if (retval) 1521 + goto fput_fail; 1522 + } 1724 1523 1725 1524 nd->path = file->f_path; 1726 - path_get(&file->f_path); 1727 - 1728 - fput_light(file, fput_needed); 1525 + if (flags & LOOKUP_RCU) { 1526 + if (fput_needed) 1527 + *fp = file; 1528 + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1529 + br_read_lock(vfsmount_lock); 1530 + rcu_read_lock(); 1531 + } else { 1532 + path_get(&file->f_path); 1533 + fput_light(file, fput_needed); 1534 + } 1729 1535 } 1536 + 1730 1537 nd->inode = nd->path.dentry->d_inode; 1731 1538 return 0; 1732 1539 ··· 1584 1691 fput_light(file, fput_needed); 1585 1692 out_fail: 1586 1693 return retval; 1694 + } 1695 + 1696 + static inline int lookup_last(struct nameidata *nd, struct path *path) 1697 + { 1698 + if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) 1699 + nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 1700 + 1701 + nd->flags &= ~LOOKUP_PARENT; 1702 + return walk_component(nd, path, &nd->last, nd->last_type, 1703 + nd->flags & LOOKUP_FOLLOW); 1587 1704 } 1588 1705 1589 1706 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1590 - static int do_path_lookup(int dfd, const char *name, 1707 + static int path_lookupat(int dfd, const char *name, 1591 1708 unsigned int flags, struct nameidata *nd) 1592 1709 { 1593 - int retval; 1710 + struct file *base = NULL; 1711 + struct path path; 1712 + int err; 1594 1713 1595 1714 /* 1596 1715 * Path walking is largely split up into 2 different synchronisation ··· 1618 1713 * be handled by restarting a traditional ref-walk (which will always 1619 1714 * be able to complete). 1620 1715 */ 1621 - retval = path_init_rcu(dfd, name, flags, nd); 1622 - if (unlikely(retval)) 1623 - return retval; 1624 - retval = path_walk_rcu(name, nd); 1625 - path_finish_rcu(nd); 1626 - if (nd->root.mnt) { 1716 + err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base); 1717 + 1718 + if (unlikely(err)) 1719 + return err; 1720 + 1721 + current->total_link_count = 0; 1722 + err = link_path_walk(name, nd); 1723 + 1724 + if (!err && !(flags & LOOKUP_PARENT)) { 1725 + err = lookup_last(nd, &path); 1726 + while (err > 0) { 1727 + void *cookie; 1728 + struct path link = path; 1729 + nd->flags |= LOOKUP_PARENT; 1730 + err = follow_link(&link, nd, &cookie); 1731 + if (!err) 1732 + err = lookup_last(nd, &path); 1733 + put_link(nd, &link, cookie); 1734 + } 1735 + } 1736 + 1737 + if (nd->flags & LOOKUP_RCU) { 1738 + /* went all way through without dropping RCU */ 1739 + BUG_ON(err); 1740 + if (nameidata_drop_rcu_last(nd)) 1741 + err = -ECHILD; 1742 + } 1743 + 1744 + if (!err) 1745 + err = handle_reval_path(nd); 1746 + 1747 + if (!err && nd->flags & LOOKUP_DIRECTORY) { 1748 + if (!nd->inode->i_op->lookup) { 1749 + path_put(&nd->path); 1750 + return -ENOTDIR; 1751 + } 1752 + } 1753 + 1754 + if (base) 1755 + fput(base); 1756 + 1757 + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 1627 1758 path_put(&nd->root); 1628 1759 nd->root.mnt = NULL; 1629 1760 } 1761 + return err; 1762 + } 1630 1763 1631 - if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1632 - /* slower, locked walk */ 1633 - if (retval == -ESTALE) 1634 - flags |= LOOKUP_REVAL; 1635 - retval = path_init(dfd, name, flags, nd); 1636 - if (unlikely(retval)) 1637 - return retval; 1638 - retval = path_walk(name, nd); 1639 - if (nd->root.mnt) { 1640 - path_put(&nd->root); 1641 - nd->root.mnt = NULL; 1642 - } 1643 - } 1764 + static int do_path_lookup(int dfd, const char *name, 1765 + unsigned int flags, struct nameidata *nd) 1766 + { 1767 + int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); 1768 + if (unlikely(retval == -ECHILD)) 1769 + retval = path_lookupat(dfd, name, flags, nd); 1770 + if (unlikely(retval == -ESTALE)) 1771 + retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); 1644 1772 1645 1773 if (likely(!retval)) { 1646 1774 if (unlikely(!audit_dummy_context())) { ··· 1681 1743 audit_inode(name, nd->path.dentry); 1682 1744 } 1683 1745 } 1684 - 1685 1746 return retval; 1686 1747 } 1687 1748 1688 - int path_lookup(const char *name, unsigned int flags, 1689 - struct nameidata *nd) 1749 + int kern_path_parent(const char *name, struct nameidata *nd) 1690 1750 { 1691 - return do_path_lookup(AT_FDCWD, name, flags, nd); 1751 + return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); 1692 1752 } 1693 1753 1694 1754 int kern_path(const char *name, unsigned int flags, struct path *path) ··· 1710 1774 const char *name, unsigned int flags, 1711 1775 struct nameidata *nd) 1712 1776 { 1713 - int retval; 1714 - 1715 - /* same as do_path_lookup */ 1716 - nd->last_type = LAST_ROOT; 1717 - nd->flags = flags; 1718 - nd->depth = 0; 1719 - 1720 - nd->path.dentry = dentry; 1721 - nd->path.mnt = mnt; 1722 - path_get(&nd->path); 1723 - nd->root = nd->path; 1724 - path_get(&nd->root); 1725 - nd->inode = nd->path.dentry->d_inode; 1726 - 1727 - retval = path_walk(name, nd); 1728 - if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1729 - nd->inode)) 1730 - audit_inode(name, nd->path.dentry); 1731 - 1732 - path_put(&nd->root); 1733 - nd->root.mnt = NULL; 1734 - 1735 - return retval; 1777 + nd->root.dentry = dentry; 1778 + nd->root.mnt = mnt; 1779 + /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 1780 + return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd); 1736 1781 } 1737 1782 1738 1783 static struct dentry *__lookup_hash(struct qstr *name, ··· 1728 1811 return ERR_PTR(err); 1729 1812 1730 1813 /* 1731 - * See if the low-level filesystem might want 1732 - * to use its own hash.. 1733 - */ 1734 - if (base->d_flags & DCACHE_OP_HASH) { 1735 - err = base->d_op->d_hash(base, inode, name); 1736 - dentry = ERR_PTR(err); 1737 - if (err < 0) 1738 - goto out; 1739 - } 1740 - 1741 - /* 1742 1814 * Don't bother with __d_lookup: callers are for creat as 1743 1815 * well as unlink, so a lot of the time it would cost 1744 1816 * a double lookup. ··· 1739 1833 1740 1834 if (!dentry) 1741 1835 dentry = d_alloc_and_lookup(base, name, nd); 1742 - out: 1836 + 1743 1837 return dentry; 1744 1838 } 1745 1839 ··· 1751 1845 static struct dentry *lookup_hash(struct nameidata *nd) 1752 1846 { 1753 1847 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1754 - } 1755 - 1756 - static int __lookup_one_len(const char *name, struct qstr *this, 1757 - struct dentry *base, int len) 1758 - { 1759 - unsigned long hash; 1760 - unsigned int c; 1761 - 1762 - this->name = name; 1763 - this->len = len; 1764 - if (!len) 1765 - return -EACCES; 1766 - 1767 - hash = init_name_hash(); 1768 - while (len--) { 1769 - c = *(const unsigned char *)name++; 1770 - if (c == '/' || c == '\0') 1771 - return -EACCES; 1772 - hash = partial_name_hash(c, hash); 1773 - } 1774 - this->hash = end_name_hash(hash); 1775 - return 0; 1776 1848 } 1777 1849 1778 1850 /** ··· 1766 1882 */ 1767 1883 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1768 1884 { 1769 - int err; 1770 1885 struct qstr this; 1886 + unsigned long hash; 1887 + unsigned int c; 1771 1888 1772 1889 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1773 1890 1774 - err = __lookup_one_len(name, &this, base, len); 1775 - if (err) 1776 - return ERR_PTR(err); 1891 + this.name = name; 1892 + this.len = len; 1893 + if (!len) 1894 + return ERR_PTR(-EACCES); 1895 + 1896 + hash = init_name_hash(); 1897 + while (len--) { 1898 + c = *(const unsigned char *)name++; 1899 + if (c == '/' || c == '\0') 1900 + return ERR_PTR(-EACCES); 1901 + hash = partial_name_hash(c, hash); 1902 + } 1903 + this.hash = end_name_hash(hash); 1904 + /* 1905 + * See if the low-level filesystem might want 1906 + * to use its own hash.. 1907 + */ 1908 + if (base->d_flags & DCACHE_OP_HASH) { 1909 + int err = base->d_op->d_hash(base, base->d_inode, &this); 1910 + if (err < 0) 1911 + return ERR_PTR(err); 1912 + } 1777 1913 1778 1914 return __lookup_hash(&this, base, NULL); 1779 1915 } ··· 1802 1898 struct path *path) 1803 1899 { 1804 1900 struct nameidata nd; 1805 - char *tmp = getname(name); 1901 + char *tmp = getname_flags(name, flags); 1806 1902 int err = PTR_ERR(tmp); 1807 1903 if (!IS_ERR(tmp)) { 1808 1904 ··· 1982 2078 return error; 1983 2079 } 1984 2080 1985 - int may_open(struct path *path, int acc_mode, int flag) 2081 + static int may_open(struct path *path, int acc_mode, int flag) 1986 2082 { 1987 2083 struct dentry *dentry = path->dentry; 1988 2084 struct inode *inode = dentry->d_inode; 1989 2085 int error; 2086 + 2087 + /* O_PATH? */ 2088 + if (!acc_mode) 2089 + return 0; 1990 2090 1991 2091 if (!inode) 1992 2092 return -ENOENT; ··· 2060 2152 } 2061 2153 2062 2154 /* 2063 - * Be careful about ever adding any more callers of this 2064 - * function. Its flags must be in the namei format, not 2065 - * what get passed to sys_open(). 2066 - */ 2067 - static int __open_namei_create(struct nameidata *nd, struct path *path, 2068 - int open_flag, int mode) 2069 - { 2070 - int error; 2071 - struct dentry *dir = nd->path.dentry; 2072 - 2073 - if (!IS_POSIXACL(dir->d_inode)) 2074 - mode &= ~current_umask(); 2075 - error = security_path_mknod(&nd->path, path->dentry, mode, 0); 2076 - if (error) 2077 - goto out_unlock; 2078 - error = vfs_create(dir->d_inode, path->dentry, mode, nd); 2079 - out_unlock: 2080 - mutex_unlock(&dir->d_inode->i_mutex); 2081 - dput(nd->path.dentry); 2082 - nd->path.dentry = path->dentry; 2083 - 2084 - if (error) 2085 - return error; 2086 - /* Don't check for write permission, don't truncate */ 2087 - return may_open(&nd->path, 0, open_flag & ~O_TRUNC); 2088 - } 2089 - 2090 - /* 2091 2155 * Note that while the flag value (low two bits) for sys_open means: 2092 2156 * 00 - read-only 2093 2157 * 01 - write-only ··· 2083 2203 return flag; 2084 2204 } 2085 2205 2086 - static int open_will_truncate(int flag, struct inode *inode) 2087 - { 2088 - /* 2089 - * We'll never write to the fs underlying 2090 - * a device file. 2091 - */ 2092 - if (special_file(inode->i_mode)) 2093 - return 0; 2094 - return (flag & O_TRUNC); 2095 - } 2096 - 2097 - static struct file *finish_open(struct nameidata *nd, 2098 - int open_flag, int acc_mode) 2099 - { 2100 - struct file *filp; 2101 - int will_truncate; 2102 - int error; 2103 - 2104 - will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode); 2105 - if (will_truncate) { 2106 - error = mnt_want_write(nd->path.mnt); 2107 - if (error) 2108 - goto exit; 2109 - } 2110 - error = may_open(&nd->path, acc_mode, open_flag); 2111 - if (error) { 2112 - if (will_truncate) 2113 - mnt_drop_write(nd->path.mnt); 2114 - goto exit; 2115 - } 2116 - filp = nameidata_to_filp(nd); 2117 - if (!IS_ERR(filp)) { 2118 - error = ima_file_check(filp, acc_mode); 2119 - if (error) { 2120 - fput(filp); 2121 - filp = ERR_PTR(error); 2122 - } 2123 - } 2124 - if (!IS_ERR(filp)) { 2125 - if (will_truncate) { 2126 - error = handle_truncate(filp); 2127 - if (error) { 2128 - fput(filp); 2129 - filp = ERR_PTR(error); 2130 - } 2131 - } 2132 - } 2133 - /* 2134 - * It is now safe to drop the mnt write 2135 - * because the filp has had a write taken 2136 - * on its behalf. 2137 - */ 2138 - if (will_truncate) 2139 - mnt_drop_write(nd->path.mnt); 2140 - path_put(&nd->path); 2141 - return filp; 2142 - 2143 - exit: 2144 - path_put(&nd->path); 2145 - return ERR_PTR(error); 2146 - } 2147 - 2148 2206 /* 2149 - * Handle O_CREAT case for do_filp_open 2207 + * Handle the last step of open() 2150 2208 */ 2151 2209 static struct file *do_last(struct nameidata *nd, struct path *path, 2152 - int open_flag, int acc_mode, 2153 - int mode, const char *pathname) 2210 + const struct open_flags *op, const char *pathname) 2154 2211 { 2155 2212 struct dentry *dir = nd->path.dentry; 2213 + struct dentry *dentry; 2214 + int open_flag = op->open_flag; 2215 + int will_truncate = open_flag & O_TRUNC; 2216 + int want_write = 0; 2217 + int acc_mode = op->acc_mode; 2156 2218 struct file *filp; 2157 - int error = -EISDIR; 2219 + int error; 2220 + 2221 + nd->flags &= ~LOOKUP_PARENT; 2222 + nd->flags |= op->intent; 2158 2223 2159 2224 switch (nd->last_type) { 2160 2225 case LAST_DOTDOT: 2161 - follow_dotdot(nd); 2162 - dir = nd->path.dentry; 2163 2226 case LAST_DOT: 2164 - if (need_reval_dot(dir)) { 2165 - int status = d_revalidate(nd->path.dentry, nd); 2166 - if (!status) 2167 - status = -ESTALE; 2168 - if (status < 0) { 2169 - error = status; 2170 - goto exit; 2171 - } 2172 - } 2227 + error = handle_dots(nd, nd->last_type); 2228 + if (error) 2229 + return ERR_PTR(error); 2173 2230 /* fallthrough */ 2174 2231 case LAST_ROOT: 2175 - goto exit; 2232 + if (nd->flags & LOOKUP_RCU) { 2233 + if (nameidata_drop_rcu_last(nd)) 2234 + return ERR_PTR(-ECHILD); 2235 + } 2236 + error = handle_reval_path(nd); 2237 + if (error) 2238 + goto exit; 2239 + audit_inode(pathname, nd->path.dentry); 2240 + if (open_flag & O_CREAT) { 2241 + error = -EISDIR; 2242 + goto exit; 2243 + } 2244 + goto ok; 2176 2245 case LAST_BIND: 2246 + /* can't be RCU mode here */ 2247 + error = handle_reval_path(nd); 2248 + if (error) 2249 + goto exit; 2177 2250 audit_inode(pathname, dir); 2178 2251 goto ok; 2179 2252 } 2180 2253 2254 + if (!(open_flag & O_CREAT)) { 2255 + int symlink_ok = 0; 2256 + if (nd->last.name[nd->last.len]) 2257 + nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 2258 + if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) 2259 + symlink_ok = 1; 2260 + /* we _can_ be in RCU mode here */ 2261 + error = walk_component(nd, path, &nd->last, LAST_NORM, 2262 + !symlink_ok); 2263 + if (error < 0) 2264 + return ERR_PTR(error); 2265 + if (error) /* symlink */ 2266 + return NULL; 2267 + /* sayonara */ 2268 + if (nd->flags & LOOKUP_RCU) { 2269 + if (nameidata_drop_rcu_last(nd)) 2270 + return ERR_PTR(-ECHILD); 2271 + } 2272 + 2273 + error = -ENOTDIR; 2274 + if (nd->flags & LOOKUP_DIRECTORY) { 2275 + if (!nd->inode->i_op->lookup) 2276 + goto exit; 2277 + } 2278 + audit_inode(pathname, nd->path.dentry); 2279 + goto ok; 2280 + } 2281 + 2282 + /* create side of things */ 2283 + 2284 + if (nd->flags & LOOKUP_RCU) { 2285 + if (nameidata_drop_rcu_last(nd)) 2286 + return ERR_PTR(-ECHILD); 2287 + } 2288 + 2289 + audit_inode(pathname, dir); 2290 + error = -EISDIR; 2181 2291 /* trailing slashes? */ 2182 2292 if (nd->last.name[nd->last.len]) 2183 2293 goto exit; 2184 2294 2185 2295 mutex_lock(&dir->d_inode->i_mutex); 2186 2296 2187 - path->dentry = lookup_hash(nd); 2188 - path->mnt = nd->path.mnt; 2189 - 2190 - error = PTR_ERR(path->dentry); 2191 - if (IS_ERR(path->dentry)) { 2297 + dentry = lookup_hash(nd); 2298 + error = PTR_ERR(dentry); 2299 + if (IS_ERR(dentry)) { 2192 2300 mutex_unlock(&dir->d_inode->i_mutex); 2193 2301 goto exit; 2194 2302 } 2195 2303 2196 - if (IS_ERR(nd->intent.open.file)) { 2197 - error = PTR_ERR(nd->intent.open.file); 2198 - goto exit_mutex_unlock; 2199 - } 2304 + path->dentry = dentry; 2305 + path->mnt = nd->path.mnt; 2200 2306 2201 2307 /* Negative dentry, just create the file */ 2202 - if (!path->dentry->d_inode) { 2308 + if (!dentry->d_inode) { 2309 + int mode = op->mode; 2310 + if (!IS_POSIXACL(dir->d_inode)) 2311 + mode &= ~current_umask(); 2203 2312 /* 2204 2313 * This write is needed to ensure that a 2205 - * ro->rw transition does not occur between 2314 + * rw->ro transition does not occur between 2206 2315 * the time when the file is created and when 2207 2316 * a permanent write count is taken through 2208 2317 * the 'struct file' in nameidata_to_filp(). ··· 2199 2330 error = mnt_want_write(nd->path.mnt); 2200 2331 if (error) 2201 2332 goto exit_mutex_unlock; 2202 - error = __open_namei_create(nd, path, open_flag, mode); 2203 - if (error) { 2204 - mnt_drop_write(nd->path.mnt); 2205 - goto exit; 2206 - } 2207 - filp = nameidata_to_filp(nd); 2208 - mnt_drop_write(nd->path.mnt); 2209 - path_put(&nd->path); 2210 - if (!IS_ERR(filp)) { 2211 - error = ima_file_check(filp, acc_mode); 2212 - if (error) { 2213 - fput(filp); 2214 - filp = ERR_PTR(error); 2215 - } 2216 - } 2217 - return filp; 2333 + want_write = 1; 2334 + /* Don't check for write permission, don't truncate */ 2335 + open_flag &= ~O_TRUNC; 2336 + will_truncate = 0; 2337 + acc_mode = MAY_OPEN; 2338 + error = security_path_mknod(&nd->path, dentry, mode, 0); 2339 + if (error) 2340 + goto exit_mutex_unlock; 2341 + error = vfs_create(dir->d_inode, dentry, mode, nd); 2342 + if (error) 2343 + goto exit_mutex_unlock; 2344 + mutex_unlock(&dir->d_inode->i_mutex); 2345 + dput(nd->path.dentry); 2346 + nd->path.dentry = dentry; 2347 + goto common; 2218 2348 } 2219 2349 2220 2350 /* ··· 2243 2375 if (S_ISDIR(nd->inode->i_mode)) 2244 2376 goto exit; 2245 2377 ok: 2246 - filp = finish_open(nd, open_flag, acc_mode); 2378 + if (!S_ISREG(nd->inode->i_mode)) 2379 + will_truncate = 0; 2380 + 2381 + if (will_truncate) { 2382 + error = mnt_want_write(nd->path.mnt); 2383 + if (error) 2384 + goto exit; 2385 + want_write = 1; 2386 + } 2387 + common: 2388 + error = may_open(&nd->path, acc_mode, open_flag); 2389 + if (error) 2390 + goto exit; 2391 + filp = nameidata_to_filp(nd); 2392 + if (!IS_ERR(filp)) { 2393 + error = ima_file_check(filp, op->acc_mode); 2394 + if (error) { 2395 + fput(filp); 2396 + filp = ERR_PTR(error); 2397 + } 2398 + } 2399 + if (!IS_ERR(filp)) { 2400 + if (will_truncate) { 2401 + error = handle_truncate(filp); 2402 + if (error) { 2403 + fput(filp); 2404 + filp = ERR_PTR(error); 2405 + } 2406 + } 2407 + } 2408 + out: 2409 + if (want_write) 2410 + mnt_drop_write(nd->path.mnt); 2411 + path_put(&nd->path); 2247 2412 return filp; 2248 2413 2249 2414 exit_mutex_unlock: ··· 2284 2383 exit_dput: 2285 2384 path_put_conditional(path, nd); 2286 2385 exit: 2287 - path_put(&nd->path); 2288 - return ERR_PTR(error); 2386 + filp = ERR_PTR(error); 2387 + goto out; 2289 2388 } 2290 2389 2291 - /* 2292 - * Note that the low bits of the passed in "open_flag" 2293 - * are not the same as in the local variable "flag". See 2294 - * open_to_namei_flags() for more details. 2295 - */ 2296 - struct file *do_filp_open(int dfd, const char *pathname, 2297 - int open_flag, int mode, int acc_mode) 2390 + static struct file *path_openat(int dfd, const char *pathname, 2391 + struct nameidata *nd, const struct open_flags *op, int flags) 2298 2392 { 2393 + struct file *base = NULL; 2299 2394 struct file *filp; 2300 - struct nameidata nd; 2301 - int error; 2302 2395 struct path path; 2303 - int count = 0; 2304 - int flag = open_to_namei_flags(open_flag); 2305 - int flags; 2306 - 2307 - if (!(open_flag & O_CREAT)) 2308 - mode = 0; 2309 - 2310 - /* Must never be set by userspace */ 2311 - open_flag &= ~FMODE_NONOTIFY; 2312 - 2313 - /* 2314 - * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 2315 - * check for O_DSYNC if the need any syncing at all we enforce it's 2316 - * always set instead of having to deal with possibly weird behaviour 2317 - * for malicious applications setting only __O_SYNC. 2318 - */ 2319 - if (open_flag & __O_SYNC) 2320 - open_flag |= O_DSYNC; 2321 - 2322 - if (!acc_mode) 2323 - acc_mode = MAY_OPEN | ACC_MODE(open_flag); 2324 - 2325 - /* O_TRUNC implies we need access checks for write permissions */ 2326 - if (open_flag & O_TRUNC) 2327 - acc_mode |= MAY_WRITE; 2328 - 2329 - /* Allow the LSM permission hook to distinguish append 2330 - access from general write access. */ 2331 - if (open_flag & O_APPEND) 2332 - acc_mode |= MAY_APPEND; 2333 - 2334 - flags = LOOKUP_OPEN; 2335 - if (open_flag & O_CREAT) { 2336 - flags |= LOOKUP_CREATE; 2337 - if (open_flag & O_EXCL) 2338 - flags |= LOOKUP_EXCL; 2339 - } 2340 - if (open_flag & O_DIRECTORY) 2341 - flags |= LOOKUP_DIRECTORY; 2342 - if (!(open_flag & O_NOFOLLOW)) 2343 - flags |= LOOKUP_FOLLOW; 2396 + int error; 2344 2397 2345 2398 filp = get_empty_filp(); 2346 2399 if (!filp) 2347 2400 return ERR_PTR(-ENFILE); 2348 2401 2349 - filp->f_flags = open_flag; 2350 - nd.intent.open.file = filp; 2351 - nd.intent.open.flags = flag; 2352 - nd.intent.open.create_mode = mode; 2402 + filp->f_flags = op->open_flag; 2403 + nd->intent.open.file = filp; 2404 + nd->intent.open.flags = open_to_namei_flags(op->open_flag); 2405 + nd->intent.open.create_mode = op->mode; 2353 2406 2354 - if (open_flag & O_CREAT) 2355 - goto creat; 2356 - 2357 - /* !O_CREAT, simple open */ 2358 - error = do_path_lookup(dfd, pathname, flags, &nd); 2359 - if (unlikely(error)) 2360 - goto out_filp2; 2361 - error = -ELOOP; 2362 - if (!(nd.flags & LOOKUP_FOLLOW)) { 2363 - if (nd.inode->i_op->follow_link) 2364 - goto out_path2; 2365 - } 2366 - error = -ENOTDIR; 2367 - if (nd.flags & LOOKUP_DIRECTORY) { 2368 - if (!nd.inode->i_op->lookup) 2369 - goto out_path2; 2370 - } 2371 - audit_inode(pathname, nd.path.dentry); 2372 - filp = finish_open(&nd, open_flag, acc_mode); 2373 - out2: 2374 - release_open_intent(&nd); 2375 - return filp; 2376 - 2377 - out_path2: 2378 - path_put(&nd.path); 2379 - out_filp2: 2380 - filp = ERR_PTR(error); 2381 - goto out2; 2382 - 2383 - creat: 2384 - /* OK, have to create the file. Find the parent. */ 2385 - error = path_init_rcu(dfd, pathname, 2386 - LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); 2387 - if (error) 2388 - goto out_filp; 2389 - error = path_walk_rcu(pathname, &nd); 2390 - path_finish_rcu(&nd); 2391 - if (unlikely(error == -ECHILD || error == -ESTALE)) { 2392 - /* slower, locked walk */ 2393 - if (error == -ESTALE) { 2394 - reval: 2395 - flags |= LOOKUP_REVAL; 2396 - } 2397 - error = path_init(dfd, pathname, 2398 - LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); 2399 - if (error) 2400 - goto out_filp; 2401 - 2402 - error = path_walk_simple(pathname, &nd); 2403 - } 2407 + error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); 2404 2408 if (unlikely(error)) 2405 2409 goto out_filp; 2406 - if (unlikely(!audit_dummy_context())) 2407 - audit_inode(pathname, nd.path.dentry); 2408 2410 2409 - /* 2410 - * We have the parent and last component. 2411 - */ 2412 - nd.flags = flags; 2413 - filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2411 + current->total_link_count = 0; 2412 + error = link_path_walk(pathname, nd); 2413 + if (unlikely(error)) 2414 + goto out_filp; 2415 + 2416 + filp = do_last(nd, &path, op, pathname); 2414 2417 while (unlikely(!filp)) { /* trailing symlink */ 2415 2418 struct path link = path; 2416 - struct inode *linki = link.dentry->d_inode; 2417 2419 void *cookie; 2418 - error = -ELOOP; 2419 - if (!(nd.flags & LOOKUP_FOLLOW)) 2420 - goto exit_dput; 2421 - if (count++ == 32) 2422 - goto exit_dput; 2423 - /* 2424 - * This is subtle. Instead of calling do_follow_link() we do 2425 - * the thing by hands. The reason is that this way we have zero 2426 - * link_count and path_walk() (called from ->follow_link) 2427 - * honoring LOOKUP_PARENT. After that we have the parent and 2428 - * last component, i.e. we are in the same situation as after 2429 - * the first path_walk(). Well, almost - if the last component 2430 - * is normal we get its copy stored in nd->last.name and we will 2431 - * have to putname() it when we are done. Procfs-like symlinks 2432 - * just set LAST_BIND. 2433 - */ 2434 - nd.flags |= LOOKUP_PARENT; 2435 - error = security_inode_follow_link(link.dentry, &nd); 2436 - if (error) 2437 - goto exit_dput; 2438 - error = __do_follow_link(&link, &nd, &cookie); 2439 - if (unlikely(error)) { 2440 - if (!IS_ERR(cookie) && linki->i_op->put_link) 2441 - linki->i_op->put_link(link.dentry, &nd, cookie); 2442 - /* nd.path had been dropped */ 2443 - nd.path = link; 2444 - goto out_path; 2420 + if (!(nd->flags & LOOKUP_FOLLOW)) { 2421 + path_put_conditional(&path, nd); 2422 + path_put(&nd->path); 2423 + filp = ERR_PTR(-ELOOP); 2424 + break; 2445 2425 } 2446 - nd.flags &= ~LOOKUP_PARENT; 2447 - filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2448 - if (linki->i_op->put_link) 2449 - linki->i_op->put_link(link.dentry, &nd, cookie); 2450 - path_put(&link); 2426 + nd->flags |= LOOKUP_PARENT; 2427 + nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); 2428 + error = follow_link(&link, nd, &cookie); 2429 + if (unlikely(error)) 2430 + filp = ERR_PTR(error); 2431 + else 2432 + filp = do_last(nd, &path, op, pathname); 2433 + put_link(nd, &link, cookie); 2451 2434 } 2452 2435 out: 2453 - if (nd.root.mnt) 2454 - path_put(&nd.root); 2455 - if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2456 - goto reval; 2457 - release_open_intent(&nd); 2436 + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) 2437 + path_put(&nd->root); 2438 + if (base) 2439 + fput(base); 2440 + release_open_intent(nd); 2458 2441 return filp; 2459 2442 2460 - exit_dput: 2461 - path_put_conditional(&path, &nd); 2462 - out_path: 2463 - path_put(&nd.path); 2464 2443 out_filp: 2465 2444 filp = ERR_PTR(error); 2466 2445 goto out; 2467 2446 } 2468 2447 2469 - /** 2470 - * filp_open - open file and return file pointer 2471 - * 2472 - * @filename: path to open 2473 - * @flags: open flags as per the open(2) second argument 2474 - * @mode: mode for the new file if O_CREAT is set, else ignored 2475 - * 2476 - * This is the helper to open a file from kernelspace if you really 2477 - * have to. But in generally you should not do this, so please move 2478 - * along, nothing to see here.. 2479 - */ 2480 - struct file *filp_open(const char *filename, int flags, int mode) 2448 + struct file *do_filp_open(int dfd, const char *pathname, 2449 + const struct open_flags *op, int flags) 2481 2450 { 2482 - return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2451 + struct nameidata nd; 2452 + struct file *filp; 2453 + 2454 + filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); 2455 + if (unlikely(filp == ERR_PTR(-ECHILD))) 2456 + filp = path_openat(dfd, pathname, &nd, op, flags); 2457 + if (unlikely(filp == ERR_PTR(-ESTALE))) 2458 + filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); 2459 + return filp; 2483 2460 } 2484 - EXPORT_SYMBOL(filp_open); 2461 + 2462 + struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, 2463 + const char *name, const struct open_flags *op, int flags) 2464 + { 2465 + struct nameidata nd; 2466 + struct file *file; 2467 + 2468 + nd.root.mnt = mnt; 2469 + nd.root.dentry = dentry; 2470 + 2471 + flags |= LOOKUP_ROOT; 2472 + 2473 + if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) 2474 + return ERR_PTR(-ELOOP); 2475 + 2476 + file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU); 2477 + if (unlikely(file == ERR_PTR(-ECHILD))) 2478 + file = path_openat(-1, name, &nd, op, flags); 2479 + if (unlikely(file == ERR_PTR(-ESTALE))) 2480 + file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL); 2481 + return file; 2482 + } 2485 2483 2486 2484 /** 2487 2485 * lookup_create - lookup a dentry, creating it if it doesn't exist ··· 2919 3119 return error; 2920 3120 2921 3121 mutex_lock(&inode->i_mutex); 2922 - error = dir->i_op->link(old_dentry, dir, new_dentry); 3122 + /* Make sure we don't allow creating hardlink to an unlinked file */ 3123 + if (inode->i_nlink == 0) 3124 + error = -ENOENT; 3125 + else 3126 + error = dir->i_op->link(old_dentry, dir, new_dentry); 2923 3127 mutex_unlock(&inode->i_mutex); 2924 3128 if (!error) 2925 3129 fsnotify_link(dir, inode, new_dentry); ··· 2945 3141 struct dentry *new_dentry; 2946 3142 struct nameidata nd; 2947 3143 struct path old_path; 3144 + int how = 0; 2948 3145 int error; 2949 3146 char *to; 2950 3147 2951 - if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 3148 + if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) 2952 3149 return -EINVAL; 3150 + /* 3151 + * To use null names we require CAP_DAC_READ_SEARCH 3152 + * This ensures that not everyone will be able to create 3153 + * handlink using the passed filedescriptor. 3154 + */ 3155 + if (flags & AT_EMPTY_PATH) { 3156 + if (!capable(CAP_DAC_READ_SEARCH)) 3157 + return -ENOENT; 3158 + how = LOOKUP_EMPTY; 3159 + } 2953 3160 2954 - error = user_path_at(olddfd, oldname, 2955 - flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, 2956 - &old_path); 3161 + if (flags & AT_SYMLINK_FOLLOW) 3162 + how |= LOOKUP_FOLLOW; 3163 + 3164 + error = user_path_at(olddfd, oldname, how, &old_path); 2957 3165 if (error) 2958 3166 return error; 2959 3167 ··· 3402 3586 EXPORT_SYMBOL(__page_symlink); 3403 3587 EXPORT_SYMBOL(page_symlink); 3404 3588 EXPORT_SYMBOL(page_symlink_inode_operations); 3405 - EXPORT_SYMBOL(path_lookup); 3589 + EXPORT_SYMBOL(kern_path_parent); 3406 3590 EXPORT_SYMBOL(kern_path); 3407 3591 EXPORT_SYMBOL(vfs_path_lookup); 3408 3592 EXPORT_SYMBOL(inode_permission);
+16
fs/namespace.c
··· 1002 1002 .show = show_vfsmnt 1003 1003 }; 1004 1004 1005 + static int uuid_is_nil(u8 *uuid) 1006 + { 1007 + int i; 1008 + u8 *cp = (u8 *)uuid; 1009 + 1010 + for (i = 0; i < 16; i++) { 1011 + if (*cp++) 1012 + return 0; 1013 + } 1014 + return 1; 1015 + } 1016 + 1005 1017 static int show_mountinfo(struct seq_file *m, void *v) 1006 1018 { 1007 1019 struct proc_mounts *p = m->private; ··· 1051 1039 } 1052 1040 if (IS_MNT_UNBINDABLE(mnt)) 1053 1041 seq_puts(m, " unbindable"); 1042 + 1043 + if (!uuid_is_nil(mnt->mnt_sb->s_uuid)) 1044 + /* print the uuid */ 1045 + seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid); 1054 1046 1055 1047 /* Filesystem specific data */ 1056 1048 seq_puts(m, " - ");
+4 -17
fs/nfsctl.c
··· 22 22 23 23 static struct file *do_open(char *name, int flags) 24 24 { 25 - struct nameidata nd; 26 25 struct vfsmount *mnt; 27 - int error; 26 + struct file *file; 28 27 29 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 30 29 if (IS_ERR(mnt)) 31 30 return (struct file *)mnt; 32 31 33 - error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 + file = file_open_root(mnt->mnt_root, mnt, name, flags); 33 + 34 34 mntput(mnt); /* drop do_kern_mount reference */ 35 - if (error) 36 - return ERR_PTR(error); 37 - 38 - if (flags == O_RDWR) 39 - error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags); 40 - else 41 - error = may_open(&nd.path, MAY_WRITE, flags); 42 - 43 - if (!error) 44 - return dentry_open(nd.path.dentry, nd.path.mnt, flags, 45 - current_cred()); 46 - 47 - path_put(&nd.path); 48 - return ERR_PTR(error); 35 + return file; 49 36 } 50 37 51 38 static struct {
+6 -2
fs/ocfs2/export.c
··· 197 197 dentry->d_name.len, dentry->d_name.name, 198 198 fh, len, connectable); 199 199 200 - if (len < 3 || (connectable && len < 6)) { 201 - mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 200 + if (connectable && (len < 6)) { 201 + *max_len = 6; 202 + type = 255; 203 + goto bail; 204 + } else if (len < 3) { 205 + *max_len = 3; 202 206 type = 255; 203 207 goto bail; 204 208 }
+1 -1
fs/ocfs2/refcounttree.c
··· 4379 4379 if (IS_ERR(s)) 4380 4380 return PTR_ERR(s); 4381 4381 4382 - error = path_lookup(s, LOOKUP_PARENT, nd); 4382 + error = kern_path_parent(s, nd); 4383 4383 if (error) 4384 4384 putname(s); 4385 4385 else
+118 -8
fs/open.c
··· 573 573 { 574 574 struct path path; 575 575 int error = -EINVAL; 576 - int follow; 576 + int lookup_flags; 577 577 578 - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) 579 579 goto out; 580 580 581 - follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 582 - error = user_path_at(dfd, filename, follow, &path); 581 + lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 582 + if (flag & AT_EMPTY_PATH) 583 + lookup_flags |= LOOKUP_EMPTY; 584 + error = user_path_at(dfd, filename, lookup_flags, &path); 583 585 if (error) 584 586 goto out; 585 587 error = mnt_want_write(path.mnt); ··· 671 669 int (*open)(struct inode *, struct file *), 672 670 const struct cred *cred) 673 671 { 672 + static const struct file_operations empty_fops = {}; 674 673 struct inode *inode; 675 674 int error; 676 675 677 676 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 677 FMODE_PREAD | FMODE_PWRITE; 678 + 679 + if (unlikely(f->f_flags & O_PATH)) 680 + f->f_mode = FMODE_PATH; 681 + 679 682 inode = dentry->d_inode; 680 683 if (f->f_mode & FMODE_WRITE) { 681 684 error = __get_file_write_access(inode, mnt); ··· 694 687 f->f_path.dentry = dentry; 695 688 f->f_path.mnt = mnt; 696 689 f->f_pos = 0; 697 - f->f_op = fops_get(inode->i_fop); 698 690 file_sb_list_add(f, inode->i_sb); 691 + 692 + if (unlikely(f->f_mode & FMODE_PATH)) { 693 + f->f_op = &empty_fops; 694 + return f; 695 + } 696 + 697 + f->f_op = fops_get(inode->i_fop); 699 698 700 699 error = security_dentry_open(f, cred); 701 700 if (error) ··· 903 890 904 891 EXPORT_SYMBOL(fd_install); 905 892 893 + static inline int build_open_flags(int flags, int mode, struct open_flags *op) 894 + { 895 + int lookup_flags = 0; 896 + int acc_mode; 897 + 898 + if (!(flags & O_CREAT)) 899 + mode = 0; 900 + op->mode = mode; 901 + 902 + /* Must never be set by userspace */ 903 + flags &= ~FMODE_NONOTIFY; 904 + 905 + /* 906 + * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 907 + * check for O_DSYNC if the need any syncing at all we enforce it's 908 + * always set instead of having to deal with possibly weird behaviour 909 + * for malicious applications setting only __O_SYNC. 910 + */ 911 + if (flags & __O_SYNC) 912 + flags |= O_DSYNC; 913 + 914 + /* 915 + * If we have O_PATH in the open flag. Then we 916 + * cannot have anything other than the below set of flags 917 + */ 918 + if (flags & O_PATH) { 919 + flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; 920 + acc_mode = 0; 921 + } else { 922 + acc_mode = MAY_OPEN | ACC_MODE(flags); 923 + } 924 + 925 + op->open_flag = flags; 926 + 927 + /* O_TRUNC implies we need access checks for write permissions */ 928 + if (flags & O_TRUNC) 929 + acc_mode |= MAY_WRITE; 930 + 931 + /* Allow the LSM permission hook to distinguish append 932 + access from general write access. */ 933 + if (flags & O_APPEND) 934 + acc_mode |= MAY_APPEND; 935 + 936 + op->acc_mode = acc_mode; 937 + 938 + op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN; 939 + 940 + if (flags & O_CREAT) { 941 + op->intent |= LOOKUP_CREATE; 942 + if (flags & O_EXCL) 943 + op->intent |= LOOKUP_EXCL; 944 + } 945 + 946 + if (flags & O_DIRECTORY) 947 + lookup_flags |= LOOKUP_DIRECTORY; 948 + if (!(flags & O_NOFOLLOW)) 949 + lookup_flags |= LOOKUP_FOLLOW; 950 + return lookup_flags; 951 + } 952 + 953 + /** 954 + * filp_open - open file and return file pointer 955 + * 956 + * @filename: path to open 957 + * @flags: open flags as per the open(2) second argument 958 + * @mode: mode for the new file if O_CREAT is set, else ignored 959 + * 960 + * This is the helper to open a file from kernelspace if you really 961 + * have to. But in generally you should not do this, so please move 962 + * along, nothing to see here.. 963 + */ 964 + struct file *filp_open(const char *filename, int flags, int mode) 965 + { 966 + struct open_flags op; 967 + int lookup = build_open_flags(flags, mode, &op); 968 + return do_filp_open(AT_FDCWD, filename, &op, lookup); 969 + } 970 + EXPORT_SYMBOL(filp_open); 971 + 972 + struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, 973 + const char *filename, int flags) 974 + { 975 + struct open_flags op; 976 + int lookup = build_open_flags(flags, 0, &op); 977 + if (flags & O_CREAT) 978 + return ERR_PTR(-EINVAL); 979 + if (!filename && (flags & O_DIRECTORY)) 980 + if (!dentry->d_inode->i_op->lookup) 981 + return ERR_PTR(-ENOTDIR); 982 + return do_file_open_root(dentry, mnt, filename, &op, lookup); 983 + } 984 + EXPORT_SYMBOL(file_open_root); 985 + 906 986 long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 907 987 { 988 + struct open_flags op; 989 + int lookup = build_open_flags(flags, mode, &op); 908 990 char *tmp = getname(filename); 909 991 int fd = PTR_ERR(tmp); 910 992 911 993 if (!IS_ERR(tmp)) { 912 994 fd = get_unused_fd_flags(flags); 913 995 if (fd >= 0) { 914 - struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 996 + struct file *f = do_filp_open(dfd, tmp, &op, lookup); 915 997 if (IS_ERR(f)) { 916 998 put_unused_fd(fd); 917 999 fd = PTR_ERR(f); ··· 1076 968 if (filp->f_op && filp->f_op->flush) 1077 969 retval = filp->f_op->flush(filp, id); 1078 970 1079 - dnotify_flush(filp, id); 1080 - locks_remove_posix(filp, id); 971 + if (likely(!(filp->f_mode & FMODE_PATH))) { 972 + dnotify_flush(filp, id); 973 + locks_remove_posix(filp, id); 974 + } 1081 975 fput(filp); 1082 976 return retval; 1083 977 }
+6 -1
fs/reiserfs/inode.c
··· 1593 1593 struct inode *inode = dentry->d_inode; 1594 1594 int maxlen = *lenp; 1595 1595 1596 - if (maxlen < 3) 1596 + if (need_parent && (maxlen < 5)) { 1597 + *lenp = 5; 1597 1598 return 255; 1599 + } else if (maxlen < 3) { 1600 + *lenp = 3; 1601 + return 255; 1602 + } 1598 1603 1599 1604 data[0] = inode->i_ino; 1600 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
-4
fs/reiserfs/namei.c
··· 1122 1122 reiserfs_write_unlock(dir->i_sb); 1123 1123 return -EMLINK; 1124 1124 } 1125 - if (inode->i_nlink == 0) { 1126 - reiserfs_write_unlock(dir->i_sb); 1127 - return -ENOENT; 1128 - } 1129 1125 1130 1126 /* inc before scheduling so reiserfs_unlink knows we are here */ 1131 1127 inc_nlink(inode);
+5 -2
fs/stat.c
··· 75 75 int error = -EINVAL; 76 76 int lookup_flags = 0; 77 77 78 - if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | 79 + AT_EMPTY_PATH)) != 0) 79 80 goto out; 80 81 81 82 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 83 lookup_flags |= LOOKUP_FOLLOW; 83 84 if (flag & AT_NO_AUTOMOUNT) 84 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 86 + if (flag & AT_EMPTY_PATH) 87 + lookup_flags |= LOOKUP_EMPTY; 85 88 86 89 error = user_path_at(dfd, filename, lookup_flags, &path); 87 90 if (error) ··· 300 297 if (bufsiz <= 0) 301 298 return -EINVAL; 302 299 303 - error = user_path_at(dfd, pathname, 0, &path); 300 + error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path); 304 301 if (!error) { 305 302 struct inode *inode = path.dentry->d_inode; 306 303
+81 -95
fs/statfs.c
··· 73 73 } 74 74 EXPORT_SYMBOL(vfs_statfs); 75 75 76 - static int do_statfs_native(struct path *path, struct statfs *buf) 76 + int user_statfs(const char __user *pathname, struct kstatfs *st) 77 77 { 78 - struct kstatfs st; 79 - int retval; 78 + struct path path; 79 + int error = user_path(pathname, &path); 80 + if (!error) { 81 + error = vfs_statfs(&path, st); 82 + path_put(&path); 83 + } 84 + return error; 85 + } 80 86 81 - retval = vfs_statfs(path, &st); 82 - if (retval) 83 - return retval; 87 + int fd_statfs(int fd, struct kstatfs *st) 88 + { 89 + struct file *file = fget(fd); 90 + int error = -EBADF; 91 + if (file) { 92 + error = vfs_statfs(&file->f_path, st); 93 + fput(file); 94 + } 95 + return error; 96 + } 84 97 85 - if (sizeof(*buf) == sizeof(st)) 86 - memcpy(buf, &st, sizeof(st)); 98 + static int do_statfs_native(struct kstatfs *st, struct statfs __user *p) 99 + { 100 + struct statfs buf; 101 + 102 + if (sizeof(buf) == sizeof(*st)) 103 + memcpy(&buf, st, sizeof(*st)); 87 104 else { 88 - if (sizeof buf->f_blocks == 4) { 89 - if ((st.f_blocks | st.f_bfree | st.f_bavail | 90 - st.f_bsize | st.f_frsize) & 105 + if (sizeof buf.f_blocks == 4) { 106 + if ((st->f_blocks | st->f_bfree | st->f_bavail | 107 + st->f_bsize | st->f_frsize) & 91 108 0xffffffff00000000ULL) 92 109 return -EOVERFLOW; 93 110 /* 94 111 * f_files and f_ffree may be -1; it's okay to stuff 95 112 * that into 32 bits 96 113 */ 97 - if (st.f_files != -1 && 98 - (st.f_files & 0xffffffff00000000ULL)) 114 + if (st->f_files != -1 && 115 + (st->f_files & 0xffffffff00000000ULL)) 99 116 return -EOVERFLOW; 100 - if (st.f_ffree != -1 && 101 - (st.f_ffree & 0xffffffff00000000ULL)) 117 + if (st->f_ffree != -1 && 118 + (st->f_ffree & 0xffffffff00000000ULL)) 102 119 return -EOVERFLOW; 103 120 } 104 121 105 - buf->f_type = st.f_type; 106 - buf->f_bsize = st.f_bsize; 107 - buf->f_blocks = st.f_blocks; 108 - buf->f_bfree = st.f_bfree; 109 - buf->f_bavail = st.f_bavail; 110 - buf->f_files = st.f_files; 111 - buf->f_ffree = st.f_ffree; 112 - buf->f_fsid = st.f_fsid; 113 - buf->f_namelen = st.f_namelen; 114 - buf->f_frsize = st.f_frsize; 115 - buf->f_flags = st.f_flags; 116 - memset(buf->f_spare, 0, sizeof(buf->f_spare)); 122 + buf.f_type = st->f_type; 123 + buf.f_bsize = st->f_bsize; 124 + buf.f_blocks = st->f_blocks; 125 + buf.f_bfree = st->f_bfree; 126 + buf.f_bavail = st->f_bavail; 127 + buf.f_files = st->f_files; 128 + buf.f_ffree = st->f_ffree; 129 + buf.f_fsid = st->f_fsid; 130 + buf.f_namelen = st->f_namelen; 131 + buf.f_frsize = st->f_frsize; 132 + buf.f_flags = st->f_flags; 133 + memset(buf.f_spare, 0, sizeof(buf.f_spare)); 117 134 } 135 + if (copy_to_user(p, &buf, sizeof(buf))) 136 + return -EFAULT; 118 137 return 0; 119 138 } 120 139 121 - static int do_statfs64(struct path *path, struct statfs64 *buf) 140 + static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p) 122 141 { 123 - struct kstatfs st; 124 - int retval; 125 - 126 - retval = vfs_statfs(path, &st); 127 - if (retval) 128 - return retval; 129 - 130 - if (sizeof(*buf) == sizeof(st)) 131 - memcpy(buf, &st, sizeof(st)); 142 + struct statfs64 buf; 143 + if (sizeof(buf) == sizeof(*st)) 144 + memcpy(&buf, st, sizeof(*st)); 132 145 else { 133 - buf->f_type = st.f_type; 134 - buf->f_bsize = st.f_bsize; 135 - buf->f_blocks = st.f_blocks; 136 - buf->f_bfree = st.f_bfree; 137 - buf->f_bavail = st.f_bavail; 138 - buf->f_files = st.f_files; 139 - buf->f_ffree = st.f_ffree; 140 - buf->f_fsid = st.f_fsid; 141 - buf->f_namelen = st.f_namelen; 142 - buf->f_frsize = st.f_frsize; 143 - buf->f_flags = st.f_flags; 144 - memset(buf->f_spare, 0, sizeof(buf->f_spare)); 146 + buf.f_type = st->f_type; 147 + buf.f_bsize = st->f_bsize; 148 + buf.f_blocks = st->f_blocks; 149 + buf.f_bfree = st->f_bfree; 150 + buf.f_bavail = st->f_bavail; 151 + buf.f_files = st->f_files; 152 + buf.f_ffree = st->f_ffree; 153 + buf.f_fsid = st->f_fsid; 154 + buf.f_namelen = st->f_namelen; 155 + buf.f_frsize = st->f_frsize; 156 + buf.f_flags = st->f_flags; 157 + memset(buf.f_spare, 0, sizeof(buf.f_spare)); 145 158 } 159 + if (copy_to_user(p, &buf, sizeof(buf))) 160 + return -EFAULT; 146 161 return 0; 147 162 } 148 163 149 164 SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 150 165 { 151 - struct path path; 152 - int error; 153 - 154 - error = user_path(pathname, &path); 155 - if (!error) { 156 - struct statfs tmp; 157 - error = do_statfs_native(&path, &tmp); 158 - if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 159 - error = -EFAULT; 160 - path_put(&path); 161 - } 166 + struct kstatfs st; 167 + int error = user_statfs(pathname, &st); 168 + if (!error) 169 + error = do_statfs_native(&st, buf); 162 170 return error; 163 171 } 164 172 165 173 SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 166 174 { 167 - struct path path; 168 - long error; 169 - 175 + struct kstatfs st; 176 + int error; 170 177 if (sz != sizeof(*buf)) 171 178 return -EINVAL; 172 - error = user_path(pathname, &path); 173 - if (!error) { 174 - struct statfs64 tmp; 175 - error = do_statfs64(&path, &tmp); 176 - if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 177 - error = -EFAULT; 178 - path_put(&path); 179 - } 179 + error = user_statfs(pathname, &st); 180 + if (!error) 181 + error = do_statfs64(&st, buf); 180 182 return error; 181 183 } 182 184 183 185 SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 184 186 { 185 - struct file *file; 186 - struct statfs tmp; 187 - int error; 188 - 189 - error = -EBADF; 190 - file = fget(fd); 191 - if (!file) 192 - goto out; 193 - error = do_statfs_native(&file->f_path, &tmp); 194 - if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 195 - error = -EFAULT; 196 - fput(file); 197 - out: 187 + struct kstatfs st; 188 + int error = fd_statfs(fd, &st); 189 + if (!error) 190 + error = do_statfs_native(&st, buf); 198 191 return error; 199 192 } 200 193 201 194 SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 202 195 { 203 - struct file *file; 204 - struct statfs64 tmp; 196 + struct kstatfs st; 205 197 int error; 206 198 207 199 if (sz != sizeof(*buf)) 208 200 return -EINVAL; 209 201 210 - error = -EBADF; 211 - file = fget(fd); 212 - if (!file) 213 - goto out; 214 - error = do_statfs64(&file->f_path, &tmp); 215 - if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 216 - error = -EFAULT; 217 - fput(file); 218 - out: 202 + error = fd_statfs(fd, &st); 203 + if (!error) 204 + error = do_statfs64(&st, buf); 219 205 return error; 220 206 } 221 207
-18
fs/ubifs/dir.c
··· 522 522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 523 523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 524 524 525 - /* 526 - * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing 527 - * otherwise has the potential to corrupt the orphan inode list. 528 - * 529 - * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and 530 - * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not 531 - * lock 'dirA->i_mutex', so this is possible. Both of the functions 532 - * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes 533 - * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this 534 - * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA' 535 - * to the list of orphans. After this, 'vfs_link()' will link 536 - * 'dirB/fileB' to 'inodeA'. This is a problem because, for example, 537 - * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode 538 - * to the list of orphans. 539 - */ 540 - if (inode->i_nlink == 0) 541 - return -ENOENT; 542 - 543 525 err = dbg_check_synced_i_size(inode); 544 526 if (err) 545 527 return err;
+6 -1
fs/udf/namei.c
··· 1286 1286 struct fid *fid = (struct fid *)fh; 1287 1287 int type = FILEID_UDF_WITHOUT_PARENT; 1288 1288 1289 - if (len < 3 || (connectable && len < 5)) 1289 + if (connectable && (len < 5)) { 1290 + *lenp = 5; 1290 1291 return 255; 1292 + } else if (len < 3) { 1293 + *lenp = 3; 1294 + return 255; 1295 + } 1291 1296 1292 1297 *lenp = 3; 1293 1298 fid->udf.block = location.logicalBlockNum;
+3 -1
fs/xfs/linux-2.6/xfs_export.c
··· 89 89 * seven combinations work. The real answer is "don't use v2". 90 90 */ 91 91 len = xfs_fileid_length(fileid_type); 92 - if (*max_len < len) 92 + if (*max_len < len) { 93 + *max_len = len; 93 94 return 255; 95 + } 94 96 *max_len = len; 95 97 96 98 switch (fileid_type) {
+4
include/asm-generic/fcntl.h
··· 80 80 #define O_SYNC (__O_SYNC|O_DSYNC) 81 81 #endif 82 82 83 + #ifndef O_PATH 84 + #define O_PATH 010000000 85 + #endif 86 + 83 87 #ifndef O_NDELAY 84 88 #define O_NDELAY O_NONBLOCK 85 89 #endif
+5 -1
include/asm-generic/unistd.h
··· 646 646 __SYSCALL(__NR_fanotify_init, sys_fanotify_init) 647 647 #define __NR_fanotify_mark 263 648 648 __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 649 + #define __NR_name_to_handle_at 264 650 + __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) 651 + #define __NR_open_by_handle_at 265 652 + __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) 649 653 650 654 #undef __NR_syscalls 651 - #define __NR_syscalls 264 655 + #define __NR_syscalls 266 652 656 653 657 /* 654 658 * All syscalls below here should go away really,
+7 -2
include/linux/exportfs.h
··· 8 8 struct super_block; 9 9 struct vfsmount; 10 10 11 + /* limit the handle size to NFSv4 handle size now */ 12 + #define MAX_HANDLE_SZ 128 13 + 11 14 /* 12 15 * The fileid_type identifies how the file within the filesystem is encoded. 13 16 * In theory this is freely set and parsed by the filesystem, but we try to ··· 124 121 * set, the encode_fh() should store sufficient information so that a good 125 122 * attempt can be made to find not only the file but also it's place in the 126 123 * filesystem. This typically means storing a reference to de->d_parent in 127 - * the filehandle fragment. encode_fh() should return the number of bytes 128 - * stored or a negative error code such as %-ENOSPC 124 + * the filehandle fragment. encode_fh() should return the fileid_type on 125 + * success and on error returns 255 (if the space needed to encode fh is 126 + * greater than @max_len*4 bytes). On error @max_len contains the minimum 127 + * size(in 4 byte unit) needed to encode the file handle. 129 128 * 130 129 * fh_to_dentry: 131 130 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle
+1
include/linux/fcntl.h
··· 46 46 unlinking file. */ 47 47 #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ 48 48 #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ 49 + #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ 49 50 50 51 #ifdef __KERNEL__ 51 52
+2
include/linux/file.h
··· 29 29 30 30 extern struct file *fget(unsigned int fd); 31 31 extern struct file *fget_light(unsigned int fd, int *fput_needed); 32 + extern struct file *fget_raw(unsigned int fd); 33 + extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); 32 34 extern void set_close_on_exec(unsigned int fd, int flag); 33 35 extern void put_filp(struct file *); 34 36 extern int alloc_fd(unsigned start, unsigned flags);
+15 -4
include/linux/fs.h
··· 102 102 /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ 103 103 #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) 104 104 105 + /* File is opened with O_PATH; almost nothing can be done with it */ 106 + #define FMODE_PATH ((__force fmode_t)0x4000) 107 + 105 108 /* File was opened by fanotify and shouldn't generate fanotify events */ 106 109 #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 107 110 ··· 981 978 #endif 982 979 }; 983 980 981 + struct file_handle { 982 + __u32 handle_bytes; 983 + int handle_type; 984 + /* file identifier */ 985 + unsigned char f_handle[0]; 986 + }; 987 + 984 988 #define get_file(x) atomic_long_inc(&(x)->f_count) 985 989 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 986 990 #define file_count(x) atomic_long_read(&(x)->f_count) ··· 1411 1401 wait_queue_head_t s_wait_unfrozen; 1412 1402 1413 1403 char s_id[32]; /* Informational name */ 1404 + u8 s_uuid[16]; /* UUID */ 1414 1405 1415 1406 void *s_fs_info; /* Filesystem private info */ 1416 1407 fmode_t s_mode; ··· 1885 1874 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, 1886 1875 struct vfsmount *); 1887 1876 extern int vfs_statfs(struct path *, struct kstatfs *); 1877 + extern int user_statfs(const char __user *, struct kstatfs *); 1878 + extern int fd_statfs(int, struct kstatfs *); 1888 1879 extern int statfs_by_dentry(struct dentry *, struct kstatfs *); 1889 1880 extern int freeze_super(struct super_block *super); 1890 1881 extern int thaw_super(struct super_block *super); ··· 2003 1990 extern long do_sys_open(int dfd, const char __user *filename, int flags, 2004 1991 int mode); 2005 1992 extern struct file *filp_open(const char *, int, int); 1993 + extern struct file *file_open_root(struct dentry *, struct vfsmount *, 1994 + const char *, int); 2006 1995 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, 2007 1996 const struct cred *); 2008 1997 extern int filp_close(struct file *, fl_owner_t id); ··· 2219 2204 extern struct file *create_read_pipe(struct file *f, int flags); 2220 2205 extern struct file *create_write_pipe(int flags); 2221 2206 extern void free_write_pipe(struct file *); 2222 - 2223 - extern struct file *do_filp_open(int dfd, const char *pathname, 2224 - int open_flag, int mode, int acc_mode); 2225 - extern int may_open(struct path *, int, int); 2226 2207 2227 2208 extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2228 2209 extern struct file * open_exec(const char *);
+5 -2
include/linux/namei.h
··· 19 19 struct path path; 20 20 struct qstr last; 21 21 struct path root; 22 - struct file *file; 23 22 struct inode *inode; /* path.dentry.d_inode */ 24 23 unsigned int flags; 25 24 unsigned seq; ··· 62 63 #define LOOKUP_EXCL 0x0400 63 64 #define LOOKUP_RENAME_TARGET 0x0800 64 65 66 + #define LOOKUP_JUMPED 0x1000 67 + #define LOOKUP_ROOT 0x2000 68 + #define LOOKUP_EMPTY 0x4000 69 + 65 70 extern int user_path_at(int, const char __user *, unsigned, struct path *); 66 71 67 72 #define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) ··· 75 72 76 73 extern int kern_path(const char *, unsigned, struct path *); 77 74 78 - extern int path_lookup(const char *, unsigned, struct nameidata *); 75 + extern int kern_path_parent(const char *, struct nameidata *); 79 76 extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 80 77 const char *, unsigned int, struct nameidata *); 81 78
+7 -1
include/linux/syscalls.h
··· 62 62 struct getcpu_cache; 63 63 struct old_linux_dirent; 64 64 struct perf_event_attr; 65 + struct file_handle; 65 66 66 67 #include <linux/types.h> 67 68 #include <linux/aio_abi.h> ··· 833 832 unsigned long prot, unsigned long flags, 834 833 unsigned long fd, unsigned long pgoff); 835 834 asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); 836 - 835 + asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, 836 + struct file_handle __user *handle, 837 + int __user *mnt_id, int flag); 838 + asmlinkage long sys_open_by_handle_at(int mountdirfd, 839 + struct file_handle __user *handle, 840 + int flags); 837 841 #endif
+12
init/Kconfig
··· 287 287 for processing it. A preliminary version of these tools is available 288 288 at <http://www.gnu.org/software/acct/>. 289 289 290 + config FHANDLE 291 + bool "open by fhandle syscalls" 292 + select EXPORTFS 293 + help 294 + If you say Y here, a user level program will be able to map 295 + file names to handle and then later use the handle for 296 + different file system operations. This is useful in implementing 297 + userspace file servers, which now track files using handles instead 298 + of names. The handle would remain the same even if file names 299 + get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2) 300 + syscalls. 301 + 290 302 config TASKSTATS 291 303 bool "Export task/process statistics through netlink (EXPERIMENTAL)" 292 304 depends on NET
+33 -54
kernel/audit_watch.c
··· 144 144 } 145 145 146 146 /* Initialize a parent watch entry. */ 147 - static struct audit_parent *audit_init_parent(struct nameidata *ndp) 147 + static struct audit_parent *audit_init_parent(struct path *path) 148 148 { 149 - struct inode *inode = ndp->path.dentry->d_inode; 149 + struct inode *inode = path->dentry->d_inode; 150 150 struct audit_parent *parent; 151 151 int ret; 152 152 ··· 353 353 } 354 354 355 355 /* Get path information necessary for adding watches. */ 356 - static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw) 356 + static int audit_get_nd(struct audit_watch *watch, struct path *parent) 357 357 { 358 - struct nameidata *ndparent, *ndwatch; 358 + struct nameidata nd; 359 + struct dentry *d; 359 360 int err; 360 361 361 - ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); 362 - if (unlikely(!ndparent)) 363 - return -ENOMEM; 364 - 365 - ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); 366 - if (unlikely(!ndwatch)) { 367 - kfree(ndparent); 368 - return -ENOMEM; 369 - } 370 - 371 - err = path_lookup(path, LOOKUP_PARENT, ndparent); 372 - if (err) { 373 - kfree(ndparent); 374 - kfree(ndwatch); 362 + err = kern_path_parent(watch->path, &nd); 363 + if (err) 375 364 return err; 365 + 366 + if (nd.last_type != LAST_NORM) { 367 + path_put(&nd.path); 368 + return -EINVAL; 376 369 } 377 370 378 - err = path_lookup(path, 0, ndwatch); 379 - if (err) { 380 - kfree(ndwatch); 381 - ndwatch = NULL; 371 + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 372 + d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); 373 + if (IS_ERR(d)) { 374 + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 375 + path_put(&nd.path); 376 + return PTR_ERR(d); 382 377 } 378 + if (d->d_inode) { 379 + /* update watch filter fields */ 380 + watch->dev = d->d_inode->i_sb->s_dev; 381 + watch->ino = d->d_inode->i_ino; 382 + } 383 + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 383 384 384 - *ndp = ndparent; 385 - *ndw = ndwatch; 386 - 385 + *parent = nd.path; 386 + dput(d); 387 387 return 0; 388 - } 389 - 390 - /* Release resources used for watch path information. */ 391 - static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) 392 - { 393 - if (ndp) { 394 - path_put(&ndp->path); 395 - kfree(ndp); 396 - } 397 - if (ndw) { 398 - path_put(&ndw->path); 399 - kfree(ndw); 400 - } 401 388 } 402 389 403 390 /* Associate the given rule with an existing parent. ··· 427 440 { 428 441 struct audit_watch *watch = krule->watch; 429 442 struct audit_parent *parent; 430 - struct nameidata *ndp = NULL, *ndw = NULL; 443 + struct path parent_path; 431 444 int h, ret = 0; 432 445 433 446 mutex_unlock(&audit_filter_mutex); 434 447 435 448 /* Avoid calling path_lookup under audit_filter_mutex. */ 436 - ret = audit_get_nd(watch->path, &ndp, &ndw); 437 - if (ret) { 438 - /* caller expects mutex locked */ 439 - mutex_lock(&audit_filter_mutex); 440 - goto error; 441 - } 449 + ret = audit_get_nd(watch, &parent_path); 442 450 451 + /* caller expects mutex locked */ 443 452 mutex_lock(&audit_filter_mutex); 444 453 445 - /* update watch filter fields */ 446 - if (ndw) { 447 - watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev; 448 - watch->ino = ndw->path.dentry->d_inode->i_ino; 449 - } 454 + if (ret) 455 + return ret; 450 456 451 457 /* either find an old parent or attach a new one */ 452 - parent = audit_find_parent(ndp->path.dentry->d_inode); 458 + parent = audit_find_parent(parent_path.dentry->d_inode); 453 459 if (!parent) { 454 - parent = audit_init_parent(ndp); 460 + parent = audit_init_parent(&parent_path); 455 461 if (IS_ERR(parent)) { 456 462 ret = PTR_ERR(parent); 457 463 goto error; ··· 459 479 h = audit_hash_ino((u32)watch->ino); 460 480 *list = &audit_inode_hash[h]; 461 481 error: 462 - audit_put_nd(ndp, ndw); /* NULL args OK */ 482 + path_put(&parent_path); 463 483 return ret; 464 - 465 484 } 466 485 467 486 void audit_remove_watch_rule(struct audit_krule *krule)
+5
kernel/sys_ni.c
··· 186 186 /* fanotify! */ 187 187 cond_syscall(sys_fanotify_init); 188 188 cond_syscall(sys_fanotify_mark); 189 + 190 + /* open by handle */ 191 + cond_syscall(sys_name_to_handle_at); 192 + cond_syscall(sys_open_by_handle_at); 193 + cond_syscall(compat_sys_open_by_handle_at);
+1 -18
kernel/sysctl_binary.c
··· 1321 1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) 1322 1322 { 1323 1323 const struct bin_table *table = NULL; 1324 - struct nameidata nd; 1325 1324 struct vfsmount *mnt; 1326 1325 struct file *file; 1327 1326 ssize_t result; 1328 1327 char *pathname; 1329 1328 int flags; 1330 - int acc_mode; 1331 1329 1332 1330 pathname = sysctl_getname(name, nlen, &table); 1333 1331 result = PTR_ERR(pathname); ··· 1335 1337 /* How should the sysctl be accessed? */ 1336 1338 if (oldval && oldlen && newval && newlen) { 1337 1339 flags = O_RDWR; 1338 - acc_mode = MAY_READ | MAY_WRITE; 1339 1340 } else if (newval && newlen) { 1340 1341 flags = O_WRONLY; 1341 - acc_mode = MAY_WRITE; 1342 1342 } else if (oldval && oldlen) { 1343 1343 flags = O_RDONLY; 1344 - acc_mode = MAY_READ; 1345 1344 } else { 1346 1345 result = 0; 1347 1346 goto out_putname; 1348 1347 } 1349 1348 1350 1349 mnt = current->nsproxy->pid_ns->proc_mnt; 1351 - result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd); 1352 - if (result) 1353 - goto out_putname; 1354 - 1355 - result = may_open(&nd.path, acc_mode, flags); 1356 - if (result) 1357 - goto out_putpath; 1358 - 1359 - file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred()); 1350 + file = file_open_root(mnt->mnt_root, mnt, pathname, flags); 1360 1351 result = PTR_ERR(file); 1361 1352 if (IS_ERR(file)) 1362 1353 goto out_putname; ··· 1357 1370 putname(pathname); 1358 1371 out: 1359 1372 return result; 1360 - 1361 - out_putpath: 1362 - path_put(&nd.path); 1363 - goto out_putname; 1364 1373 } 1365 1374 1366 1375
+3 -1
mm/shmem.c
··· 2144 2144 { 2145 2145 struct inode *inode = dentry->d_inode; 2146 2146 2147 - if (*len < 3) 2147 + if (*len < 3) { 2148 + *len = 3; 2148 2149 return 255; 2150 + } 2149 2151 2150 2152 if (inode_unhashed(inode)) { 2151 2153 /* Unfortunately insert_inode_hash is not idempotent,
+1 -1
net/core/scm.c
··· 95 95 int fd = fdp[i]; 96 96 struct file *file; 97 97 98 - if (fd < 0 || !(file = fget(fd))) 98 + if (fd < 0 || !(file = fget_raw(fd))) 99 99 return -EBADF; 100 100 *fpp++ = file; 101 101 fpl->count++;
+1 -1
net/unix/af_unix.c
··· 850 850 * Get the parent directory, calculate the hash for last 851 851 * component. 852 852 */ 853 - err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 853 + err = kern_path_parent(sunaddr->sun_path, &nd); 854 854 if (err) 855 855 goto out_mknod_parent; 856 856
+1 -1
net/unix/garbage.c
··· 104 104 /* 105 105 * Socket ? 106 106 */ 107 - if (S_ISSOCK(inode->i_mode)) { 107 + if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { 108 108 struct socket *sock = SOCKET_I(inode); 109 109 struct sock *s = sock->sk; 110 110