/* Snapshot of linux/fs/open.c at v2.6.26-rc9 (1250 lines, 28 kB). */
1/* 2 * linux/fs/open.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7#include <linux/string.h> 8#include <linux/mm.h> 9#include <linux/file.h> 10#include <linux/fdtable.h> 11#include <linux/quotaops.h> 12#include <linux/fsnotify.h> 13#include <linux/module.h> 14#include <linux/slab.h> 15#include <linux/tty.h> 16#include <linux/namei.h> 17#include <linux/backing-dev.h> 18#include <linux/capability.h> 19#include <linux/securebits.h> 20#include <linux/security.h> 21#include <linux/mount.h> 22#include <linux/vfs.h> 23#include <linux/fcntl.h> 24#include <asm/uaccess.h> 25#include <linux/fs.h> 26#include <linux/personality.h> 27#include <linux/pagemap.h> 28#include <linux/syscalls.h> 29#include <linux/rcupdate.h> 30#include <linux/audit.h> 31#include <linux/falloc.h> 32 33int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 34{ 35 int retval = -ENODEV; 36 37 if (dentry) { 38 retval = -ENOSYS; 39 if (dentry->d_sb->s_op->statfs) { 40 memset(buf, 0, sizeof(*buf)); 41 retval = security_sb_statfs(dentry); 42 if (retval) 43 return retval; 44 retval = dentry->d_sb->s_op->statfs(dentry, buf); 45 if (retval == 0 && buf->f_frsize == 0) 46 buf->f_frsize = buf->f_bsize; 47 } 48 } 49 return retval; 50} 51 52EXPORT_SYMBOL(vfs_statfs); 53 54static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) 55{ 56 struct kstatfs st; 57 int retval; 58 59 retval = vfs_statfs(dentry, &st); 60 if (retval) 61 return retval; 62 63 if (sizeof(*buf) == sizeof(st)) 64 memcpy(buf, &st, sizeof(st)); 65 else { 66 if (sizeof buf->f_blocks == 4) { 67 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 68 0xffffffff00000000ULL) 69 return -EOVERFLOW; 70 /* 71 * f_files and f_ffree may be -1; it's okay to stuff 72 * that into 32 bits 73 */ 74 if (st.f_files != -1 && 75 (st.f_files & 0xffffffff00000000ULL)) 76 return -EOVERFLOW; 77 if (st.f_ffree != -1 && 78 (st.f_ffree & 0xffffffff00000000ULL)) 79 return -EOVERFLOW; 80 } 81 82 buf->f_type = st.f_type; 83 buf->f_bsize = st.f_bsize; 84 
buf->f_blocks = st.f_blocks; 85 buf->f_bfree = st.f_bfree; 86 buf->f_bavail = st.f_bavail; 87 buf->f_files = st.f_files; 88 buf->f_ffree = st.f_ffree; 89 buf->f_fsid = st.f_fsid; 90 buf->f_namelen = st.f_namelen; 91 buf->f_frsize = st.f_frsize; 92 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 93 } 94 return 0; 95} 96 97static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) 98{ 99 struct kstatfs st; 100 int retval; 101 102 retval = vfs_statfs(dentry, &st); 103 if (retval) 104 return retval; 105 106 if (sizeof(*buf) == sizeof(st)) 107 memcpy(buf, &st, sizeof(st)); 108 else { 109 buf->f_type = st.f_type; 110 buf->f_bsize = st.f_bsize; 111 buf->f_blocks = st.f_blocks; 112 buf->f_bfree = st.f_bfree; 113 buf->f_bavail = st.f_bavail; 114 buf->f_files = st.f_files; 115 buf->f_ffree = st.f_ffree; 116 buf->f_fsid = st.f_fsid; 117 buf->f_namelen = st.f_namelen; 118 buf->f_frsize = st.f_frsize; 119 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 120 } 121 return 0; 122} 123 124asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) 125{ 126 struct nameidata nd; 127 int error; 128 129 error = user_path_walk(path, &nd); 130 if (!error) { 131 struct statfs tmp; 132 error = vfs_statfs_native(nd.path.dentry, &tmp); 133 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 134 error = -EFAULT; 135 path_put(&nd.path); 136 } 137 return error; 138} 139 140 141asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) 142{ 143 struct nameidata nd; 144 long error; 145 146 if (sz != sizeof(*buf)) 147 return -EINVAL; 148 error = user_path_walk(path, &nd); 149 if (!error) { 150 struct statfs64 tmp; 151 error = vfs_statfs64(nd.path.dentry, &tmp); 152 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 153 error = -EFAULT; 154 path_put(&nd.path); 155 } 156 return error; 157} 158 159 160asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf) 161{ 162 struct file * file; 163 struct statfs tmp; 164 int 
error; 165 166 error = -EBADF; 167 file = fget(fd); 168 if (!file) 169 goto out; 170 error = vfs_statfs_native(file->f_path.dentry, &tmp); 171 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 172 error = -EFAULT; 173 fput(file); 174out: 175 return error; 176} 177 178asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf) 179{ 180 struct file * file; 181 struct statfs64 tmp; 182 int error; 183 184 if (sz != sizeof(*buf)) 185 return -EINVAL; 186 187 error = -EBADF; 188 file = fget(fd); 189 if (!file) 190 goto out; 191 error = vfs_statfs64(file->f_path.dentry, &tmp); 192 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 193 error = -EFAULT; 194 fput(file); 195out: 196 return error; 197} 198 199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 200 struct file *filp) 201{ 202 int err; 203 struct iattr newattrs; 204 205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 206 if (length < 0) 207 return -EINVAL; 208 209 newattrs.ia_size = length; 210 newattrs.ia_valid = ATTR_SIZE | time_attrs; 211 if (filp) { 212 newattrs.ia_file = filp; 213 newattrs.ia_valid |= ATTR_FILE; 214 } 215 216 /* Remove suid/sgid on truncate too */ 217 newattrs.ia_valid |= should_remove_suid(dentry); 218 219 mutex_lock(&dentry->d_inode->i_mutex); 220 err = notify_change(dentry, &newattrs); 221 mutex_unlock(&dentry->d_inode->i_mutex); 222 return err; 223} 224 225static long do_sys_truncate(const char __user * path, loff_t length) 226{ 227 struct nameidata nd; 228 struct inode * inode; 229 int error; 230 231 error = -EINVAL; 232 if (length < 0) /* sorry, but loff_t says... 
*/ 233 goto out; 234 235 error = user_path_walk(path, &nd); 236 if (error) 237 goto out; 238 inode = nd.path.dentry->d_inode; 239 240 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 241 error = -EISDIR; 242 if (S_ISDIR(inode->i_mode)) 243 goto dput_and_out; 244 245 error = -EINVAL; 246 if (!S_ISREG(inode->i_mode)) 247 goto dput_and_out; 248 249 error = mnt_want_write(nd.path.mnt); 250 if (error) 251 goto dput_and_out; 252 253 error = vfs_permission(&nd, MAY_WRITE); 254 if (error) 255 goto mnt_drop_write_and_out; 256 257 error = -EPERM; 258 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 259 goto mnt_drop_write_and_out; 260 261 error = get_write_access(inode); 262 if (error) 263 goto mnt_drop_write_and_out; 264 265 /* 266 * Make sure that there are no leases. get_write_access() protects 267 * against the truncate racing with a lease-granting setlease(). 268 */ 269 error = break_lease(inode, FMODE_WRITE); 270 if (error) 271 goto put_write_and_out; 272 273 error = locks_verify_truncate(inode, NULL, length); 274 if (!error) { 275 DQUOT_INIT(inode); 276 error = do_truncate(nd.path.dentry, length, 0, NULL); 277 } 278 279put_write_and_out: 280 put_write_access(inode); 281mnt_drop_write_and_out: 282 mnt_drop_write(nd.path.mnt); 283dput_and_out: 284 path_put(&nd.path); 285out: 286 return error; 287} 288 289asmlinkage long sys_truncate(const char __user * path, unsigned long length) 290{ 291 /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ 292 return do_sys_truncate(path, (long)length); 293} 294 295static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 296{ 297 struct inode * inode; 298 struct dentry *dentry; 299 struct file * file; 300 int error; 301 302 error = -EINVAL; 303 if (length < 0) 304 goto out; 305 error = -EBADF; 306 file = fget(fd); 307 if (!file) 308 goto out; 309 310 /* explicitly opened as large or we are on 64-bit box */ 311 if (file->f_flags & O_LARGEFILE) 312 small = 0; 313 314 dentry = file->f_path.dentry; 
315 inode = dentry->d_inode; 316 error = -EINVAL; 317 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 318 goto out_putf; 319 320 error = -EINVAL; 321 /* Cannot ftruncate over 2^31 bytes without large file support */ 322 if (small && length > MAX_NON_LFS) 323 goto out_putf; 324 325 error = -EPERM; 326 if (IS_APPEND(inode)) 327 goto out_putf; 328 329 error = locks_verify_truncate(inode, file, length); 330 if (!error) 331 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 332out_putf: 333 fput(file); 334out: 335 return error; 336} 337 338asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) 339{ 340 long ret = do_sys_ftruncate(fd, length, 1); 341 /* avoid REGPARM breakage on x86: */ 342 asmlinkage_protect(2, ret, fd, length); 343 return ret; 344} 345 346/* LFS versions of truncate are only needed on 32 bit machines */ 347#if BITS_PER_LONG == 32 348asmlinkage long sys_truncate64(const char __user * path, loff_t length) 349{ 350 return do_sys_truncate(path, length); 351} 352 353asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) 354{ 355 long ret = do_sys_ftruncate(fd, length, 0); 356 /* avoid REGPARM breakage on x86: */ 357 asmlinkage_protect(2, ret, fd, length); 358 return ret; 359} 360#endif 361 362asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len) 363{ 364 struct file *file; 365 struct inode *inode; 366 long ret = -EINVAL; 367 368 if (offset < 0 || len <= 0) 369 goto out; 370 371 /* Return error if mode is not supported */ 372 ret = -EOPNOTSUPP; 373 if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) 374 goto out; 375 376 ret = -EBADF; 377 file = fget(fd); 378 if (!file) 379 goto out; 380 if (!(file->f_mode & FMODE_WRITE)) 381 goto out_fput; 382 /* 383 * Revalidate the write permissions, in case security policy has 384 * changed since the files were opened. 
385 */ 386 ret = security_file_permission(file, MAY_WRITE); 387 if (ret) 388 goto out_fput; 389 390 inode = file->f_path.dentry->d_inode; 391 392 ret = -ESPIPE; 393 if (S_ISFIFO(inode->i_mode)) 394 goto out_fput; 395 396 ret = -ENODEV; 397 /* 398 * Let individual file system decide if it supports preallocation 399 * for directories or not. 400 */ 401 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 402 goto out_fput; 403 404 ret = -EFBIG; 405 /* Check for wrap through zero too */ 406 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 407 goto out_fput; 408 409 if (inode->i_op && inode->i_op->fallocate) 410 ret = inode->i_op->fallocate(inode, mode, offset, len); 411 else 412 ret = -EOPNOTSUPP; 413 414out_fput: 415 fput(file); 416out: 417 return ret; 418} 419 420/* 421 * access() needs to use the real uid/gid, not the effective uid/gid. 422 * We do this by temporarily clearing all FS-related capabilities and 423 * switching the fsuid/fsgid around to the real ones. 424 */ 425asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 426{ 427 struct nameidata nd; 428 int old_fsuid, old_fsgid; 429 kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ 430 int res; 431 432 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 433 return -EINVAL; 434 435 old_fsuid = current->fsuid; 436 old_fsgid = current->fsgid; 437 438 current->fsuid = current->uid; 439 current->fsgid = current->gid; 440 441 if (!issecure(SECURE_NO_SETUID_FIXUP)) { 442 /* 443 * Clear the capabilities if we switch to a non-root user 444 */ 445#ifndef CONFIG_SECURITY_FILE_CAPABILITIES 446 /* 447 * FIXME: There is a race here against sys_capset. The 448 * capabilities can change yet we will restore the old 449 * value below. We should hold task_capabilities_lock, 450 * but we cannot because user_path_walk can sleep. 
451 */ 452#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 453 if (current->uid) 454 old_cap = cap_set_effective(__cap_empty_set); 455 else 456 old_cap = cap_set_effective(current->cap_permitted); 457 } 458 459 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 460 if (res) 461 goto out; 462 463 res = vfs_permission(&nd, mode); 464 /* SuS v2 requires we report a read only fs too */ 465 if(res || !(mode & S_IWOTH) || 466 special_file(nd.path.dentry->d_inode->i_mode)) 467 goto out_path_release; 468 /* 469 * This is a rare case where using __mnt_is_readonly() 470 * is OK without a mnt_want/drop_write() pair. Since 471 * no actual write to the fs is performed here, we do 472 * not need to telegraph to that to anyone. 473 * 474 * By doing this, we accept that this access is 475 * inherently racy and know that the fs may change 476 * state before we even see this result. 477 */ 478 if (__mnt_is_readonly(nd.path.mnt)) 479 res = -EROFS; 480 481out_path_release: 482 path_put(&nd.path); 483out: 484 current->fsuid = old_fsuid; 485 current->fsgid = old_fsgid; 486 487 if (!issecure(SECURE_NO_SETUID_FIXUP)) 488 cap_set_effective(old_cap); 489 490 return res; 491} 492 493asmlinkage long sys_access(const char __user *filename, int mode) 494{ 495 return sys_faccessat(AT_FDCWD, filename, mode); 496} 497 498asmlinkage long sys_chdir(const char __user * filename) 499{ 500 struct nameidata nd; 501 int error; 502 503 error = __user_walk(filename, 504 LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); 505 if (error) 506 goto out; 507 508 error = vfs_permission(&nd, MAY_EXEC); 509 if (error) 510 goto dput_and_out; 511 512 set_fs_pwd(current->fs, &nd.path); 513 514dput_and_out: 515 path_put(&nd.path); 516out: 517 return error; 518} 519 520asmlinkage long sys_fchdir(unsigned int fd) 521{ 522 struct file *file; 523 struct inode *inode; 524 int error; 525 526 error = -EBADF; 527 file = fget(fd); 528 if (!file) 529 goto out; 530 531 inode = file->f_path.dentry->d_inode; 
532 533 error = -ENOTDIR; 534 if (!S_ISDIR(inode->i_mode)) 535 goto out_putf; 536 537 error = file_permission(file, MAY_EXEC); 538 if (!error) 539 set_fs_pwd(current->fs, &file->f_path); 540out_putf: 541 fput(file); 542out: 543 return error; 544} 545 546asmlinkage long sys_chroot(const char __user * filename) 547{ 548 struct nameidata nd; 549 int error; 550 551 error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); 552 if (error) 553 goto out; 554 555 error = vfs_permission(&nd, MAY_EXEC); 556 if (error) 557 goto dput_and_out; 558 559 error = -EPERM; 560 if (!capable(CAP_SYS_CHROOT)) 561 goto dput_and_out; 562 563 set_fs_root(current->fs, &nd.path); 564 set_fs_altroot(); 565 error = 0; 566dput_and_out: 567 path_put(&nd.path); 568out: 569 return error; 570} 571 572asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) 573{ 574 struct inode * inode; 575 struct dentry * dentry; 576 struct file * file; 577 int err = -EBADF; 578 struct iattr newattrs; 579 580 file = fget(fd); 581 if (!file) 582 goto out; 583 584 dentry = file->f_path.dentry; 585 inode = dentry->d_inode; 586 587 audit_inode(NULL, dentry); 588 589 err = mnt_want_write(file->f_path.mnt); 590 if (err) 591 goto out_putf; 592 err = -EPERM; 593 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 594 goto out_drop_write; 595 mutex_lock(&inode->i_mutex); 596 if (mode == (mode_t) -1) 597 mode = inode->i_mode; 598 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 599 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 600 err = notify_change(dentry, &newattrs); 601 mutex_unlock(&inode->i_mutex); 602 603out_drop_write: 604 mnt_drop_write(file->f_path.mnt); 605out_putf: 606 fput(file); 607out: 608 return err; 609} 610 611asmlinkage long sys_fchmodat(int dfd, const char __user *filename, 612 mode_t mode) 613{ 614 struct nameidata nd; 615 struct inode * inode; 616 int error; 617 struct iattr newattrs; 618 619 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 620 if (error) 
621 goto out; 622 inode = nd.path.dentry->d_inode; 623 624 error = mnt_want_write(nd.path.mnt); 625 if (error) 626 goto dput_and_out; 627 628 error = -EPERM; 629 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 630 goto out_drop_write; 631 632 mutex_lock(&inode->i_mutex); 633 if (mode == (mode_t) -1) 634 mode = inode->i_mode; 635 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 636 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 637 error = notify_change(nd.path.dentry, &newattrs); 638 mutex_unlock(&inode->i_mutex); 639 640out_drop_write: 641 mnt_drop_write(nd.path.mnt); 642dput_and_out: 643 path_put(&nd.path); 644out: 645 return error; 646} 647 648asmlinkage long sys_chmod(const char __user *filename, mode_t mode) 649{ 650 return sys_fchmodat(AT_FDCWD, filename, mode); 651} 652 653static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 654{ 655 struct inode * inode; 656 int error; 657 struct iattr newattrs; 658 659 error = -ENOENT; 660 if (!(inode = dentry->d_inode)) { 661 printk(KERN_ERR "chown_common: NULL inode\n"); 662 goto out; 663 } 664 error = -EPERM; 665 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 666 goto out; 667 newattrs.ia_valid = ATTR_CTIME; 668 if (user != (uid_t) -1) { 669 newattrs.ia_valid |= ATTR_UID; 670 newattrs.ia_uid = user; 671 } 672 if (group != (gid_t) -1) { 673 newattrs.ia_valid |= ATTR_GID; 674 newattrs.ia_gid = group; 675 } 676 if (!S_ISDIR(inode->i_mode)) 677 newattrs.ia_valid |= 678 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 679 mutex_lock(&inode->i_mutex); 680 error = notify_change(dentry, &newattrs); 681 mutex_unlock(&inode->i_mutex); 682out: 683 return error; 684} 685 686asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) 687{ 688 struct nameidata nd; 689 int error; 690 691 error = user_path_walk(filename, &nd); 692 if (error) 693 goto out; 694 error = mnt_want_write(nd.path.mnt); 695 if (error) 696 goto out_release; 697 error = chown_common(nd.path.dentry, user, 
group); 698 mnt_drop_write(nd.path.mnt); 699out_release: 700 path_put(&nd.path); 701out: 702 return error; 703} 704 705asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, 706 gid_t group, int flag) 707{ 708 struct nameidata nd; 709 int error = -EINVAL; 710 int follow; 711 712 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 713 goto out; 714 715 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 716 error = __user_walk_fd(dfd, filename, follow, &nd); 717 if (error) 718 goto out; 719 error = mnt_want_write(nd.path.mnt); 720 if (error) 721 goto out_release; 722 error = chown_common(nd.path.dentry, user, group); 723 mnt_drop_write(nd.path.mnt); 724out_release: 725 path_put(&nd.path); 726out: 727 return error; 728} 729 730asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 731{ 732 struct nameidata nd; 733 int error; 734 735 error = user_path_walk_link(filename, &nd); 736 if (error) 737 goto out; 738 error = mnt_want_write(nd.path.mnt); 739 if (error) 740 goto out_release; 741 error = chown_common(nd.path.dentry, user, group); 742 mnt_drop_write(nd.path.mnt); 743out_release: 744 path_put(&nd.path); 745out: 746 return error; 747} 748 749 750asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) 751{ 752 struct file * file; 753 int error = -EBADF; 754 struct dentry * dentry; 755 756 file = fget(fd); 757 if (!file) 758 goto out; 759 760 error = mnt_want_write(file->f_path.mnt); 761 if (error) 762 goto out_fput; 763 dentry = file->f_path.dentry; 764 audit_inode(NULL, dentry); 765 error = chown_common(dentry, user, group); 766 mnt_drop_write(file->f_path.mnt); 767out_fput: 768 fput(file); 769out: 770 return error; 771} 772 773/* 774 * You have to be very careful that these write 775 * counts get cleaned up in error cases and 776 * upon __fput(). This should probably never 777 * be called outside of __dentry_open(). 
778 */ 779static inline int __get_file_write_access(struct inode *inode, 780 struct vfsmount *mnt) 781{ 782 int error; 783 error = get_write_access(inode); 784 if (error) 785 return error; 786 /* 787 * Do not take mount writer counts on 788 * special files since no writes to 789 * the mount itself will occur. 790 */ 791 if (!special_file(inode->i_mode)) { 792 /* 793 * Balanced in __fput() 794 */ 795 error = mnt_want_write(mnt); 796 if (error) 797 put_write_access(inode); 798 } 799 return error; 800} 801 802static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 803 int flags, struct file *f, 804 int (*open)(struct inode *, struct file *)) 805{ 806 struct inode *inode; 807 int error; 808 809 f->f_flags = flags; 810 f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | 811 FMODE_PREAD | FMODE_PWRITE; 812 inode = dentry->d_inode; 813 if (f->f_mode & FMODE_WRITE) { 814 error = __get_file_write_access(inode, mnt); 815 if (error) 816 goto cleanup_file; 817 if (!special_file(inode->i_mode)) 818 file_take_write(f); 819 } 820 821 f->f_mapping = inode->i_mapping; 822 f->f_path.dentry = dentry; 823 f->f_path.mnt = mnt; 824 f->f_pos = 0; 825 f->f_op = fops_get(inode->i_fop); 826 file_move(f, &inode->i_sb->s_files); 827 828 error = security_dentry_open(f); 829 if (error) 830 goto cleanup_all; 831 832 if (!open && f->f_op) 833 open = f->f_op->open; 834 if (open) { 835 error = open(inode, f); 836 if (error) 837 goto cleanup_all; 838 } 839 840 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 841 842 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); 843 844 /* NB: we're sure to have correct a_ops only after f_op->open */ 845 if (f->f_flags & O_DIRECT) { 846 if (!f->f_mapping->a_ops || 847 ((!f->f_mapping->a_ops->direct_IO) && 848 (!f->f_mapping->a_ops->get_xip_mem))) { 849 fput(f); 850 f = ERR_PTR(-EINVAL); 851 } 852 } 853 854 return f; 855 856cleanup_all: 857 fops_put(f->f_op); 858 if (f->f_mode & FMODE_WRITE) { 859 put_write_access(inode); 
860 if (!special_file(inode->i_mode)) { 861 /* 862 * We don't consider this a real 863 * mnt_want/drop_write() pair 864 * because it all happenend right 865 * here, so just reset the state. 866 */ 867 file_reset_write(f); 868 mnt_drop_write(mnt); 869 } 870 } 871 file_kill(f); 872 f->f_path.dentry = NULL; 873 f->f_path.mnt = NULL; 874cleanup_file: 875 put_filp(f); 876 dput(dentry); 877 mntput(mnt); 878 return ERR_PTR(error); 879} 880 881/** 882 * lookup_instantiate_filp - instantiates the open intent filp 883 * @nd: pointer to nameidata 884 * @dentry: pointer to dentry 885 * @open: open callback 886 * 887 * Helper for filesystems that want to use lookup open intents and pass back 888 * a fully instantiated struct file to the caller. 889 * This function is meant to be called from within a filesystem's 890 * lookup method. 891 * Beware of calling it for non-regular files! Those ->open methods might block 892 * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo, 893 * leading to a deadlock, as nobody can open that fifo anymore, because 894 * another process to open fifo will block on locked parent when doing lookup). 895 * Note that in case of error, nd->intent.open.file is destroyed, but the 896 * path information remains valid. 897 * If the open callback is set to NULL, then the standard f_op->open() 898 * filesystem callback is substituted. 
899 */ 900struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 901 int (*open)(struct inode *, struct file *)) 902{ 903 if (IS_ERR(nd->intent.open.file)) 904 goto out; 905 if (IS_ERR(dentry)) 906 goto out_err; 907 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), 908 nd->intent.open.flags - 1, 909 nd->intent.open.file, 910 open); 911out: 912 return nd->intent.open.file; 913out_err: 914 release_open_intent(nd); 915 nd->intent.open.file = (struct file *)dentry; 916 goto out; 917} 918EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 919 920/** 921 * nameidata_to_filp - convert a nameidata to an open filp. 922 * @nd: pointer to nameidata 923 * @flags: open flags 924 * 925 * Note that this function destroys the original nameidata 926 */ 927struct file *nameidata_to_filp(struct nameidata *nd, int flags) 928{ 929 struct file *filp; 930 931 /* Pick up the filp from the open intent */ 932 filp = nd->intent.open.file; 933 /* Has the filesystem initialised the file for us? */ 934 if (filp->f_path.dentry == NULL) 935 filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, 936 NULL); 937 else 938 path_put(&nd->path); 939 return filp; 940} 941 942/* 943 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an 944 * error. 945 */ 946struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) 947{ 948 int error; 949 struct file *f; 950 951 /* 952 * We must always pass in a valid mount pointer. Historically 953 * callers got away with not passing it, but we must enforce this at 954 * the earliest possible point now to avoid strange problems deep in the 955 * filesystem stack. 
956 */ 957 if (!mnt) { 958 printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__); 959 dump_stack(); 960 return ERR_PTR(-EINVAL); 961 } 962 963 error = -ENFILE; 964 f = get_empty_filp(); 965 if (f == NULL) { 966 dput(dentry); 967 mntput(mnt); 968 return ERR_PTR(error); 969 } 970 971 return __dentry_open(dentry, mnt, flags, f, NULL); 972} 973EXPORT_SYMBOL(dentry_open); 974 975/* 976 * Find an empty file descriptor entry, and mark it busy. 977 */ 978int get_unused_fd_flags(int flags) 979{ 980 struct files_struct * files = current->files; 981 int fd, error; 982 struct fdtable *fdt; 983 984 error = -EMFILE; 985 spin_lock(&files->file_lock); 986 987repeat: 988 fdt = files_fdtable(files); 989 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, 990 files->next_fd); 991 992 /* 993 * N.B. For clone tasks sharing a files structure, this test 994 * will limit the total number of files that can be opened. 995 */ 996 if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 997 goto out; 998 999 /* Do we need to expand the fd array or fd set? */ 1000 error = expand_files(files, fd); 1001 if (error < 0) 1002 goto out; 1003 1004 if (error) { 1005 /* 1006 * If we needed to expand the fs array we 1007 * might have blocked - try again. 
1008 */ 1009 error = -EMFILE; 1010 goto repeat; 1011 } 1012 1013 FD_SET(fd, fdt->open_fds); 1014 if (flags & O_CLOEXEC) 1015 FD_SET(fd, fdt->close_on_exec); 1016 else 1017 FD_CLR(fd, fdt->close_on_exec); 1018 files->next_fd = fd + 1; 1019#if 1 1020 /* Sanity check */ 1021 if (fdt->fd[fd] != NULL) { 1022 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 1023 fdt->fd[fd] = NULL; 1024 } 1025#endif 1026 error = fd; 1027 1028out: 1029 spin_unlock(&files->file_lock); 1030 return error; 1031} 1032 1033int get_unused_fd(void) 1034{ 1035 return get_unused_fd_flags(0); 1036} 1037 1038EXPORT_SYMBOL(get_unused_fd); 1039 1040static void __put_unused_fd(struct files_struct *files, unsigned int fd) 1041{ 1042 struct fdtable *fdt = files_fdtable(files); 1043 __FD_CLR(fd, fdt->open_fds); 1044 if (fd < files->next_fd) 1045 files->next_fd = fd; 1046} 1047 1048void put_unused_fd(unsigned int fd) 1049{ 1050 struct files_struct *files = current->files; 1051 spin_lock(&files->file_lock); 1052 __put_unused_fd(files, fd); 1053 spin_unlock(&files->file_lock); 1054} 1055 1056EXPORT_SYMBOL(put_unused_fd); 1057 1058/* 1059 * Install a file pointer in the fd array. 1060 * 1061 * The VFS is full of places where we drop the files lock between 1062 * setting the open_fds bitmap and installing the file in the file 1063 * array. At any such point, we are vulnerable to a dup2() race 1064 * installing a file in the array before us. We need to detect this and 1065 * fput() the struct file we are about to overwrite in this case. 1066 * 1067 * It should never happen - if we allow dup2() do it, _really_ bad things 1068 * will follow. 
1069 */ 1070 1071void fd_install(unsigned int fd, struct file *file) 1072{ 1073 struct files_struct *files = current->files; 1074 struct fdtable *fdt; 1075 spin_lock(&files->file_lock); 1076 fdt = files_fdtable(files); 1077 BUG_ON(fdt->fd[fd] != NULL); 1078 rcu_assign_pointer(fdt->fd[fd], file); 1079 spin_unlock(&files->file_lock); 1080} 1081 1082EXPORT_SYMBOL(fd_install); 1083 1084long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 1085{ 1086 char *tmp = getname(filename); 1087 int fd = PTR_ERR(tmp); 1088 1089 if (!IS_ERR(tmp)) { 1090 fd = get_unused_fd_flags(flags); 1091 if (fd >= 0) { 1092 struct file *f = do_filp_open(dfd, tmp, flags, mode); 1093 if (IS_ERR(f)) { 1094 put_unused_fd(fd); 1095 fd = PTR_ERR(f); 1096 } else { 1097 fsnotify_open(f->f_path.dentry); 1098 fd_install(fd, f); 1099 } 1100 } 1101 putname(tmp); 1102 } 1103 return fd; 1104} 1105 1106asmlinkage long sys_open(const char __user *filename, int flags, int mode) 1107{ 1108 long ret; 1109 1110 if (force_o_largefile()) 1111 flags |= O_LARGEFILE; 1112 1113 ret = do_sys_open(AT_FDCWD, filename, flags, mode); 1114 /* avoid REGPARM breakage on x86: */ 1115 asmlinkage_protect(3, ret, filename, flags, mode); 1116 return ret; 1117} 1118 1119asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 1120 int mode) 1121{ 1122 long ret; 1123 1124 if (force_o_largefile()) 1125 flags |= O_LARGEFILE; 1126 1127 ret = do_sys_open(dfd, filename, flags, mode); 1128 /* avoid REGPARM breakage on x86: */ 1129 asmlinkage_protect(4, ret, dfd, filename, flags, mode); 1130 return ret; 1131} 1132 1133#ifndef __alpha__ 1134 1135/* 1136 * For backward compatibility? Maybe this should be moved 1137 * into arch/i386 instead? 1138 */ 1139asmlinkage long sys_creat(const char __user * pathname, int mode) 1140{ 1141 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); 1142} 1143 1144#endif 1145 1146/* 1147 * "id" is the POSIX thread ID. We use the 1148 * files pointer for this.. 
1149 */ 1150int filp_close(struct file *filp, fl_owner_t id) 1151{ 1152 int retval = 0; 1153 1154 if (!file_count(filp)) { 1155 printk(KERN_ERR "VFS: Close: file count is 0\n"); 1156 return 0; 1157 } 1158 1159 if (filp->f_op && filp->f_op->flush) 1160 retval = filp->f_op->flush(filp, id); 1161 1162 dnotify_flush(filp, id); 1163 locks_remove_posix(filp, id); 1164 fput(filp); 1165 return retval; 1166} 1167 1168EXPORT_SYMBOL(filp_close); 1169 1170/* 1171 * Careful here! We test whether the file pointer is NULL before 1172 * releasing the fd. This ensures that one clone task can't release 1173 * an fd while another clone is opening it. 1174 */ 1175asmlinkage long sys_close(unsigned int fd) 1176{ 1177 struct file * filp; 1178 struct files_struct *files = current->files; 1179 struct fdtable *fdt; 1180 int retval; 1181 1182 spin_lock(&files->file_lock); 1183 fdt = files_fdtable(files); 1184 if (fd >= fdt->max_fds) 1185 goto out_unlock; 1186 filp = fdt->fd[fd]; 1187 if (!filp) 1188 goto out_unlock; 1189 rcu_assign_pointer(fdt->fd[fd], NULL); 1190 FD_CLR(fd, fdt->close_on_exec); 1191 __put_unused_fd(files, fd); 1192 spin_unlock(&files->file_lock); 1193 retval = filp_close(filp, files); 1194 1195 /* can't restart close syscall because file table entry was cleared */ 1196 if (unlikely(retval == -ERESTARTSYS || 1197 retval == -ERESTARTNOINTR || 1198 retval == -ERESTARTNOHAND || 1199 retval == -ERESTART_RESTARTBLOCK)) 1200 retval = -EINTR; 1201 1202 return retval; 1203 1204out_unlock: 1205 spin_unlock(&files->file_lock); 1206 return -EBADF; 1207} 1208 1209EXPORT_SYMBOL(sys_close); 1210 1211/* 1212 * This routine simulates a hangup on the tty, to arrange that users 1213 * are given clean terminals at login time. 
1214 */ 1215asmlinkage long sys_vhangup(void) 1216{ 1217 if (capable(CAP_SYS_TTY_CONFIG)) { 1218 /* XXX: this needs locking */ 1219 tty_vhangup(current->signal->tty); 1220 return 0; 1221 } 1222 return -EPERM; 1223} 1224 1225/* 1226 * Called when an inode is about to be open. 1227 * We use this to disallow opening large files on 32bit systems if 1228 * the caller didn't specify O_LARGEFILE. On 64bit systems we force 1229 * on this flag in sys_open. 1230 */ 1231int generic_file_open(struct inode * inode, struct file * filp) 1232{ 1233 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 1234 return -EOVERFLOW; 1235 return 0; 1236} 1237 1238EXPORT_SYMBOL(generic_file_open); 1239 1240/* 1241 * This is used by subsystems that don't want seekable 1242 * file descriptors 1243 */ 1244int nonseekable_open(struct inode *inode, struct file *filp) 1245{ 1246 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1247 return 0; 1248} 1249 1250EXPORT_SYMBOL(nonseekable_open);