Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v2.6.16-rc4 1209 lines 27 kB view raw
1/* 2 * linux/fs/open.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7#include <linux/string.h> 8#include <linux/mm.h> 9#include <linux/utime.h> 10#include <linux/file.h> 11#include <linux/smp_lock.h> 12#include <linux/quotaops.h> 13#include <linux/fsnotify.h> 14#include <linux/module.h> 15#include <linux/slab.h> 16#include <linux/tty.h> 17#include <linux/namei.h> 18#include <linux/backing-dev.h> 19#include <linux/capability.h> 20#include <linux/security.h> 21#include <linux/mount.h> 22#include <linux/vfs.h> 23#include <linux/fcntl.h> 24#include <asm/uaccess.h> 25#include <linux/fs.h> 26#include <linux/personality.h> 27#include <linux/pagemap.h> 28#include <linux/syscalls.h> 29#include <linux/rcupdate.h> 30 31#include <asm/unistd.h> 32 33int vfs_statfs(struct super_block *sb, struct kstatfs *buf) 34{ 35 int retval = -ENODEV; 36 37 if (sb) { 38 retval = -ENOSYS; 39 if (sb->s_op->statfs) { 40 memset(buf, 0, sizeof(*buf)); 41 retval = security_sb_statfs(sb); 42 if (retval) 43 return retval; 44 retval = sb->s_op->statfs(sb, buf); 45 if (retval == 0 && buf->f_frsize == 0) 46 buf->f_frsize = buf->f_bsize; 47 } 48 } 49 return retval; 50} 51 52EXPORT_SYMBOL(vfs_statfs); 53 54static int vfs_statfs_native(struct super_block *sb, struct statfs *buf) 55{ 56 struct kstatfs st; 57 int retval; 58 59 retval = vfs_statfs(sb, &st); 60 if (retval) 61 return retval; 62 63 if (sizeof(*buf) == sizeof(st)) 64 memcpy(buf, &st, sizeof(st)); 65 else { 66 if (sizeof buf->f_blocks == 4) { 67 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 68 0xffffffff00000000ULL) 69 return -EOVERFLOW; 70 /* 71 * f_files and f_ffree may be -1; it's okay to stuff 72 * that into 32 bits 73 */ 74 if (st.f_files != -1 && 75 (st.f_files & 0xffffffff00000000ULL)) 76 return -EOVERFLOW; 77 if (st.f_ffree != -1 && 78 (st.f_ffree & 0xffffffff00000000ULL)) 79 return -EOVERFLOW; 80 } 81 82 buf->f_type = st.f_type; 83 buf->f_bsize = st.f_bsize; 84 buf->f_blocks = st.f_blocks; 85 buf->f_bfree = st.f_bfree; 86 buf->f_bavail = st.f_bavail; 87 buf->f_files = st.f_files; 88 buf->f_ffree = st.f_ffree; 89 buf->f_fsid = st.f_fsid; 90 buf->f_namelen = st.f_namelen; 91 buf->f_frsize = st.f_frsize; 92 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 93 } 94 return 0; 95} 96 97static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf) 98{ 99 struct kstatfs st; 100 int retval; 101 102 retval = vfs_statfs(sb, &st); 103 if (retval) 104 return retval; 105 106 if (sizeof(*buf) == sizeof(st)) 107 memcpy(buf, &st, sizeof(st)); 108 else { 109 buf->f_type = st.f_type; 110 buf->f_bsize = st.f_bsize; 111 buf->f_blocks = st.f_blocks; 112 buf->f_bfree = st.f_bfree; 113 buf->f_bavail = st.f_bavail; 114 buf->f_files = st.f_files; 115 buf->f_ffree = st.f_ffree; 116 buf->f_fsid = st.f_fsid; 117 buf->f_namelen = st.f_namelen; 118 buf->f_frsize = st.f_frsize; 119 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 120 } 121 return 0; 122} 123 124asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) 125{ 126 struct nameidata nd; 127 int error; 128 129 error = user_path_walk(path, &nd); 130 if (!error) { 131 struct statfs tmp; 132 error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp); 133 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 134 error = -EFAULT; 135 path_release(&nd); 136 } 137 return error; 138} 139 140 141asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) 142{ 143 struct nameidata nd; 144 long error; 145 146 if (sz != sizeof(*buf)) 147 return -EINVAL; 148 error = user_path_walk(path, &nd); 149 if (!error) { 150 struct statfs64 tmp; 151 error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp); 152 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 153 error = -EFAULT; 154 path_release(&nd); 155 } 156 return error; 157} 158 159 160asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf) 161{ 162 struct file * file; 163 struct statfs tmp; 164 int error; 165 166 error = -EBADF; 167 file = fget(fd); 168 if (!file) 169 goto out; 170 error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp); 171 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 172 error = -EFAULT; 173 fput(file); 174out: 175 return error; 176} 177 178asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf) 179{ 180 struct file * file; 181 struct statfs64 tmp; 182 int error; 183 184 if (sz != sizeof(*buf)) 185 return -EINVAL; 186 187 error = -EBADF; 188 file = fget(fd); 189 if (!file) 190 goto out; 191 error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp); 192 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 193 error = -EFAULT; 194 fput(file); 195out: 196 return error; 197} 198 199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 200 struct file *filp) 201{ 202 int err; 203 struct iattr newattrs; 204 205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 206 if (length < 0) 207 return -EINVAL; 208 209 newattrs.ia_size = length; 210 newattrs.ia_valid = ATTR_SIZE | time_attrs; 211 if (filp) { 212 newattrs.ia_file = filp; 213 newattrs.ia_valid |= ATTR_FILE; 214 } 215 216 mutex_lock(&dentry->d_inode->i_mutex); 217 err = notify_change(dentry, &newattrs); 218 mutex_unlock(&dentry->d_inode->i_mutex); 219 return err; 220} 221 222static long do_sys_truncate(const char __user * path, loff_t length) 223{ 224 struct nameidata nd; 225 struct inode * inode; 226 int error; 227 228 error = -EINVAL; 229 if (length < 0) /* sorry, but loff_t says... */ 230 goto out; 231 232 error = user_path_walk(path, &nd); 233 if (error) 234 goto out; 235 inode = nd.dentry->d_inode; 236 237 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 238 error = -EISDIR; 239 if (S_ISDIR(inode->i_mode)) 240 goto dput_and_out; 241 242 error = -EINVAL; 243 if (!S_ISREG(inode->i_mode)) 244 goto dput_and_out; 245 246 error = vfs_permission(&nd, MAY_WRITE); 247 if (error) 248 goto dput_and_out; 249 250 error = -EROFS; 251 if (IS_RDONLY(inode)) 252 goto dput_and_out; 253 254 error = -EPERM; 255 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 256 goto dput_and_out; 257 258 /* 259 * Make sure that there are no leases. 260 */ 261 error = break_lease(inode, FMODE_WRITE); 262 if (error) 263 goto dput_and_out; 264 265 error = get_write_access(inode); 266 if (error) 267 goto dput_and_out; 268 269 error = locks_verify_truncate(inode, NULL, length); 270 if (!error) { 271 DQUOT_INIT(inode); 272 error = do_truncate(nd.dentry, length, 0, NULL); 273 } 274 put_write_access(inode); 275 276dput_and_out: 277 path_release(&nd); 278out: 279 return error; 280} 281 282asmlinkage long sys_truncate(const char __user * path, unsigned long length) 283{ 284 /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ 285 return do_sys_truncate(path, (long)length); 286} 287 288static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 289{ 290 struct inode * inode; 291 struct dentry *dentry; 292 struct file * file; 293 int error; 294 295 error = -EINVAL; 296 if (length < 0) 297 goto out; 298 error = -EBADF; 299 file = fget(fd); 300 if (!file) 301 goto out; 302 303 /* explicitly opened as large or we are on 64-bit box */ 304 if (file->f_flags & O_LARGEFILE) 305 small = 0; 306 307 dentry = file->f_dentry; 308 inode = dentry->d_inode; 309 error = -EINVAL; 310 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 311 goto out_putf; 312 313 error = -EINVAL; 314 /* Cannot ftruncate over 2^31 bytes without large file support */ 315 if (small && length > MAX_NON_LFS) 316 goto out_putf; 317 318 error = -EPERM; 319 if (IS_APPEND(inode)) 320 goto out_putf; 321 322 error = locks_verify_truncate(inode, file, length); 323 if (!error) 324 error = do_truncate(dentry, length, 0, file); 325out_putf: 326 fput(file); 327out: 328 return error; 329} 330 331asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) 332{ 333 return do_sys_ftruncate(fd, length, 1); 334} 335 336/* LFS versions of truncate are only needed on 32 bit machines */ 337#if BITS_PER_LONG == 32 338asmlinkage long sys_truncate64(const char __user * path, loff_t length) 339{ 340 return do_sys_truncate(path, length); 341} 342 343asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) 344{ 345 return do_sys_ftruncate(fd, length, 0); 346} 347#endif 348 349#ifdef __ARCH_WANT_SYS_UTIME 350 351/* 352 * sys_utime() can be implemented in user-level using sys_utimes(). 353 * Is this for backwards compatibility? If so, why not move it 354 * into the appropriate arch directory (for those architectures that 355 * need it). 356 */ 357 358/* If times==NULL, set access and modification to current time, 359 * must be owner or have write permission. 360 * Else, update from *times, must be owner or super user. 361 */ 362asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) 363{ 364 int error; 365 struct nameidata nd; 366 struct inode * inode; 367 struct iattr newattrs; 368 369 error = user_path_walk(filename, &nd); 370 if (error) 371 goto out; 372 inode = nd.dentry->d_inode; 373 374 error = -EROFS; 375 if (IS_RDONLY(inode)) 376 goto dput_and_out; 377 378 /* Don't worry, the checks are done in inode_change_ok() */ 379 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; 380 if (times) { 381 error = -EPERM; 382 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 383 goto dput_and_out; 384 385 error = get_user(newattrs.ia_atime.tv_sec, &times->actime); 386 newattrs.ia_atime.tv_nsec = 0; 387 if (!error) 388 error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime); 389 newattrs.ia_mtime.tv_nsec = 0; 390 if (error) 391 goto dput_and_out; 392 393 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; 394 } else { 395 error = -EACCES; 396 if (IS_IMMUTABLE(inode)) 397 goto dput_and_out; 398 399 if (current->fsuid != inode->i_uid && 400 (error = vfs_permission(&nd, MAY_WRITE)) != 0) 401 goto dput_and_out; 402 } 403 mutex_lock(&inode->i_mutex); 404 error = notify_change(nd.dentry, &newattrs); 405 mutex_unlock(&inode->i_mutex); 406dput_and_out: 407 path_release(&nd); 408out: 409 return error; 410} 411 412#endif 413 414/* If times==NULL, set access and modification to current time, 415 * must be owner or have write permission. 416 * Else, update from *times, must be owner or super user. 417 */ 418long do_utimes(int dfd, char __user *filename, struct timeval *times) 419{ 420 int error; 421 struct nameidata nd; 422 struct inode * inode; 423 struct iattr newattrs; 424 425 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 426 427 if (error) 428 goto out; 429 inode = nd.dentry->d_inode; 430 431 error = -EROFS; 432 if (IS_RDONLY(inode)) 433 goto dput_and_out; 434 435 /* Don't worry, the checks are done in inode_change_ok() */ 436 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; 437 if (times) { 438 error = -EPERM; 439 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 440 goto dput_and_out; 441 442 newattrs.ia_atime.tv_sec = times[0].tv_sec; 443 newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000; 444 newattrs.ia_mtime.tv_sec = times[1].tv_sec; 445 newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000; 446 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; 447 } else { 448 error = -EACCES; 449 if (IS_IMMUTABLE(inode)) 450 goto dput_and_out; 451 452 if (current->fsuid != inode->i_uid && 453 (error = vfs_permission(&nd, MAY_WRITE)) != 0) 454 goto dput_and_out; 455 } 456 mutex_lock(&inode->i_mutex); 457 error = notify_change(nd.dentry, &newattrs); 458 mutex_unlock(&inode->i_mutex); 459dput_and_out: 460 path_release(&nd); 461out: 462 return error; 463} 464 465asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) 466{ 467 struct timeval times[2]; 468 469 if (utimes && copy_from_user(&times, utimes, sizeof(times))) 470 return -EFAULT; 471 return do_utimes(dfd, filename, utimes ? times : NULL); 472} 473 474asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes) 475{ 476 return sys_futimesat(AT_FDCWD, filename, utimes); 477} 478 479 480/* 481 * access() needs to use the real uid/gid, not the effective uid/gid. 482 * We do this by temporarily clearing all FS-related capabilities and 483 * switching the fsuid/fsgid around to the real ones. 484 */ 485asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 486{ 487 struct nameidata nd; 488 int old_fsuid, old_fsgid; 489 kernel_cap_t old_cap; 490 int res; 491 492 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 493 return -EINVAL; 494 495 old_fsuid = current->fsuid; 496 old_fsgid = current->fsgid; 497 old_cap = current->cap_effective; 498 499 current->fsuid = current->uid; 500 current->fsgid = current->gid; 501 502 /* 503 * Clear the capabilities if we switch to a non-root user 504 * 505 * FIXME: There is a race here against sys_capset. The 506 * capabilities can change yet we will restore the old 507 * value below. We should hold task_capabilities_lock, 508 * but we cannot because user_path_walk can sleep. 509 */ 510 if (current->uid) 511 cap_clear(current->cap_effective); 512 else 513 current->cap_effective = current->cap_permitted; 514 515 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 516 if (!res) { 517 res = vfs_permission(&nd, mode); 518 /* SuS v2 requires we report a read only fs too */ 519 if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) 520 && !special_file(nd.dentry->d_inode->i_mode)) 521 res = -EROFS; 522 path_release(&nd); 523 } 524 525 current->fsuid = old_fsuid; 526 current->fsgid = old_fsgid; 527 current->cap_effective = old_cap; 528 529 return res; 530} 531 532asmlinkage long sys_access(const char __user *filename, int mode) 533{ 534 return sys_faccessat(AT_FDCWD, filename, mode); 535} 536 537asmlinkage long sys_chdir(const char __user * filename) 538{ 539 struct nameidata nd; 540 int error; 541 542 error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); 543 if (error) 544 goto out; 545 546 error = vfs_permission(&nd, MAY_EXEC); 547 if (error) 548 goto dput_and_out; 549 550 set_fs_pwd(current->fs, nd.mnt, nd.dentry); 551 552dput_and_out: 553 path_release(&nd); 554out: 555 return error; 556} 557 558asmlinkage long sys_fchdir(unsigned int fd) 559{ 560 struct file *file; 561 struct dentry *dentry; 562 struct inode *inode; 563 struct vfsmount *mnt; 564 int error; 565 566 error = -EBADF; 567 file = fget(fd); 568 if (!file) 569 goto out; 570 571 dentry = file->f_dentry; 572 mnt = file->f_vfsmnt; 573 inode = dentry->d_inode; 574 575 error = -ENOTDIR; 576 if (!S_ISDIR(inode->i_mode)) 577 goto out_putf; 578 579 error = file_permission(file, MAY_EXEC); 580 if (!error) 581 set_fs_pwd(current->fs, mnt, dentry); 582out_putf: 583 fput(file); 584out: 585 return error; 586} 587 588asmlinkage long sys_chroot(const char __user * filename) 589{ 590 struct nameidata nd; 591 int error; 592 593 error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); 594 if (error) 595 goto out; 596 597 error = vfs_permission(&nd, MAY_EXEC); 598 if (error) 599 goto dput_and_out; 600 601 error = -EPERM; 602 if (!capable(CAP_SYS_CHROOT)) 603 goto dput_and_out; 604 605 set_fs_root(current->fs, nd.mnt, nd.dentry); 606 set_fs_altroot(); 607 error = 0; 608dput_and_out: 609 path_release(&nd); 610out: 611 return error; 612} 613 614asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) 615{ 616 struct inode * inode; 617 struct dentry * dentry; 618 struct file * file; 619 int err = -EBADF; 620 struct iattr newattrs; 621 622 file = fget(fd); 623 if (!file) 624 goto out; 625 626 dentry = file->f_dentry; 627 inode = dentry->d_inode; 628 629 err = -EROFS; 630 if (IS_RDONLY(inode)) 631 goto out_putf; 632 err = -EPERM; 633 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 634 goto out_putf; 635 mutex_lock(&inode->i_mutex); 636 if (mode == (mode_t) -1) 637 mode = inode->i_mode; 638 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 639 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 640 err = notify_change(dentry, &newattrs); 641 mutex_unlock(&inode->i_mutex); 642 643out_putf: 644 fput(file); 645out: 646 return err; 647} 648 649asmlinkage long sys_fchmodat(int dfd, const char __user *filename, 650 mode_t mode) 651{ 652 struct nameidata nd; 653 struct inode * inode; 654 int error; 655 struct iattr newattrs; 656 657 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 658 if (error) 659 goto out; 660 inode = nd.dentry->d_inode; 661 662 error = -EROFS; 663 if (IS_RDONLY(inode)) 664 goto dput_and_out; 665 666 error = -EPERM; 667 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 668 goto dput_and_out; 669 670 mutex_lock(&inode->i_mutex); 671 if (mode == (mode_t) -1) 672 mode = inode->i_mode; 673 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 674 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 675 error = notify_change(nd.dentry, &newattrs); 676 mutex_unlock(&inode->i_mutex); 677 678dput_and_out: 679 path_release(&nd); 680out: 681 return error; 682} 683 684asmlinkage long sys_chmod(const char __user *filename, mode_t mode) 685{ 686 return sys_fchmodat(AT_FDCWD, filename, mode); 687} 688 689static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 690{ 691 struct inode * inode; 692 int error; 693 struct iattr newattrs; 694 695 error = -ENOENT; 696 if (!(inode = dentry->d_inode)) { 697 printk(KERN_ERR "chown_common: NULL inode\n"); 698 goto out; 699 } 700 error = -EROFS; 701 if (IS_RDONLY(inode)) 702 goto out; 703 error = -EPERM; 704 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 705 goto out; 706 newattrs.ia_valid = ATTR_CTIME; 707 if (user != (uid_t) -1) { 708 newattrs.ia_valid |= ATTR_UID; 709 newattrs.ia_uid = user; 710 } 711 if (group != (gid_t) -1) { 712 newattrs.ia_valid |= ATTR_GID; 713 newattrs.ia_gid = group; 714 } 715 if (!S_ISDIR(inode->i_mode)) 716 newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; 717 mutex_lock(&inode->i_mutex); 718 error = notify_change(dentry, &newattrs); 719 mutex_unlock(&inode->i_mutex); 720out: 721 return error; 722} 723 724asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) 725{ 726 struct nameidata nd; 727 int error; 728 729 error = user_path_walk(filename, &nd); 730 if (!error) { 731 error = chown_common(nd.dentry, user, group); 732 path_release(&nd); 733 } 734 return error; 735} 736 737asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, 738 gid_t group, int flag) 739{ 740 struct nameidata nd; 741 int error = -EINVAL; 742 int follow; 743 744 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 745 goto out; 746 747 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 748 error = __user_walk_fd(dfd, filename, follow, &nd); 749 if (!error) { 750 error = chown_common(nd.dentry, user, group); 751 path_release(&nd); 752 } 753out: 754 return error; 755} 756 757asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 758{ 759 struct nameidata nd; 760 int error; 761 762 error = user_path_walk_link(filename, &nd); 763 if (!error) { 764 error = chown_common(nd.dentry, user, group); 765 path_release(&nd); 766 } 767 return error; 768} 769 770 771asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) 772{ 773 struct file * file; 774 int error = -EBADF; 775 776 file = fget(fd); 777 if (file) { 778 error = chown_common(file->f_dentry, user, group); 779 fput(file); 780 } 781 return error; 782} 783 784static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 785 int flags, struct file *f, 786 int (*open)(struct inode *, struct file *)) 787{ 788 struct inode *inode; 789 int error; 790 791 f->f_flags = flags; 792 f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | 793 FMODE_PREAD | FMODE_PWRITE; 794 inode = dentry->d_inode; 795 if (f->f_mode & FMODE_WRITE) { 796 error = get_write_access(inode); 797 if (error) 798 goto cleanup_file; 799 } 800 801 f->f_mapping = inode->i_mapping; 802 f->f_dentry = dentry; 803 f->f_vfsmnt = mnt; 804 f->f_pos = 0; 805 f->f_op = fops_get(inode->i_fop); 806 file_move(f, &inode->i_sb->s_files); 807 808 if (!open && f->f_op) 809 open = f->f_op->open; 810 if (open) { 811 error = open(inode, f); 812 if (error) 813 goto cleanup_all; 814 } 815 816 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 817 818 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); 819 820 /* NB: we're sure to have correct a_ops only after f_op->open */ 821 if (f->f_flags & O_DIRECT) { 822 if (!f->f_mapping->a_ops || 823 ((!f->f_mapping->a_ops->direct_IO) && 824 (!f->f_mapping->a_ops->get_xip_page))) { 825 fput(f); 826 f = ERR_PTR(-EINVAL); 827 } 828 } 829 830 return f; 831 832cleanup_all: 833 fops_put(f->f_op); 834 if (f->f_mode & FMODE_WRITE) 835 put_write_access(inode); 836 file_kill(f); 837 f->f_dentry = NULL; 838 f->f_vfsmnt = NULL; 839cleanup_file: 840 put_filp(f); 841 dput(dentry); 842 mntput(mnt); 843 return ERR_PTR(error); 844} 845 846/* 847 * Note that while the flag value (low two bits) for sys_open means: 848 * 00 - read-only 849 * 01 - write-only 850 * 10 - read-write 851 * 11 - special 852 * it is changed into 853 * 00 - no permissions needed 854 * 01 - read-permission 855 * 10 - write-permission 856 * 11 - read-write 857 * for the internal routines (ie open_namei()/follow_link() etc). 00 is 858 * used by symlinks. 859 */ 860static struct file *do_filp_open(int dfd, const char *filename, int flags, 861 int mode) 862{ 863 int namei_flags, error; 864 struct nameidata nd; 865 866 namei_flags = flags; 867 if ((namei_flags+1) & O_ACCMODE) 868 namei_flags++; 869 870 error = open_namei(dfd, filename, namei_flags, mode, &nd); 871 if (!error) 872 return nameidata_to_filp(&nd, flags); 873 874 return ERR_PTR(error); 875} 876 877struct file *filp_open(const char *filename, int flags, int mode) 878{ 879 return do_filp_open(AT_FDCWD, filename, flags, mode); 880} 881EXPORT_SYMBOL(filp_open); 882 883/** 884 * lookup_instantiate_filp - instantiates the open intent filp 885 * @nd: pointer to nameidata 886 * @dentry: pointer to dentry 887 * @open: open callback 888 * 889 * Helper for filesystems that want to use lookup open intents and pass back 890 * a fully instantiated struct file to the caller. 891 * This function is meant to be called from within a filesystem's 892 * lookup method. 893 * Note that in case of error, nd->intent.open.file is destroyed, but the 894 * path information remains valid. 895 * If the open callback is set to NULL, then the standard f_op->open() 896 * filesystem callback is substituted. 897 */ 898struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 899 int (*open)(struct inode *, struct file *)) 900{ 901 if (IS_ERR(nd->intent.open.file)) 902 goto out; 903 if (IS_ERR(dentry)) 904 goto out_err; 905 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), 906 nd->intent.open.flags - 1, 907 nd->intent.open.file, 908 open); 909out: 910 return nd->intent.open.file; 911out_err: 912 release_open_intent(nd); 913 nd->intent.open.file = (struct file *)dentry; 914 goto out; 915} 916EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 917 918/** 919 * nameidata_to_filp - convert a nameidata to an open filp. 920 * @nd: pointer to nameidata 921 * @flags: open flags 922 * 923 * Note that this function destroys the original nameidata 924 */ 925struct file *nameidata_to_filp(struct nameidata *nd, int flags) 926{ 927 struct file *filp; 928 929 /* Pick up the filp from the open intent */ 930 filp = nd->intent.open.file; 931 /* Has the filesystem initialised the file for us? */ 932 if (filp->f_dentry == NULL) 933 filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); 934 else 935 path_release(nd); 936 return filp; 937} 938 939/* 940 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an 941 * error. 942 */ 943struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) 944{ 945 int error; 946 struct file *f; 947 948 error = -ENFILE; 949 f = get_empty_filp(); 950 if (f == NULL) { 951 dput(dentry); 952 mntput(mnt); 953 return ERR_PTR(error); 954 } 955 956 return __dentry_open(dentry, mnt, flags, f, NULL); 957} 958EXPORT_SYMBOL(dentry_open); 959 960/* 961 * Find an empty file descriptor entry, and mark it busy. 962 */ 963int get_unused_fd(void) 964{ 965 struct files_struct * files = current->files; 966 int fd, error; 967 struct fdtable *fdt; 968 969 error = -EMFILE; 970 spin_lock(&files->file_lock); 971 972repeat: 973 fdt = files_fdtable(files); 974 fd = find_next_zero_bit(fdt->open_fds->fds_bits, 975 fdt->max_fdset, 976 fdt->next_fd); 977 978 /* 979 * N.B. For clone tasks sharing a files structure, this test 980 * will limit the total number of files that can be opened. 981 */ 982 if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 983 goto out; 984 985 /* Do we need to expand the fd array or fd set? */ 986 error = expand_files(files, fd); 987 if (error < 0) 988 goto out; 989 990 if (error) { 991 /* 992 * If we needed to expand the fs array we 993 * might have blocked - try again. 994 */ 995 error = -EMFILE; 996 goto repeat; 997 } 998 999 FD_SET(fd, fdt->open_fds); 1000 FD_CLR(fd, fdt->close_on_exec); 1001 fdt->next_fd = fd + 1; 1002#if 1 1003 /* Sanity check */ 1004 if (fdt->fd[fd] != NULL) { 1005 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 1006 fdt->fd[fd] = NULL; 1007 } 1008#endif 1009 error = fd; 1010 1011out: 1012 spin_unlock(&files->file_lock); 1013 return error; 1014} 1015 1016EXPORT_SYMBOL(get_unused_fd); 1017 1018static void __put_unused_fd(struct files_struct *files, unsigned int fd) 1019{ 1020 struct fdtable *fdt = files_fdtable(files); 1021 __FD_CLR(fd, fdt->open_fds); 1022 if (fd < fdt->next_fd) 1023 fdt->next_fd = fd; 1024} 1025 1026void fastcall put_unused_fd(unsigned int fd) 1027{ 1028 struct files_struct *files = current->files; 1029 spin_lock(&files->file_lock); 1030 __put_unused_fd(files, fd); 1031 spin_unlock(&files->file_lock); 1032} 1033 1034EXPORT_SYMBOL(put_unused_fd); 1035 1036/* 1037 * Install a file pointer in the fd array. 1038 * 1039 * The VFS is full of places where we drop the files lock between 1040 * setting the open_fds bitmap and installing the file in the file 1041 * array. At any such point, we are vulnerable to a dup2() race 1042 * installing a file in the array before us. We need to detect this and 1043 * fput() the struct file we are about to overwrite in this case. 1044 * 1045 * It should never happen - if we allow dup2() do it, _really_ bad things 1046 * will follow. 1047 */ 1048 1049void fastcall fd_install(unsigned int fd, struct file * file) 1050{ 1051 struct files_struct *files = current->files; 1052 struct fdtable *fdt; 1053 spin_lock(&files->file_lock); 1054 fdt = files_fdtable(files); 1055 BUG_ON(fdt->fd[fd] != NULL); 1056 rcu_assign_pointer(fdt->fd[fd], file); 1057 spin_unlock(&files->file_lock); 1058} 1059 1060EXPORT_SYMBOL(fd_install); 1061 1062long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 1063{ 1064 char *tmp = getname(filename); 1065 int fd = PTR_ERR(tmp); 1066 1067 if (!IS_ERR(tmp)) { 1068 fd = get_unused_fd(); 1069 if (fd >= 0) { 1070 struct file *f = do_filp_open(dfd, tmp, flags, mode); 1071 if (IS_ERR(f)) { 1072 put_unused_fd(fd); 1073 fd = PTR_ERR(f); 1074 } else { 1075 fsnotify_open(f->f_dentry); 1076 fd_install(fd, f); 1077 } 1078 } 1079 putname(tmp); 1080 } 1081 return fd; 1082} 1083 1084asmlinkage long sys_open(const char __user *filename, int flags, int mode) 1085{ 1086 if (force_o_largefile()) 1087 flags |= O_LARGEFILE; 1088 1089 return do_sys_open(AT_FDCWD, filename, flags, mode); 1090} 1091EXPORT_SYMBOL_GPL(sys_open); 1092 1093asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 1094 int mode) 1095{ 1096 if (force_o_largefile()) 1097 flags |= O_LARGEFILE; 1098 1099 return do_sys_open(dfd, filename, flags, mode); 1100} 1101EXPORT_SYMBOL_GPL(sys_openat); 1102 1103#ifndef __alpha__ 1104 1105/* 1106 * For backward compatibility? Maybe this should be moved 1107 * into arch/i386 instead? 1108 */ 1109asmlinkage long sys_creat(const char __user * pathname, int mode) 1110{ 1111 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); 1112} 1113 1114#endif 1115 1116/* 1117 * "id" is the POSIX thread ID. We use the 1118 * files pointer for this.. 1119 */ 1120int filp_close(struct file *filp, fl_owner_t id) 1121{ 1122 int retval = 0; 1123 1124 if (!file_count(filp)) { 1125 printk(KERN_ERR "VFS: Close: file count is 0\n"); 1126 return 0; 1127 } 1128 1129 if (filp->f_op && filp->f_op->flush) 1130 retval = filp->f_op->flush(filp); 1131 1132 dnotify_flush(filp, id); 1133 locks_remove_posix(filp, id); 1134 fput(filp); 1135 return retval; 1136} 1137 1138EXPORT_SYMBOL(filp_close); 1139 1140/* 1141 * Careful here! We test whether the file pointer is NULL before 1142 * releasing the fd. This ensures that one clone task can't release 1143 * an fd while another clone is opening it. 1144 */ 1145asmlinkage long sys_close(unsigned int fd) 1146{ 1147 struct file * filp; 1148 struct files_struct *files = current->files; 1149 struct fdtable *fdt; 1150 1151 spin_lock(&files->file_lock); 1152 fdt = files_fdtable(files); 1153 if (fd >= fdt->max_fds) 1154 goto out_unlock; 1155 filp = fdt->fd[fd]; 1156 if (!filp) 1157 goto out_unlock; 1158 rcu_assign_pointer(fdt->fd[fd], NULL); 1159 FD_CLR(fd, fdt->close_on_exec); 1160 __put_unused_fd(files, fd); 1161 spin_unlock(&files->file_lock); 1162 return filp_close(filp, files); 1163 1164out_unlock: 1165 spin_unlock(&files->file_lock); 1166 return -EBADF; 1167} 1168 1169EXPORT_SYMBOL(sys_close); 1170 1171/* 1172 * This routine simulates a hangup on the tty, to arrange that users 1173 * are given clean terminals at login time. 1174 */ 1175asmlinkage long sys_vhangup(void) 1176{ 1177 if (capable(CAP_SYS_TTY_CONFIG)) { 1178 tty_vhangup(current->signal->tty); 1179 return 0; 1180 } 1181 return -EPERM; 1182} 1183 1184/* 1185 * Called when an inode is about to be open. 1186 * We use this to disallow opening large files on 32bit systems if 1187 * the caller didn't specify O_LARGEFILE. On 64bit systems we force 1188 * on this flag in sys_open. 1189 */ 1190int generic_file_open(struct inode * inode, struct file * filp) 1191{ 1192 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 1193 return -EFBIG; 1194 return 0; 1195} 1196 1197EXPORT_SYMBOL(generic_file_open); 1198 1199/* 1200 * This is used by subsystems that don't want seekable 1201 * file descriptors 1202 */ 1203int nonseekable_open(struct inode *inode, struct file *filp) 1204{ 1205 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1206 return 0; 1207} 1208 1209EXPORT_SYMBOL(nonseekable_open);