Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs pile two from Al Viro:

- orangefs fix

- series of fs/namei.c cleanups from me

- VFS stuff coming from overlayfs tree

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
orangefs: Use RCU for destroy_inode
vfs: use helper for calling f_op->fsync()
mm: use helper for calling f_op->mmap()
vfs: use helpers for calling f_op->{read,write}_iter()
vfs: pass type instead of fn to do_{loop,iter}_readv_writev()
vfs: extract common parts of {compat_,}do_readv_writev()
vfs: wrap write f_ops with file_{start,end}_write()
vfs: deny copy_file_range() for non regular files
vfs: deny fallocate() on directory
vfs: create vfs helper vfs_tmpfile()
namei.c: split unlazy_walk()
namei.c: fold the check for DCACHE_OP_REVALIDATE into d_revalidate()
lookup_fast(): clean up the logics around the fallback to non-rcu mode
namei: fold unlazy_link() into its sole caller

+261 -223
+2 -2
drivers/block/loop.c
··· 501 501 cmd->iocb.ki_flags = IOCB_DIRECT; 502 502 503 503 if (rw == WRITE) 504 - ret = file->f_op->write_iter(&cmd->iocb, &iter); 504 + ret = call_write_iter(file, &cmd->iocb, &iter); 505 505 else 506 - ret = file->f_op->read_iter(&cmd->iocb, &iter); 506 + ret = call_read_iter(file, &cmd->iocb, &iter); 507 507 508 508 if (ret != -EIOCBQUEUED) 509 509 cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
+1 -1
drivers/gpu/drm/i915/i915_gem_dmabuf.c
··· 141 141 if (!obj->base.filp) 142 142 return -ENODEV; 143 143 144 - ret = obj->base.filp->f_op->mmap(obj->base.filp, vma); 144 + ret = call_mmap(obj->base.filp, vma); 145 145 if (ret) 146 146 return ret; 147 147
+1 -1
drivers/gpu/drm/vgem/vgem_drv.c
··· 288 288 if (!obj->filp) 289 289 return -ENODEV; 290 290 291 - ret = obj->filp->f_op->mmap(obj->filp, vma); 291 + ret = call_mmap(obj->filp, vma); 292 292 if (ret) 293 293 return ret; 294 294
+2 -2
fs/aio.c
··· 1495 1495 return ret; 1496 1496 ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); 1497 1497 if (!ret) 1498 - ret = aio_ret(req, file->f_op->read_iter(req, &iter)); 1498 + ret = aio_ret(req, call_read_iter(file, req, &iter)); 1499 1499 kfree(iovec); 1500 1500 return ret; 1501 1501 } ··· 1520 1520 if (!ret) { 1521 1521 req->ki_flags |= IOCB_WRITE; 1522 1522 file_start_write(file); 1523 - ret = aio_ret(req, file->f_op->write_iter(req, &iter)); 1523 + ret = aio_ret(req, call_write_iter(file, req, &iter)); 1524 1524 /* 1525 1525 * We release freeze protection in aio_complete(). Fool lockdep 1526 1526 * by telling it the lock got released so that it doesn't
+1 -1
fs/coda/file.c
··· 96 96 cfi->cfi_mapcount++; 97 97 spin_unlock(&cii->c_lock); 98 98 99 - return host_file->f_op->mmap(host_file, vma); 99 + return call_mmap(host_file, vma); 100 100 } 101 101 102 102 int coda_open(struct inode *coda_inode, struct file *coda_file)
+138 -113
fs/namei.c
··· 672 672 /** 673 673 * unlazy_walk - try to switch to ref-walk mode. 674 674 * @nd: nameidata pathwalk data 675 - * @dentry: child of nd->path.dentry or NULL 676 - * @seq: seq number to check dentry against 677 675 * Returns: 0 on success, -ECHILD on failure 678 676 * 679 - * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry 680 - * for ref-walk mode. @dentry must be a path found by a do_lookup call on 681 - * @nd or NULL. Must be called from rcu-walk context. 677 + * unlazy_walk attempts to legitimize the current nd->path and nd->root 678 + * for ref-walk mode. 679 + * Must be called from rcu-walk context. 682 680 * Nothing should touch nameidata between unlazy_walk() failure and 683 681 * terminate_walk(). 684 682 */ 685 - static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq) 683 + static int unlazy_walk(struct nameidata *nd) 686 684 { 687 685 struct dentry *parent = nd->path.dentry; 688 686 ··· 689 691 nd->flags &= ~LOOKUP_RCU; 690 692 if (unlikely(!legitimize_links(nd))) 691 693 goto out2; 694 + if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) 695 + goto out1; 696 + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 697 + if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) 698 + goto out; 699 + } 700 + rcu_read_unlock(); 701 + BUG_ON(nd->inode != parent->d_inode); 702 + return 0; 703 + 704 + out2: 705 + nd->path.mnt = NULL; 706 + nd->path.dentry = NULL; 707 + out1: 708 + if (!(nd->flags & LOOKUP_ROOT)) 709 + nd->root.mnt = NULL; 710 + out: 711 + rcu_read_unlock(); 712 + return -ECHILD; 713 + } 714 + 715 + /** 716 + * unlazy_child - try to switch to ref-walk mode. 717 + * @nd: nameidata pathwalk data 718 + * @dentry: child of nd->path.dentry 719 + * @seq: seq number to check dentry against 720 + * Returns: 0 on success, -ECHILD on failure 721 + * 722 + * unlazy_child attempts to legitimize the current nd->path, nd->root and dentry 723 + * for ref-walk mode. @dentry must be a path found by a do_lookup call on 724 + * @nd. Must be called from rcu-walk context. 725 + * Nothing should touch nameidata between unlazy_child() failure and 726 + * terminate_walk(). 727 + */ 728 + static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned seq) 729 + { 730 + BUG_ON(!(nd->flags & LOOKUP_RCU)); 731 + 732 + nd->flags &= ~LOOKUP_RCU; 733 + if (unlikely(!legitimize_links(nd))) 734 + goto out2; 692 735 if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) 693 736 goto out2; 694 - if (unlikely(!lockref_get_not_dead(&parent->d_lockref))) 737 + if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) 695 738 goto out1; 696 739 697 740 /* 698 - * For a negative lookup, the lookup sequence point is the parents 699 - * sequence point, and it only needs to revalidate the parent dentry. 700 - * 701 - * For a positive lookup, we need to move both the parent and the 702 - * dentry from the RCU domain to be properly refcounted. And the 703 - * sequence number in the dentry validates *both* dentry counters, 704 - * since we checked the sequence number of the parent after we got 705 - * the child sequence number. So we know the parent must still 706 - * be valid if the child sequence number is still valid. 741 + * We need to move both the parent and the dentry from the RCU domain 742 + * to be properly refcounted. And the sequence number in the dentry 743 + * validates *both* dentry counters, since we checked the sequence 744 + * number of the parent after we got the child sequence number. So we 745 + * know the parent must still be valid if the child sequence number is 707 746 */ 708 - if (!dentry) { 709 - if (read_seqcount_retry(&parent->d_seq, nd->seq)) 710 - goto out; 711 - BUG_ON(nd->inode != parent->d_inode); 712 - } else { 713 - if (!lockref_get_not_dead(&dentry->d_lockref)) 714 - goto out; 715 - if (read_seqcount_retry(&dentry->d_seq, seq)) 716 - goto drop_dentry; 747 + if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) 748 + goto out; 749 + if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) { 750 + rcu_read_unlock(); 751 + dput(dentry); 752 + goto drop_root_mnt; 717 753 } 718 - 719 754 /* 720 755 * Sequence counts matched. Now make sure that the root is 721 756 * still valid and get it if required. ··· 764 733 rcu_read_unlock(); 765 734 return 0; 766 735 767 - drop_dentry: 768 - rcu_read_unlock(); 769 - dput(dentry); 770 - goto drop_root_mnt; 771 736 out2: 772 737 nd->path.mnt = NULL; 773 738 out1: ··· 776 749 return -ECHILD; 777 750 } 778 751 779 - static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq) 780 - { 781 - if (unlikely(!legitimize_path(nd, link, seq))) { 782 - drop_links(nd); 783 - nd->depth = 0; 784 - nd->flags &= ~LOOKUP_RCU; 785 - nd->path.mnt = NULL; 786 - nd->path.dentry = NULL; 787 - if (!(nd->flags & LOOKUP_ROOT)) 788 - nd->root.mnt = NULL; 789 - rcu_read_unlock(); 790 - } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) { 791 - return 0; 792 - } 793 - path_put(link); 794 - return -ECHILD; 795 - } 796 - 797 752 static inline int d_revalidate(struct dentry *dentry, unsigned int flags) 798 753 { 799 - return dentry->d_op->d_revalidate(dentry, flags); 754 + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) 755 + return dentry->d_op->d_revalidate(dentry, flags); 756 + else 757 + return 1; 800 758 } 801 759 802 760 /** ··· 802 790 if (nd->flags & LOOKUP_RCU) { 803 791 if (!(nd->flags & LOOKUP_ROOT)) 804 792 nd->root.mnt = NULL; 805 - if (unlikely(unlazy_walk(nd, NULL, 0))) 793 + if (unlikely(unlazy_walk(nd))) 806 794 return -ECHILD; 807 795 } 808 796 ··· 1028 1016 touch_atime(&last->link); 1029 1017 cond_resched(); 1030 1018 } else if (atime_needs_update_rcu(&last->link, inode)) { 1031 - if (unlikely(unlazy_walk(nd, NULL, 0))) 1019 + if (unlikely(unlazy_walk(nd))) 1032 1020 return ERR_PTR(-ECHILD); 1033 1021 touch_atime(&last->link); 1034 1022 } ··· 1047 1035 if (nd->flags & LOOKUP_RCU) { 1048 1036 res = get(NULL, inode, &last->done); 1049 1037 if (res == ERR_PTR(-ECHILD)) { 1050 - if (unlikely(unlazy_walk(nd, NULL, 0))) 1038 + if (unlikely(unlazy_walk(nd))) 1051 1039 return ERR_PTR(-ECHILD); 1052 1040 res = get(dentry, inode, &last->done); 1053 1041 } ··· 1481 1469 struct dentry *dir, 1482 1470 unsigned int flags) 1483 1471 { 1484 - struct dentry *dentry; 1485 - int error; 1486 - 1487 - dentry = d_lookup(dir, name); 1472 + struct dentry *dentry = d_lookup(dir, name); 1488 1473 if (dentry) { 1489 - if (dentry->d_flags & DCACHE_OP_REVALIDATE) { 1490 - error = d_revalidate(dentry, flags); 1491 - if (unlikely(error <= 0)) { 1492 - if (!error) 1493 - d_invalidate(dentry); 1494 - dput(dentry); 1495 - return ERR_PTR(error); 1496 - } 1474 + int error = d_revalidate(dentry, flags); 1475 + if (unlikely(error <= 0)) { 1476 + if (!error) 1477 + d_invalidate(dentry); 1478 + dput(dentry); 1479 + return ERR_PTR(error); 1497 1480 } 1498 1481 } 1499 1482 return dentry; ··· 1553 1546 bool negative; 1554 1547 dentry = __d_lookup_rcu(parent, &nd->last, &seq); 1555 1548 if (unlikely(!dentry)) { 1556 - if (unlazy_walk(nd, NULL, 0)) 1549 + if (unlazy_walk(nd)) 1557 1550 return -ECHILD; 1558 1551 return 0; 1559 1552 } ··· 1578 1571 return -ECHILD; 1579 1572 1580 1573 *seqp = seq; 1581 - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) 1582 - status = d_revalidate(dentry, nd->flags); 1583 - if (unlikely(status <= 0)) { 1584 - if (unlazy_walk(nd, dentry, seq)) 1585 - return -ECHILD; 1586 - if (status == -ECHILD) 1587 - status = d_revalidate(dentry, nd->flags); 1588 - } else { 1574 + status = d_revalidate(dentry, nd->flags); 1575 + if (likely(status > 0)) { 1589 1576 /* 1590 1577 * Note: do negative dentry check after revalidation in 1591 1578 * case that drops it. ··· 1590 1589 path->dentry = dentry; 1591 1590 if (likely(__follow_mount_rcu(nd, path, inode, seqp))) 1592 1591 return 1; 1593 - if (unlazy_walk(nd, dentry, seq)) 1594 - return -ECHILD; 1595 1592 } 1593 + if (unlazy_child(nd, dentry, seq)) 1594 + return -ECHILD; 1595 + if (unlikely(status == -ECHILD)) 1596 + /* we'd been told to redo it in non-rcu mode */ 1597 + status = d_revalidate(dentry, nd->flags); 1596 1598 } else { 1597 1599 dentry = __d_lookup(parent, &nd->last); 1598 1600 if (unlikely(!dentry)) 1599 1601 return 0; 1600 - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) 1601 - status = d_revalidate(dentry, nd->flags); 1602 + status = d_revalidate(dentry, nd->flags); 1602 1603 } 1603 1604 if (unlikely(status <= 0)) { 1604 1605 if (!status) ··· 1639 1636 if (IS_ERR(dentry)) 1640 1637 goto out; 1641 1638 if (unlikely(!d_in_lookup(dentry))) { 1642 - if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && 1643 - !(flags & LOOKUP_NO_REVAL)) { 1639 + if (!(flags & LOOKUP_NO_REVAL)) { 1644 1640 int error = d_revalidate(dentry, flags); 1645 1641 if (unlikely(error <= 0)) { 1646 1642 if (!error) { ··· 1670 1668 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); 1671 1669 if (err != -ECHILD) 1672 1670 return err; 1673 - if (unlazy_walk(nd, NULL, 0)) 1671 + if (unlazy_walk(nd)) 1674 1672 return -ECHILD; 1675 1673 } 1676 1674 return inode_permission(nd->inode, MAY_EXEC); ··· 1705 1703 error = nd_alloc_stack(nd); 1706 1704 if (unlikely(error)) { 1707 1705 if (error == -ECHILD) { 1708 - if (unlikely(unlazy_link(nd, link, seq))) 1709 - return -ECHILD; 1710 - error = nd_alloc_stack(nd); 1706 + if (unlikely(!legitimize_path(nd, link, seq))) { 1707 + drop_links(nd); 1708 + nd->depth = 0; 1709 + nd->flags &= ~LOOKUP_RCU; 1710 + nd->path.mnt = NULL; 1711 + nd->path.dentry = NULL; 1712 + if (!(nd->flags & LOOKUP_ROOT)) 1713 + nd->root.mnt = NULL; 1714 + rcu_read_unlock(); 1715 + } else if (likely(unlazy_walk(nd)) == 0) 1716 + error = nd_alloc_stack(nd); 1711 1717 } 1712 1718 if (error) { 1713 1719 path_put(link); ··· 2132 2122 } 2133 2123 if (unlikely(!d_can_lookup(nd->path.dentry))) { 2134 2124 if (nd->flags & LOOKUP_RCU) { 2135 - if (unlazy_walk(nd, NULL, 0)) 2125 + if (unlazy_walk(nd)) 2136 2126 return -ECHILD; 2137 2127 } 2138 2128 return -ENOTDIR; ··· 2589 2579 2590 2580 /* If we're in rcuwalk, drop out of it to handle last component */ 2591 2581 if (nd->flags & LOOKUP_RCU) { 2592 - if (unlazy_walk(nd, NULL, 0)) 2582 + if (unlazy_walk(nd)) 2593 2583 return -ECHILD; 2594 2584 } 2595 2585 ··· 3082 3072 if (d_in_lookup(dentry)) 3083 3073 break; 3084 3074 3085 - if (!(dentry->d_flags & DCACHE_OP_REVALIDATE)) 3086 - break; 3087 - 3088 3075 error = d_revalidate(dentry, nd->flags); 3089 3076 if (likely(error > 0)) 3090 3077 break; ··· 3363 3356 return error; 3364 3357 } 3365 3358 3359 + struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) 3360 + { 3361 + static const struct qstr name = QSTR_INIT("/", 1); 3362 + struct dentry *child = NULL; 3363 + struct inode *dir = dentry->d_inode; 3364 + struct inode *inode; 3365 + int error; 3366 + 3367 + /* we want directory to be writable */ 3368 + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 3369 + if (error) 3370 + goto out_err; 3371 + error = -EOPNOTSUPP; 3372 + if (!dir->i_op->tmpfile) 3373 + goto out_err; 3374 + error = -ENOMEM; 3375 + child = d_alloc(dentry, &name); 3376 + if (unlikely(!child)) 3377 + goto out_err; 3378 + error = dir->i_op->tmpfile(dir, child, mode); 3379 + if (error) 3380 + goto out_err; 3381 + error = -ENOENT; 3382 + inode = child->d_inode; 3383 + if (unlikely(!inode)) 3384 + goto out_err; 3385 + if (!(open_flag & O_EXCL)) { 3386 + spin_lock(&inode->i_lock); 3387 + inode->i_state |= I_LINKABLE; 3388 + spin_unlock(&inode->i_lock); 3389 + } 3390 + return child; 3391 + 3392 + out_err: 3393 + dput(child); 3394 + return ERR_PTR(error); 3395 + } 3396 + EXPORT_SYMBOL(vfs_tmpfile); 3397 + 3366 3398 static int do_tmpfile(struct nameidata *nd, unsigned flags, 3367 3399 const struct open_flags *op, 3368 3400 struct file *file, int *opened) 3369 3401 { 3370 - static const struct qstr name = QSTR_INIT("/", 1); 3371 3402 struct dentry *child; 3372 - struct inode *dir; 3373 3403 struct path path; 3374 3404 int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); 3375 3405 if (unlikely(error)) ··· 3414 3370 error = mnt_want_write(path.mnt); 3415 3371 if (unlikely(error)) 3416 3372 goto out; 3417 - dir = path.dentry->d_inode; 3418 - /* we want directory to be writable */ 3419 - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 3420 - if (error) 3373 + child = vfs_tmpfile(path.dentry, op->mode, op->open_flag); 3374 + error = PTR_ERR(child); 3375 + if (unlikely(IS_ERR(child))) 3421 3376 goto out2; 3422 - if (!dir->i_op->tmpfile) { 3423 - error = -EOPNOTSUPP; 3424 - goto out2; 3425 - } 3426 - child = d_alloc(path.dentry, &name); 3427 - if (unlikely(!child)) { 3428 - error = -ENOMEM; 3429 - goto out2; 3430 - } 3431 3377 dput(path.dentry); 3432 3378 path.dentry = child; 3433 - error = dir->i_op->tmpfile(dir, child, op->mode); 3434 - if (error) 3435 - goto out2; 3436 3379 audit_inode(nd->name, child, 0); 3437 3380 /* Don't check for other permissions, the inode was just created */ 3438 3381 error = may_open(&path, 0, op->open_flag); ··· 3430 3399 if (error) 3431 3400 goto out2; 3432 3401 error = open_check_o_direct(file); 3433 - if (error) { 3402 + if (error) 3434 3403 fput(file); 3435 - } else if (!(op->open_flag & O_EXCL)) { 3436 - struct inode *inode = file_inode(file); 3437 - spin_lock(&inode->i_lock); 3438 - inode->i_state |= I_LINKABLE; 3439 - spin_unlock(&inode->i_lock); 3440 - } 3441 3404 out2: 3442 3405 mnt_drop_write(path.mnt); 3443 3406 out:
+6 -8
fs/open.c
··· 301 301 if (S_ISFIFO(inode->i_mode)) 302 302 return -ESPIPE; 303 303 304 - /* 305 - * Let individual file system decide if it supports preallocation 306 - * for directories or not. 307 - */ 308 - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && 309 - !S_ISBLK(inode->i_mode)) 304 + if (S_ISDIR(inode->i_mode)) 305 + return -EISDIR; 306 + 307 + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 310 308 return -ENODEV; 311 309 312 310 /* Check for wrap through zero too */ ··· 314 316 if (!file->f_op->fallocate) 315 317 return -EOPNOTSUPP; 316 318 317 - sb_start_write(inode->i_sb); 319 + file_start_write(file); 318 320 ret = file->f_op->fallocate(file, mode, offset, len); 319 321 320 322 /* ··· 327 329 if (ret == 0) 328 330 fsnotify_modify(file); 329 331 330 - sb_end_write(inode->i_sb); 332 + file_end_write(file); 331 333 return ret; 332 334 } 333 335 EXPORT_SYMBOL_GPL(vfs_fallocate);
+8 -1
fs/orangefs/super.c
··· 115 115 return &orangefs_inode->vfs_inode; 116 116 } 117 117 118 + static void orangefs_i_callback(struct rcu_head *head) 119 + { 120 + struct inode *inode = container_of(head, struct inode, i_rcu); 121 + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 122 + kmem_cache_free(orangefs_inode_cache, orangefs_inode); 123 + } 124 + 118 125 static void orangefs_destroy_inode(struct inode *inode) 119 126 { 120 127 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); ··· 130 123 "%s: deallocated %p destroying inode %pU\n", 131 124 __func__, orangefs_inode, get_khandle_from_ino(inode)); 132 125 133 - kmem_cache_free(orangefs_inode_cache, orangefs_inode); 126 + call_rcu(&inode->i_rcu, orangefs_i_callback); 134 127 } 135 128 136 129 /*
+56 -74
fs/read_write.c
··· 23 23 #include <linux/uaccess.h> 24 24 #include <asm/unistd.h> 25 25 26 - typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 27 - typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); 28 - 29 26 const struct file_operations generic_ro_fops = { 30 27 .llseek = generic_file_llseek, 31 28 .read_iter = generic_file_read_iter, ··· 367 370 kiocb.ki_pos = *ppos; 368 371 369 372 iter->type |= READ; 370 - ret = file->f_op->read_iter(&kiocb, iter); 373 + ret = call_read_iter(file, &kiocb, iter); 371 374 BUG_ON(ret == -EIOCBQUEUED); 372 375 if (ret > 0) 373 376 *ppos = kiocb.ki_pos; ··· 387 390 kiocb.ki_pos = *ppos; 388 391 389 392 iter->type |= WRITE; 390 - ret = file->f_op->write_iter(&kiocb, iter); 393 + ret = call_write_iter(file, &kiocb, iter); 391 394 BUG_ON(ret == -EIOCBQUEUED); 392 395 if (ret > 0) 393 396 *ppos = kiocb.ki_pos; ··· 436 439 kiocb.ki_pos = *ppos; 437 440 iov_iter_init(&iter, READ, &iov, 1, len); 438 441 439 - ret = filp->f_op->read_iter(&kiocb, &iter); 442 + ret = call_read_iter(filp, &kiocb, &iter); 440 443 BUG_ON(ret == -EIOCBQUEUED); 441 444 *ppos = kiocb.ki_pos; 442 445 return ret; ··· 493 496 kiocb.ki_pos = *ppos; 494 497 iov_iter_init(&iter, WRITE, &iov, 1, len); 495 498 496 - ret = filp->f_op->write_iter(&kiocb, &iter); 499 + ret = call_write_iter(filp, &kiocb, &iter); 497 500 BUG_ON(ret == -EIOCBQUEUED); 498 501 if (ret > 0) 499 502 *ppos = kiocb.ki_pos; ··· 672 675 EXPORT_SYMBOL(iov_shorten); 673 676 674 677 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, 675 - loff_t *ppos, iter_fn_t fn, int flags) 678 + loff_t *ppos, int type, int flags) 676 679 { 677 680 struct kiocb kiocb; 678 681 ssize_t ret; ··· 689 692 kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); 690 693 kiocb.ki_pos = *ppos; 691 694 692 - ret = fn(&kiocb, iter); 695 + if (type == READ) 696 + ret = call_read_iter(filp, &kiocb, iter); 697 + else 698 + ret = call_write_iter(filp, &kiocb, iter); 693 699 BUG_ON(ret == -EIOCBQUEUED); 694 700 *ppos = kiocb.ki_pos; 695 701 return ret; ··· 700 700 701 701 /* Do it by hand, with file-ops */ 702 702 static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, 703 - loff_t *ppos, io_fn_t fn, int flags) 703 + loff_t *ppos, int type, int flags) 704 704 { 705 705 ssize_t ret = 0; 706 706 ··· 711 711 struct iovec iovec = iov_iter_iovec(iter); 712 712 ssize_t nr; 713 713 714 - nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); 714 + if (type == READ) { 715 + nr = filp->f_op->read(filp, iovec.iov_base, 716 + iovec.iov_len, ppos); 717 + } else { 718 + nr = filp->f_op->write(filp, iovec.iov_base, 719 + iovec.iov_len, ppos); 720 + } 715 721 716 722 if (nr < 0) { 717 723 if (!ret) ··· 840 834 return ret; 841 835 } 842 836 843 - static ssize_t do_readv_writev(int type, struct file *file, 844 - const struct iovec __user * uvector, 845 - unsigned long nr_segs, loff_t *pos, 846 - int flags) 837 + static ssize_t __do_readv_writev(int type, struct file *file, 838 + struct iov_iter *iter, loff_t *pos, int flags) 847 839 { 848 840 size_t tot_len; 849 - struct iovec iovstack[UIO_FASTIOV]; 850 - struct iovec *iov = iovstack; 851 - struct iov_iter iter; 852 - ssize_t ret; 853 - io_fn_t fn; 854 - iter_fn_t iter_fn; 841 + ssize_t ret = 0; 855 842 856 - ret = import_iovec(type, uvector, nr_segs, 857 - ARRAY_SIZE(iovstack), &iov, &iter); 858 - if (ret < 0) 859 - return ret; 860 - 861 - tot_len = iov_iter_count(&iter); 843 + tot_len = iov_iter_count(iter); 862 844 if (!tot_len) 863 845 goto out; 864 846 ret = rw_verify_area(type, file, pos, tot_len); 865 847 if (ret < 0) 866 848 goto out; 867 849 868 - if (type == READ) { 869 - fn = file->f_op->read; 870 - iter_fn = file->f_op->read_iter; 871 - } else { 872 - fn = (io_fn_t)file->f_op->write; 873 - iter_fn = file->f_op->write_iter; 850 + if (type != READ) 874 851 file_start_write(file); 875 - } 876 852 877 - if (iter_fn) 878 - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); 853 + if ((type == READ && file->f_op->read_iter) || 854 + (type == WRITE && file->f_op->write_iter)) 855 + ret = do_iter_readv_writev(file, iter, pos, type, flags); 879 856 else 880 - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); 857 + ret = do_loop_readv_writev(file, iter, pos, type, flags); 881 858 882 859 if (type != READ) 883 860 file_end_write(file); 884 861 885 862 out: 886 - kfree(iov); 887 863 if ((ret + (type == READ)) > 0) { 888 864 if (type == READ) 889 865 fsnotify_access(file); 890 866 else 891 867 fsnotify_modify(file); 892 868 } 869 + return ret; 870 + } 871 + 872 + static ssize_t do_readv_writev(int type, struct file *file, 873 + const struct iovec __user *uvector, 874 + unsigned long nr_segs, loff_t *pos, 875 + int flags) 876 + { 877 + struct iovec iovstack[UIO_FASTIOV]; 878 + struct iovec *iov = iovstack; 879 + struct iov_iter iter; 880 + ssize_t ret; 881 + 882 + ret = import_iovec(type, uvector, nr_segs, 883 + ARRAY_SIZE(iovstack), &iov, &iter); 884 + if (ret < 0) 885 + return ret; 886 + 887 + ret = __do_readv_writev(type, file, &iter, pos, flags); 888 + kfree(iov); 889 + 893 890 return ret; 894 891 } 895 892 ··· 1073 1064 unsigned long nr_segs, loff_t *pos, 1074 1065 int flags) 1075 1066 { 1076 - compat_ssize_t tot_len; 1077 1067 struct iovec iovstack[UIO_FASTIOV]; 1078 1068 struct iovec *iov = iovstack; 1079 1069 struct iov_iter iter; 1080 1070 ssize_t ret; 1081 - io_fn_t fn; 1082 - iter_fn_t iter_fn; 1083 1071 1084 1072 ret = compat_import_iovec(type, uvector, nr_segs, 1085 1073 UIO_FASTIOV, &iov, &iter); 1086 1074 if (ret < 0) 1087 1075 return ret; 1088 1076 1089 - tot_len = iov_iter_count(&iter); 1090 - if (!tot_len) 1091 - goto out; 1092 - ret = rw_verify_area(type, file, pos, tot_len); 1093 - if (ret < 0) 1094 - goto out; 1095 - 1096 - if (type == READ) { 1097 - fn = file->f_op->read; 1098 - iter_fn = file->f_op->read_iter; 1099 - } else { 1100 - fn = (io_fn_t)file->f_op->write; 1101 - iter_fn = file->f_op->write_iter; 1102 - file_start_write(file); 1103 - } 1104 - 1105 - if (iter_fn) 1106 - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); 1107 - else 1108 - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); 1109 - 1110 - if (type != READ) 1111 - file_end_write(file); 1112 - 1113 - out: 1077 + ret = __do_readv_writev(type, file, &iter, pos, flags); 1114 1078 kfree(iov); 1115 - if ((ret + (type == READ)) > 0) { 1116 - if (type == READ) 1117 - fsnotify_access(file); 1118 - else 1119 - fsnotify_modify(file); 1120 - } 1079 + 1121 1080 return ret; 1122 1081 } 1123 1082 ··· 1495 1518 if (flags != 0) 1496 1519 return -EINVAL; 1497 1520 1521 + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1522 + return -EISDIR; 1523 + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1524 + return -EINVAL; 1525 + 1498 1526 ret = rw_verify_area(READ, file_in, &pos_in, len); 1499 1527 if (unlikely(ret)) 1500 1528 return ret; ··· 1520 1538 if (len == 0) 1521 1539 return 0; 1522 1540 1523 - sb_start_write(inode_out->i_sb); 1541 + file_start_write(file_out); 1524 1542 1525 1543 /* 1526 1544 * Try cloning first, this is supported by more file systems, and ··· 1556 1574 inc_syscr(current); 1557 1575 inc_syscw(current); 1558 1576 1559 - sb_end_write(inode_out->i_sb); 1577 + file_end_write(file_out); 1560 1578 1561 1579 return ret; 1562 1580 }
+1 -1
fs/splice.c
··· 307 307 idx = to.idx; 308 308 init_sync_kiocb(&kiocb, in); 309 309 kiocb.ki_pos = *ppos; 310 - ret = in->f_op->read_iter(&kiocb, &to); 310 + ret = call_read_iter(in, &kiocb, &to); 311 311 if (ret > 0) { 312 312 *ppos = kiocb.ki_pos; 313 313 file_accessed(in);
+1 -1
fs/sync.c
··· 192 192 spin_unlock(&inode->i_lock); 193 193 mark_inode_dirty_sync(inode); 194 194 } 195 - return file->f_op->fsync(file, start, end, datasync); 195 + return call_fsync(file, start, end, datasync); 196 196 } 197 197 EXPORT_SYMBOL(vfs_fsync_range); 198 198
+39 -13
include/linux/fs.h
··· 1567 1567 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); 1568 1568 extern int vfs_whiteout(struct inode *, struct dentry *); 1569 1569 1570 + extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, 1571 + int open_flag); 1572 + 1570 1573 /* 1571 1574 * VFS file helper functions. 1572 1575 */ ··· 1721 1718 int (*set_acl)(struct inode *, struct posix_acl *, int); 1722 1719 } ____cacheline_aligned; 1723 1720 1721 + static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, 1722 + struct iov_iter *iter) 1723 + { 1724 + return file->f_op->read_iter(kio, iter); 1725 + } 1726 + 1727 + static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, 1728 + struct iov_iter *iter) 1729 + { 1730 + return file->f_op->write_iter(kio, iter); 1731 + } 1732 + 1733 + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) 1734 + { 1735 + return file->f_op->mmap(file, vma); 1736 + } 1737 + 1738 + static inline int call_fsync(struct file *file, loff_t start, loff_t end, 1739 + int datasync) 1740 + { 1741 + return file->f_op->fsync(file, start, end, datasync); 1742 + } 1743 + 1724 1744 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 1725 1745 unsigned long nr_segs, unsigned long fast_segs, 1726 1746 struct iovec *fast_pointer, ··· 1769 1743 loff_t len, bool *is_same); 1770 1744 extern int vfs_dedupe_file_range(struct file *file, 1771 1745 struct file_dedupe_range *same); 1772 - 1773 - static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, 1774 - struct file *file_out, loff_t pos_out, 1775 - u64 len) 1776 - { 1777 - int ret; 1778 - 1779 - sb_start_write(file_inode(file_out)->i_sb); 1780 - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); 1781 - sb_end_write(file_inode(file_out)->i_sb); 1782 - 1783 - return ret; 1784 - } 1785 1746 1786 1747 struct super_operations { 1787 1748 struct inode *(*alloc_inode)(struct super_block *sb); ··· 2579 2566 if (!S_ISREG(file_inode(file)->i_mode)) 2580 2567 return; 2581 2568 __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); 2569 + } 2570 + 2571 + static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, 2572 + struct file *file_out, loff_t pos_out, 2573 + u64 len) 2574 + { 2575 + int ret; 2576 + 2577 + file_start_write(file_out); 2578 + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); 2579 + file_end_write(file_out); 2580 + 2581 + return ret; 2582 2582 } 2583 2583 2584 2584 /*
+2 -2
ipc/shm.c
··· 423 423 if (ret) 424 424 return ret; 425 425 426 - ret = sfd->file->f_op->mmap(sfd->file, vma); 426 + ret = call_mmap(sfd->file, vma); 427 427 if (ret) { 428 428 shm_close(vma); 429 429 return ret; ··· 452 452 453 453 if (!sfd->file->f_op->fsync) 454 454 return -EINVAL; 455 - return sfd->file->f_op->fsync(sfd->file, start, end, datasync); 455 + return call_fsync(sfd->file, start, end, datasync); 456 456 } 457 457 458 458 static long shm_fallocate(struct file *file, int mode, loff_t offset,
+1 -1
mm/mmap.c
··· 1672 1672 * new file must not have been exposed to user-space, yet. 1673 1673 */ 1674 1674 vma->vm_file = get_file(file); 1675 - error = file->f_op->mmap(file, vma); 1675 + error = call_mmap(file, vma); 1676 1676 if (error) 1677 1677 goto unmap_and_free_vma; 1678 1678
+2 -2
mm/nommu.c
··· 1084 1084 { 1085 1085 int ret; 1086 1086 1087 - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 1087 + ret = call_mmap(vma->vm_file, vma); 1088 1088 if (ret == 0) { 1089 1089 vma->vm_region->vm_top = vma->vm_region->vm_end; 1090 1090 return 0; ··· 1115 1115 * - VM_MAYSHARE will be set if it may attempt to share 1116 1116 */ 1117 1117 if (capabilities & NOMMU_MAP_DIRECT) { 1118 - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 1118 + ret = call_mmap(vma->vm_file, vma); 1119 1119 if (ret == 0) { 1120 1120 /* shouldn't return success if we're not sharing */ 1121 1121 BUG_ON(!(vma->vm_flags & VM_MAYSHARE));