Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

change the locking order for namespace_sem

Have it nested inside ->i_mutex. Instead of calling follow_down()
under namespace_sem and then grabbing i_mutex and checking that the
mountpoint-to-be is not dead, do the following:
    grab i_mutex
    check that it's not dead
    grab namespace_sem
    see if anything is mounted there
    if not, we've won
    otherwise
        drop locks
        path_put() what we had
        replace with what's mounted
        retry everything with the new mountpoint-to-be

New helper (lock_mount()) does that. do_add_mount(), do_move_mount(),
do_loopback() and pivot_root() are switched to it; in the case of the
last two that eliminates a race we used to have - the original code
didn't do follow_down().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Al Viro b12cea91 27cb1572

+74 -61
+74 -61
fs/namespace.c
··· 1663 1663 return err; 1664 1664 } 1665 1665 1666 + static int lock_mount(struct path *path) 1667 + { 1668 + struct vfsmount *mnt; 1669 + retry: 1670 + mutex_lock(&path->dentry->d_inode->i_mutex); 1671 + if (unlikely(cant_mount(path->dentry))) { 1672 + mutex_unlock(&path->dentry->d_inode->i_mutex); 1673 + return -ENOENT; 1674 + } 1675 + down_write(&namespace_sem); 1676 + mnt = lookup_mnt(path); 1677 + if (likely(!mnt)) 1678 + return 0; 1679 + up_write(&namespace_sem); 1680 + mutex_unlock(&path->dentry->d_inode->i_mutex); 1681 + path_put(path); 1682 + path->mnt = mnt; 1683 + path->dentry = dget(mnt->mnt_root); 1684 + goto retry; 1685 + } 1686 + 1687 + static void unlock_mount(struct path *path) 1688 + { 1689 + up_write(&namespace_sem); 1690 + mutex_unlock(&path->dentry->d_inode->i_mutex); 1691 + } 1692 + 1666 1693 static int graft_tree(struct vfsmount *mnt, struct path *path) 1667 1694 { 1668 - int err; 1669 1695 if (mnt->mnt_sb->s_flags & MS_NOUSER) 1670 1696 return -EINVAL; 1671 1697 ··· 1699 1673 S_ISDIR(mnt->mnt_root->d_inode->i_mode)) 1700 1674 return -ENOTDIR; 1701 1675 1702 - err = -ENOENT; 1703 - mutex_lock(&path->dentry->d_inode->i_mutex); 1704 - if (cant_mount(path->dentry)) 1705 - goto out_unlock; 1676 + if (d_unlinked(path->dentry)) 1677 + return -ENOENT; 1706 1678 1707 - if (!d_unlinked(path->dentry)) 1708 - err = attach_recursive_mnt(mnt, path, NULL); 1709 - out_unlock: 1710 - mutex_unlock(&path->dentry->d_inode->i_mutex); 1711 - return err; 1679 + return attach_recursive_mnt(mnt, path, NULL); 1712 1680 } 1713 1681 1714 1682 /* ··· 1765 1745 static int do_loopback(struct path *path, char *old_name, 1766 1746 int recurse) 1767 1747 { 1748 + LIST_HEAD(umount_list); 1768 1749 struct path old_path; 1769 1750 struct vfsmount *mnt = NULL; 1770 1751 int err = mount_is_safe(path); ··· 1777 1756 if (err) 1778 1757 return err; 1779 1758 1780 - down_write(&namespace_sem); 1781 - err = -EINVAL; 1782 - if (IS_MNT_UNBINDABLE(old_path.mnt)) 1759 + err = 
lock_mount(path); 1760 + if (err) 1783 1761 goto out; 1784 1762 1763 + err = -EINVAL; 1764 + if (IS_MNT_UNBINDABLE(old_path.mnt)) 1765 + goto out2; 1766 + 1785 1767 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) 1786 - goto out; 1768 + goto out2; 1787 1769 1788 1770 err = -ENOMEM; 1789 1771 if (recurse) ··· 1795 1771 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); 1796 1772 1797 1773 if (!mnt) 1798 - goto out; 1774 + goto out2; 1799 1775 1800 1776 err = graft_tree(mnt, path); 1801 1777 if (err) { 1802 - LIST_HEAD(umount_list); 1803 - 1804 1778 br_write_lock(vfsmount_lock); 1805 1779 umount_tree(mnt, 0, &umount_list); 1806 1780 br_write_unlock(vfsmount_lock); 1807 - release_mounts(&umount_list); 1808 1781 } 1809 - 1782 + out2: 1783 + unlock_mount(path); 1784 + release_mounts(&umount_list); 1810 1785 out: 1811 - up_write(&namespace_sem); 1812 1786 path_put(&old_path); 1813 1787 return err; 1814 1788 } ··· 1895 1873 if (err) 1896 1874 return err; 1897 1875 1898 - down_write(&namespace_sem); 1899 - err = follow_down(path, true); 1876 + err = lock_mount(path); 1900 1877 if (err < 0) 1901 1878 goto out; 1902 1879 1903 1880 err = -EINVAL; 1904 1881 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) 1905 - goto out; 1906 - 1907 - err = -ENOENT; 1908 - mutex_lock(&path->dentry->d_inode->i_mutex); 1909 - if (cant_mount(path->dentry)) 1910 1882 goto out1; 1911 1883 1912 1884 if (d_unlinked(path->dentry)) ··· 1942 1926 * automatically */ 1943 1927 list_del_init(&old_path.mnt->mnt_expire); 1944 1928 out1: 1945 - mutex_unlock(&path->dentry->d_inode->i_mutex); 1929 + unlock_mount(path); 1946 1930 out: 1947 - up_write(&namespace_sem); 1948 1931 if (!err) 1949 1932 path_put(&parent_path); 1950 1933 path_put(&old_path); ··· 1998 1983 1999 1984 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); 2000 1985 2001 - down_write(&namespace_sem); 2002 - /* Something was mounted here while we slept */ 2003 - err = follow_down(path, true); 2004 - if (err < 0) 2005 - 
goto unlock; 1986 + err = lock_mount(path); 1987 + if (err) 1988 + return err; 2006 1989 2007 1990 err = -EINVAL; 2008 1991 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) ··· 2020 2007 err = graft_tree(newmnt, path); 2021 2008 2022 2009 unlock: 2023 - up_write(&namespace_sem); 2010 + unlock_mount(path); 2024 2011 return err; 2025 2012 } 2026 2013 ··· 2588 2575 goto out1; 2589 2576 2590 2577 error = security_sb_pivotroot(&old, &new); 2591 - if (error) { 2592 - path_put(&old); 2593 - goto out1; 2594 - } 2578 + if (error) 2579 + goto out2; 2595 2580 2596 2581 get_fs_root(current->fs, &root); 2597 - down_write(&namespace_sem); 2598 - mutex_lock(&old.dentry->d_inode->i_mutex); 2582 + error = lock_mount(&old); 2583 + if (error) 2584 + goto out3; 2585 + 2599 2586 error = -EINVAL; 2600 2587 if (IS_MNT_SHARED(old.mnt) || 2601 2588 IS_MNT_SHARED(new.mnt->mnt_parent) || 2602 2589 IS_MNT_SHARED(root.mnt->mnt_parent)) 2603 - goto out2; 2590 + goto out4; 2604 2591 if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) 2605 - goto out2; 2592 + goto out4; 2606 2593 error = -ENOENT; 2607 - if (cant_mount(old.dentry)) 2608 - goto out2; 2609 2594 if (d_unlinked(new.dentry)) 2610 - goto out2; 2595 + goto out4; 2611 2596 if (d_unlinked(old.dentry)) 2612 - goto out2; 2597 + goto out4; 2613 2598 error = -EBUSY; 2614 2599 if (new.mnt == root.mnt || 2615 2600 old.mnt == root.mnt) 2616 - goto out2; /* loop, on the same file system */ 2601 + goto out4; /* loop, on the same file system */ 2617 2602 error = -EINVAL; 2618 2603 if (root.mnt->mnt_root != root.dentry) 2619 - goto out2; /* not a mountpoint */ 2604 + goto out4; /* not a mountpoint */ 2620 2605 if (root.mnt->mnt_parent == root.mnt) 2621 - goto out2; /* not attached */ 2606 + goto out4; /* not attached */ 2622 2607 if (new.mnt->mnt_root != new.dentry) 2623 - goto out2; /* not a mountpoint */ 2608 + goto out4; /* not a mountpoint */ 2624 2609 if (new.mnt->mnt_parent == new.mnt) 2625 - goto out2; /* not attached */ 2610 + goto 
out4; /* not attached */ 2626 2611 /* make sure we can reach put_old from new_root */ 2627 2612 tmp = old.mnt; 2628 2613 if (tmp != new.mnt) { 2629 2614 for (;;) { 2630 2615 if (tmp->mnt_parent == tmp) 2631 - goto out2; /* already mounted on put_old */ 2616 + goto out4; /* already mounted on put_old */ 2632 2617 if (tmp->mnt_parent == new.mnt) 2633 2618 break; 2634 2619 tmp = tmp->mnt_parent; 2635 2620 } 2636 2621 if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) 2637 - goto out2; 2622 + goto out4; 2638 2623 } else if (!is_subdir(old.dentry, new.dentry)) 2639 - goto out2; 2624 + goto out4; 2640 2625 br_write_lock(vfsmount_lock); 2641 2626 detach_mnt(new.mnt, &parent_path); 2642 2627 detach_mnt(root.mnt, &root_parent); ··· 2645 2634 touch_mnt_namespace(current->nsproxy->mnt_ns); 2646 2635 br_write_unlock(vfsmount_lock); 2647 2636 chroot_fs_refs(&root, &new); 2648 - 2649 2637 error = 0; 2650 - path_put(&root_parent); 2651 - path_put(&parent_path); 2652 - out2: 2653 - mutex_unlock(&old.dentry->d_inode->i_mutex); 2654 - up_write(&namespace_sem); 2638 + out4: 2639 + unlock_mount(&old); 2640 + if (!error) { 2641 + path_put(&root_parent); 2642 + path_put(&parent_path); 2643 + } 2644 + out3: 2655 2645 path_put(&root); 2646 + out2: 2656 2647 path_put(&old); 2657 2648 out1: 2658 2649 path_put(&new);