Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
[PATCH] mnt_expire is protected by namespace_sem, no need for vfsmount_lock
[PATCH] do shrink_submounts() for all fs types
[PATCH] sanitize locking in mark_mounts_for_expiry() and shrink_submounts()
[PATCH] count ghost references to vfsmounts
[PATCH] reduce stack footprint in namespace.c

+71 -146
-1
fs/afs/internal.h
··· 573 573 574 574 extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); 575 575 extern void afs_mntpt_kill_timer(void); 576 - extern void afs_umount_begin(struct vfsmount *, int); 577 576 578 577 /* 579 578 * proc.c
-8
fs/afs/mntpt.c
··· 283 283 cancel_delayed_work(&afs_mntpt_expiry_timer); 284 284 flush_scheduled_work(); 285 285 } 286 - 287 - /* 288 - * begin unmount by attempting to remove all automounted mountpoints we added 289 - */ 290 - void afs_umount_begin(struct vfsmount *vfsmnt, int flags) 291 - { 292 - shrink_submounts(vfsmnt, &afs_vfsmounts); 293 - }
-1
fs/afs/super.c
··· 50 50 .write_inode = afs_write_inode, 51 51 .destroy_inode = afs_destroy_inode, 52 52 .clear_inode = afs_clear_inode, 53 - .umount_begin = afs_umount_begin, 54 53 .put_super = afs_put_super, 55 54 .show_options = generic_show_options, 56 55 };
-1
fs/cifs/cifs_dfs_ref.c
··· 33 33 { 34 34 mark_mounts_for_expiry(&cifs_dfs_automount_list); 35 35 mark_mounts_for_expiry(&cifs_dfs_automount_list); 36 - shrink_submounts(vfsmnt, &cifs_dfs_automount_list); 37 36 } 38 37 39 38 /**
+69 -131
fs/namespace.c
··· 155 155 } 156 156 } 157 157 158 - static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) 158 + static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 159 159 { 160 - old_nd->path.dentry = mnt->mnt_mountpoint; 161 - old_nd->path.mnt = mnt->mnt_parent; 160 + old_path->dentry = mnt->mnt_mountpoint; 161 + old_path->mnt = mnt->mnt_parent; 162 162 mnt->mnt_parent = mnt; 163 163 mnt->mnt_mountpoint = mnt->mnt_root; 164 164 list_del_init(&mnt->mnt_child); 165 165 list_del_init(&mnt->mnt_hash); 166 - old_nd->path.dentry->d_mounted--; 166 + old_path->dentry->d_mounted--; 167 167 } 168 168 169 169 void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, ··· 174 174 dentry->d_mounted++; 175 175 } 176 176 177 - static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) 177 + static void attach_mnt(struct vfsmount *mnt, struct path *path) 178 178 { 179 - mnt_set_mountpoint(nd->path.mnt, nd->path.dentry, mnt); 179 + mnt_set_mountpoint(path->mnt, path->dentry, mnt); 180 180 list_add_tail(&mnt->mnt_hash, mount_hashtable + 181 - hash(nd->path.mnt, nd->path.dentry)); 182 - list_add_tail(&mnt->mnt_child, &nd->path.mnt->mnt_mounts); 181 + hash(path->mnt, path->dentry)); 182 + list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); 183 183 } 184 184 185 185 /* ··· 262 262 /* stick the duplicate mount on the same expiry list 263 263 * as the original if that was on one */ 264 264 if (flag & CL_EXPIRE) { 265 - spin_lock(&vfsmount_lock); 266 265 if (!list_empty(&old->mnt_expire)) 267 266 list_add(&mnt->mnt_expire, &old->mnt_expire); 268 - spin_unlock(&vfsmount_lock); 269 267 } 270 268 } 271 269 return mnt; ··· 546 548 m = mnt->mnt_parent; 547 549 mnt->mnt_mountpoint = mnt->mnt_root; 548 550 mnt->mnt_parent = mnt; 551 + m->mnt_ghosts--; 549 552 spin_unlock(&vfsmount_lock); 550 553 dput(dentry); 551 554 mntput(m); ··· 571 572 __touch_mnt_namespace(p->mnt_ns); 572 573 p->mnt_ns = NULL; 573 574 list_del_init(&p->mnt_child); 574 - if (p->mnt_parent != p) 575 + if (p->mnt_parent != p) { 576 + p->mnt_parent->mnt_ghosts++; 575 577 p->mnt_mountpoint->d_mounted--; 578 + } 576 579 change_mnt_propagation(p, MS_PRIVATE); 577 580 } 578 581 } 582 + 583 + static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); 579 584 580 585 static int do_umount(struct vfsmount *mnt, int flags) 581 586 { ··· 652 649 down_write(&namespace_sem); 653 650 spin_lock(&vfsmount_lock); 654 651 event++; 652 + 653 + if (!(flags & MNT_DETACH)) 654 + shrink_submounts(mnt, &umount_list); 655 655 656 656 retval = -EBUSY; 657 657 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { ··· 750 744 int flag) 751 745 { 752 746 struct vfsmount *res, *p, *q, *r, *s; 753 - struct nameidata nd; 747 + struct path path; 754 748 755 749 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 756 750 return NULL; ··· 775 769 q = q->mnt_parent; 776 770 } 777 771 p = s; 778 - nd.path.mnt = q; 779 - nd.path.dentry = p->mnt_mountpoint; 772 + path.mnt = q; 773 + path.dentry = p->mnt_mountpoint; 780 774 q = clone_mnt(p, p->mnt_root, flag); 781 775 if (!q) 782 776 goto Enomem; 783 777 spin_lock(&vfsmount_lock); 784 778 list_add_tail(&q->mnt_list, &res->mnt_list); 785 - attach_mnt(q, &nd); 779 + attach_mnt(q, &path); 786 780 spin_unlock(&vfsmount_lock); 787 781 } 788 782 } ··· 882 876 * in allocations. 883 877 */ 884 878 static int attach_recursive_mnt(struct vfsmount *source_mnt, 885 - struct nameidata *nd, struct nameidata *parent_nd) 879 + struct path *path, struct path *parent_path) 886 880 { 887 881 LIST_HEAD(tree_list); 888 - struct vfsmount *dest_mnt = nd->path.mnt; 889 - struct dentry *dest_dentry = nd->path.dentry; 882 + struct vfsmount *dest_mnt = path->mnt; 883 + struct dentry *dest_dentry = path->dentry; 890 884 struct vfsmount *child, *p; 891 885 892 886 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) ··· 898 892 } 899 893 900 894 spin_lock(&vfsmount_lock); 901 - if (parent_nd) { 902 - detach_mnt(source_mnt, parent_nd); 903 - attach_mnt(source_mnt, nd); 895 + if (parent_path) { 896 + detach_mnt(source_mnt, parent_path); 897 + attach_mnt(source_mnt, path); 904 898 touch_mnt_namespace(current->nsproxy->mnt_ns); 905 899 } else { 906 900 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); ··· 936 930 937 931 err = -ENOENT; 938 932 if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) 939 - err = attach_recursive_mnt(mnt, nd, NULL); 933 + err = attach_recursive_mnt(mnt, &nd->path, NULL); 940 934 out_unlock: 941 935 mutex_unlock(&nd->path.dentry->d_inode->i_mutex); 942 936 if (!err) ··· 1065 1059 */ 1066 1060 static noinline int do_move_mount(struct nameidata *nd, char *old_name) 1067 1061 { 1068 - struct nameidata old_nd, parent_nd; 1062 + struct nameidata old_nd; 1063 + struct path parent_path; 1069 1064 struct vfsmount *p; 1070 1065 int err = 0; 1071 1066 if (!capable(CAP_SYS_ADMIN)) ··· 1121 1114 if (p == old_nd.path.mnt) 1122 1115 goto out1; 1123 1116 1124 - err = attach_recursive_mnt(old_nd.path.mnt, nd, &parent_nd); 1117 + err = attach_recursive_mnt(old_nd.path.mnt, &nd->path, &parent_path); 1125 1118 if (err) 1126 1119 goto out1; 1127 1120 1128 - spin_lock(&vfsmount_lock); 1129 1121 /* if the mount is moved, it should no longer be expire 1130 1122 * automatically */ 1131 1123 list_del_init(&old_nd.path.mnt->mnt_expire); 1132 - spin_unlock(&vfsmount_lock); 1133 1124 out1: 1134 1125 mutex_unlock(&nd->path.dentry->d_inode->i_mutex); 1135 1126 out: 1136 1127 up_write(&namespace_sem); 1137 1128 if (!err) 1138 - path_put(&parent_nd.path); 1129 + path_put(&parent_path); 1139 1130 path_put(&old_nd.path); 1140 1131 return err; 1141 1132 } ··· 1194 1189 if ((err = graft_tree(newmnt, nd))) 1195 1190 goto unlock; 1196 1191 1197 - if (fslist) { 1198 - /* add to the specified expiration list */ 1199 - spin_lock(&vfsmount_lock); 1192 + if (fslist) /* add to the specified expiration list */ 1200 1193 list_add_tail(&newmnt->mnt_expire, fslist); 1201 - spin_unlock(&vfsmount_lock); 1202 - } 1194 + 1203 1195 up_write(&namespace_sem); 1204 1196 return 0; 1205 1197 ··· 1208 1206 1209 1207 EXPORT_SYMBOL_GPL(do_add_mount); 1210 1208 1211 - static void expire_mount(struct vfsmount *mnt, struct list_head *mounts, 1212 - struct list_head *umounts) 1213 - { 1214 - spin_lock(&vfsmount_lock); 1215 - 1216 - /* 1217 - * Check if mount is still attached, if not, let whoever holds it deal 1218 - * with the sucker 1219 - */ 1220 - if (mnt->mnt_parent == mnt) { 1221 - spin_unlock(&vfsmount_lock); 1222 - return; 1223 - } 1224 - 1225 - /* 1226 - * Check that it is still dead: the count should now be 2 - as 1227 - * contributed by the vfsmount parent and the mntget above 1228 - */ 1229 - if (!propagate_mount_busy(mnt, 2)) { 1230 - /* delete from the namespace */ 1231 - touch_mnt_namespace(mnt->mnt_ns); 1232 - list_del_init(&mnt->mnt_list); 1233 - mnt->mnt_ns = NULL; 1234 - umount_tree(mnt, 1, umounts); 1235 - spin_unlock(&vfsmount_lock); 1236 - } else { 1237 - /* 1238 - * Someone brought it back to life whilst we didn't have any 1239 - * locks held so return it to the expiration list 1240 - */ 1241 - list_add_tail(&mnt->mnt_expire, mounts); 1242 - spin_unlock(&vfsmount_lock); 1243 - } 1244 - } 1245 - 1246 - /* 1247 - * go through the vfsmounts we've just consigned to the graveyard to 1248 - * - check that they're still dead 1249 - * - delete the vfsmount from the appropriate namespace under lock 1250 - * - dispose of the corpse 1251 - */ 1252 - static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts) 1253 - { 1254 - struct mnt_namespace *ns; 1255 - struct vfsmount *mnt; 1256 - 1257 - while (!list_empty(graveyard)) { 1258 - LIST_HEAD(umounts); 1259 - mnt = list_first_entry(graveyard, struct vfsmount, mnt_expire); 1260 - list_del_init(&mnt->mnt_expire); 1261 - 1262 - /* don't do anything if the namespace is dead - all the 1263 - * vfsmounts from it are going away anyway */ 1264 - ns = mnt->mnt_ns; 1265 - if (!ns || !ns->root) 1266 - continue; 1267 - get_mnt_ns(ns); 1268 - 1269 - spin_unlock(&vfsmount_lock); 1270 - down_write(&namespace_sem); 1271 - expire_mount(mnt, mounts, &umounts); 1272 - up_write(&namespace_sem); 1273 - release_mounts(&umounts); 1274 - mntput(mnt); 1275 - put_mnt_ns(ns); 1276 - spin_lock(&vfsmount_lock); 1277 - } 1278 - } 1279 - 1280 1209 /* 1281 1210 * process a list of expirable mountpoints with the intent of discarding any 1282 1211 * mountpoints that aren't in use and haven't been touched since last we came ··· 1217 1284 { 1218 1285 struct vfsmount *mnt, *next; 1219 1286 LIST_HEAD(graveyard); 1287 + LIST_HEAD(umounts); 1220 1288 1221 1289 if (list_empty(mounts)) 1222 1290 return; 1223 1291 1292 + down_write(&namespace_sem); 1224 1293 spin_lock(&vfsmount_lock); 1225 1294 1226 1295 /* extract from the expiration list every vfsmount that matches the ··· 1233 1298 */ 1234 1299 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { 1235 1300 if (!xchg(&mnt->mnt_expiry_mark, 1) || 1236 - atomic_read(&mnt->mnt_count) != 1) 1301 + propagate_mount_busy(mnt, 1)) 1237 1302 continue; 1238 - 1239 - mntget(mnt); 1240 1303 list_move(&mnt->mnt_expire, &graveyard); 1241 1304 } 1242 - 1243 - expire_mount_list(&graveyard, mounts); 1244 - 1305 + while (!list_empty(&graveyard)) { 1306 + mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire); 1307 + touch_mnt_namespace(mnt->mnt_ns); 1308 + umount_tree(mnt, 1, &umounts); 1309 + } 1245 1310 spin_unlock(&vfsmount_lock); 1311 + up_write(&namespace_sem); 1312 + 1313 + release_mounts(&umounts); 1246 1314 } 1247 1315 1248 1316 EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); ··· 1281 1343 } 1282 1344 1283 1345 if (!propagate_mount_busy(mnt, 1)) { 1284 - mntget(mnt); 1285 1346 list_move_tail(&mnt->mnt_expire, graveyard); 1286 1347 found++; 1287 1348 } ··· 1300 1363 * process a list of expirable mountpoints with the intent of discarding any 1301 1364 * submounts of a specific parent mountpoint 1302 1365 */ 1303 - void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts) 1366 + static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) 1304 1367 { 1305 1368 LIST_HEAD(graveyard); 1306 - int found; 1307 - 1308 - spin_lock(&vfsmount_lock); 1369 + struct vfsmount *m; 1309 1370 1310 1371 /* extract submounts of 'mountpoint' from the expiration list */ 1311 - while ((found = select_submounts(mountpoint, &graveyard)) != 0) 1312 - expire_mount_list(&graveyard, mounts); 1313 - 1314 - spin_unlock(&vfsmount_lock); 1372 + while (select_submounts(mnt, &graveyard)) { 1373 + while (!list_empty(&graveyard)) { 1374 + m = list_first_entry(&graveyard, struct vfsmount, 1375 + mnt_expire); 1376 + touch_mnt_namespace(mnt->mnt_ns); 1377 + umount_tree(mnt, 1, umounts); 1378 + } 1379 + } 1315 1380 } 1316 - 1317 - EXPORT_SYMBOL_GPL(shrink_submounts); 1318 1381 1319 1382 /* 1320 1383 * Some copy_from_user() implementations do not return the exact number of ··· 1620 1683 path_put(&old_pwd); 1621 1684 } 1622 1685 1623 - static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) 1686 + static void chroot_fs_refs(struct path *old_root, struct path *new_root) 1624 1687 { 1625 1688 struct task_struct *g, *p; 1626 1689 struct fs_struct *fs; ··· 1632 1695 if (fs) { 1633 1696 atomic_inc(&fs->count); 1634 1697 task_unlock(p); 1635 - if (fs->root.dentry == old_nd->path.dentry 1636 - && fs->root.mnt == old_nd->path.mnt) 1637 - set_fs_root(fs, &new_nd->path); 1638 - if (fs->pwd.dentry == old_nd->path.dentry 1639 - && fs->pwd.mnt == old_nd->path.mnt) 1640 - set_fs_pwd(fs, &new_nd->path); 1698 + if (fs->root.dentry == old_root->dentry 1699 + && fs->root.mnt == old_root->mnt) 1700 + set_fs_root(fs, new_root); 1701 + if (fs->pwd.dentry == old_root->dentry 1702 + && fs->pwd.mnt == old_root->mnt) 1703 + set_fs_pwd(fs, new_root); 1641 1704 put_fs_struct(fs); 1642 1705 } else 1643 1706 task_unlock(p); ··· 1674 1737 const char __user * put_old) 1675 1738 { 1676 1739 struct vfsmount *tmp; 1677 - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; 1740 + struct nameidata new_nd, old_nd, user_nd; 1741 + struct path parent_path, root_parent; 1678 1742 int error; 1679 1743 1680 1744 if (!capable(CAP_SYS_ADMIN)) ··· 1749 1811 goto out3; 1750 1812 } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) 1751 1813 goto out3; 1752 - detach_mnt(new_nd.path.mnt, &parent_nd); 1814 + detach_mnt(new_nd.path.mnt, &parent_path); 1753 1815 detach_mnt(user_nd.path.mnt, &root_parent); 1754 1816 /* mount old root on put_old */ 1755 - attach_mnt(user_nd.path.mnt, &old_nd); 1817 + attach_mnt(user_nd.path.mnt, &old_nd.path); 1756 1818 /* mount new_root on / */ 1757 1819 attach_mnt(new_nd.path.mnt, &root_parent); 1758 1820 touch_mnt_namespace(current->nsproxy->mnt_ns); 1759 1821 spin_unlock(&vfsmount_lock); 1760 - chroot_fs_refs(&user_nd, &new_nd); 1822 + chroot_fs_refs(&user_nd.path, &new_nd.path); 1761 1823 security_sb_post_pivotroot(&user_nd, &new_nd); 1762 1824 error = 0; 1763 - path_put(&root_parent.path); 1764 - path_put(&parent_nd.path); 1825 + path_put(&root_parent); 1826 + path_put(&parent_path); 1765 1827 out2: 1766 1828 mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); 1767 1829 up_write(&namespace_sem);
-2
fs/nfs/super.c
··· 589 589 struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); 590 590 struct rpc_clnt *rpc; 591 591 592 - shrink_submounts(vfsmnt, &nfs_automount_list); 593 - 594 592 if (!(flags & MNT_FORCE)) 595 593 return; 596 594 /* -EIO all pending I/O */
+1 -1
fs/pnode.c
··· 225 225 */ 226 226 static inline int do_refcount_check(struct vfsmount *mnt, int count) 227 227 { 228 - int mycount = atomic_read(&mnt->mnt_count); 228 + int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; 229 229 return (mycount > count); 230 230 } 231 231
+1 -1
include/linux/mount.h
··· 61 61 atomic_t mnt_count; 62 62 int mnt_expiry_mark; /* true if marked for expiry */ 63 63 int mnt_pinned; 64 + int mnt_ghosts; 64 65 }; 65 66 66 67 static inline struct vfsmount *mntget(struct vfsmount *mnt) ··· 99 98 int mnt_flags, struct list_head *fslist); 100 99 101 100 extern void mark_mounts_for_expiry(struct list_head *mounts); 102 - extern void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts); 103 101 104 102 extern spinlock_t vfsmount_lock; 105 103 extern dev_t name_to_dev_t(char *name);