Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vfs: Add setns support for the mount namespace

setns support for the mount namespace is a little tricky as an
arbitrary decision must be made about what to set fs->root and
fs->pwd to, as there is no expectation of a relationship between
the two mount namespaces. Therefore I arbitrarily find the root
mount point, and follow every mount on top of it to find the top
of the mount stack. Then I set fs->root and fs->pwd to that
location. The topmost root of the mount stack seems like a
reasonable place to be.

Bind mount support for the mount namespace inodes has the
possibility of creating circular dependencies between mount
namespaces. Circular dependencies can result in loops that
prevent mount namespaces from ever being freed. I avoid
creating those circular dependencies by adding a sequence number
to the mount namespace and requiring that all bind mounts be of a
younger mount namespace into an older mount namespace.

Add a helper function proc_ns_inode so it is possible to
detect when we are attempting to bind mount a namespace inode.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

+108
+1
fs/mount.h
··· 6 6 atomic_t count; 7 7 struct mount * root; 8 8 struct list_head list; 9 + u64 seq; /* Sequence number to prevent loops */ 9 10 wait_queue_head_t poll; 10 11 int event; 11 12 };
+95
fs/namespace.c
··· 20 20 #include <linux/fs_struct.h> /* get_fs_root et.al. */ 21 21 #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22 22 #include <linux/uaccess.h> 23 + #include <linux/proc_fs.h> 23 24 #include "pnode.h" 24 25 #include "internal.h" 25 26 ··· 1309 1308 #endif 1310 1309 } 1311 1310 1311 + static bool mnt_ns_loop(struct path *path) 1312 + { 1313 + /* Could bind mounting the mount namespace inode cause a 1314 + * mount namespace loop? 1315 + */ 1316 + struct inode *inode = path->dentry->d_inode; 1317 + struct proc_inode *ei; 1318 + struct mnt_namespace *mnt_ns; 1319 + 1320 + if (!proc_ns_inode(inode)) 1321 + return false; 1322 + 1323 + ei = PROC_I(inode); 1324 + if (ei->ns_ops != &mntns_operations) 1325 + return false; 1326 + 1327 + mnt_ns = ei->ns; 1328 + return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; 1329 + } 1330 + 1312 1331 struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, 1313 1332 int flag) 1314 1333 { ··· 1675 1654 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); 1676 1655 if (err) 1677 1656 return err; 1657 + 1658 + err = -EINVAL; 1659 + if (mnt_ns_loop(&old_path)) 1660 + goto out; 1678 1661 1679 1662 err = lock_mount(path); 1680 1663 if (err) ··· 2286 2261 return retval; 2287 2262 } 2288 2263 2264 + /* 2265 + * Assign a sequence number so we can detect when we attempt to bind 2266 + * mount a reference to an older mount namespace into the current 2267 + * mount namespace, preventing reference counting loops. A 64bit 2268 + * number incrementing at 10Ghz will take 12,427 years to wrap which 2269 + * is effectively never, so we can ignore the possibility. 
2270 + */ 2271 + static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); 2272 + 2289 2273 static struct mnt_namespace *alloc_mnt_ns(void) 2290 2274 { 2291 2275 struct mnt_namespace *new_ns; ··· 2302 2268 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 2303 2269 if (!new_ns) 2304 2270 return ERR_PTR(-ENOMEM); 2271 + new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); 2305 2272 atomic_set(&new_ns->count, 1); 2306 2273 new_ns->root = NULL; 2307 2274 INIT_LIST_HEAD(&new_ns->list); ··· 2716 2681 { 2717 2682 return check_mnt(real_mount(mnt)); 2718 2683 } 2684 + 2685 + static void *mntns_get(struct task_struct *task) 2686 + { 2687 + struct mnt_namespace *ns = NULL; 2688 + struct nsproxy *nsproxy; 2689 + 2690 + rcu_read_lock(); 2691 + nsproxy = task_nsproxy(task); 2692 + if (nsproxy) { 2693 + ns = nsproxy->mnt_ns; 2694 + get_mnt_ns(ns); 2695 + } 2696 + rcu_read_unlock(); 2697 + 2698 + return ns; 2699 + } 2700 + 2701 + static void mntns_put(void *ns) 2702 + { 2703 + put_mnt_ns(ns); 2704 + } 2705 + 2706 + static int mntns_install(struct nsproxy *nsproxy, void *ns) 2707 + { 2708 + struct fs_struct *fs = current->fs; 2709 + struct mnt_namespace *mnt_ns = ns; 2710 + struct path root; 2711 + 2712 + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT)) 2713 + return -EINVAL; 2714 + 2715 + if (fs->users != 1) 2716 + return -EINVAL; 2717 + 2718 + get_mnt_ns(mnt_ns); 2719 + put_mnt_ns(nsproxy->mnt_ns); 2720 + nsproxy->mnt_ns = mnt_ns; 2721 + 2722 + /* Find the root */ 2723 + root.mnt = &mnt_ns->root->mnt; 2724 + root.dentry = mnt_ns->root->mnt.mnt_root; 2725 + path_get(&root); 2726 + while(d_mountpoint(root.dentry) && follow_down_one(&root)) 2727 + ; 2728 + 2729 + /* Update the pwd and root */ 2730 + set_fs_pwd(fs, &root); 2731 + set_fs_root(fs, &root); 2732 + 2733 + path_put(&root); 2734 + return 0; 2735 + } 2736 + 2737 + const struct proc_ns_operations mntns_operations = { 2738 + .name = "mnt", 2739 + .type = CLONE_NEWNS, 2740 + .get = mntns_get, 2741 + .put = mntns_put, 
2742 + .install = mntns_install, 2743 + };
+5
fs/proc/namespaces.c
··· 27 27 #ifdef CONFIG_PID_NS 28 28 &pidns_operations, 29 29 #endif 30 + &mntns_operations, 30 31 }; 31 32 32 33 static const struct file_operations ns_file_operations = { ··· 202 201 return ERR_PTR(-EINVAL); 203 202 } 204 203 204 + bool proc_ns_inode(struct inode *inode) 205 + { 206 + return inode->i_fop == &ns_file_operations; 207 + }
+7
include/linux/proc_fs.h
··· 174 174 struct proc_dir_entry *parent); 175 175 176 176 extern struct file *proc_ns_fget(int fd); 177 + extern bool proc_ns_inode(struct inode *inode); 177 178 178 179 #else 179 180 ··· 230 229 return ERR_PTR(-EINVAL); 231 230 } 232 231 232 + static inline bool proc_ns_inode(struct inode *inode) 233 + { 234 + return false; 235 + } 236 + 233 237 #endif /* CONFIG_PROC_FS */ 234 238 235 239 #if !defined(CONFIG_PROC_KCORE) ··· 258 252 extern const struct proc_ns_operations utsns_operations; 259 253 extern const struct proc_ns_operations ipcns_operations; 260 254 extern const struct proc_ns_operations pidns_operations; 255 + extern const struct proc_ns_operations mntns_operations; 261 256 262 257 union proc_op { 263 258 int (*proc_get_link)(struct dentry *, struct path *);