Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

+53 -1

fs/namespace.c

··· 798 798 } 799 799 800 800 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; 801 + /* Don't allow unprivileged users to change mount flags */ 802 + if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) 803 + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; 804 + 801 805 atomic_inc(&sb->s_active); 802 806 mnt->mnt.mnt_sb = sb; 803 807 mnt->mnt.mnt_root = dget(root); ··· 1717 1713 if (readonly_request == __mnt_is_readonly(mnt)) 1718 1714 return 0; 1719 1715 1716 + if (mnt->mnt_flags & MNT_LOCK_READONLY) 1717 + return -EPERM; 1718 + 1720 1719 if (readonly_request) 1721 1720 error = mnt_make_readonly(real_mount(mnt)); 1722 1721 else ··· 2346 2339 /* First pass: copy the tree topology */ 2347 2340 copy_flags = CL_COPY_ALL | CL_EXPIRE; 2348 2341 if (user_ns != mnt_ns->user_ns) 2349 - copy_flags |= CL_SHARED_TO_SLAVE; 2342 + copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; 2350 2343 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 2351 2344 if (IS_ERR(new)) { 2352 2345 up_write(&namespace_sem); ··· 2737 2730 bool our_mnt(struct vfsmount *mnt) 2738 2731 { 2739 2732 return check_mnt(real_mount(mnt)); 2733 + } 2734 + 2735 + bool current_chrooted(void) 2736 + { 2737 + /* Does the current process have a non-standard root */ 2738 + struct path ns_root; 2739 + struct path fs_root; 2740 + bool chrooted; 2741 + 2742 + /* Find the namespace root */ 2743 + ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt; 2744 + ns_root.dentry = ns_root.mnt->mnt_root; 2745 + path_get(&ns_root); 2746 + while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) 2747 + ; 2748 + 2749 + get_fs_root(current->fs, &fs_root); 2750 + 2751 + chrooted = !path_equal(&fs_root, &ns_root); 2752 + 2753 + path_put(&fs_root); 2754 + path_put(&ns_root); 2755 + 2756 + return chrooted; 2757 + } 2758 + 2759 + void update_mnt_policy(struct user_namespace *userns) 2760 + { 2761 + struct mnt_namespace *ns = current->nsproxy->mnt_ns; 2762 + struct mount *mnt; 2763 + 2764 + 
down_read(&namespace_sem); 2765 + list_for_each_entry(mnt, &ns->list, mnt_list) { 2766 + switch (mnt->mnt.mnt_sb->s_magic) { 2767 + case SYSFS_MAGIC: 2768 + userns->may_mount_sysfs = true; 2769 + break; 2770 + case PROC_SUPER_MAGIC: 2771 + userns->may_mount_proc = true; 2772 + break; 2773 + } 2774 + if (userns->may_mount_sysfs && userns->may_mount_proc) 2775 + break; 2776 + } 2777 + up_read(&namespace_sem); 2740 2778 } 2741 2779 2742 2780 static void *mntns_get(struct task_struct *task)

+6

fs/pnode.c

··· 9 9 #include <linux/mnt_namespace.h> 10 10 #include <linux/mount.h> 11 11 #include <linux/fs.h> 12 + #include <linux/nsproxy.h> 12 13 #include "internal.h" 13 14 #include "pnode.h" 14 15 ··· 221 220 int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, 222 221 struct mount *source_mnt, struct list_head *tree_list) 223 222 { 223 + struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; 224 224 struct mount *m, *child; 225 225 int ret = 0; 226 226 struct mount *prev_dest_mnt = dest_mnt; ··· 238 236 continue; 239 237 240 238 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 239 + 240 + /* Notice when we are propagating across user namespaces */ 241 + if (m->mnt_ns->user_ns != user_ns) 242 + type |= CL_UNPRIVILEGED; 241 243 242 244 child = copy_tree(source, source->mnt.mnt_root, type); 243 245 if (IS_ERR(child)) {

+1

fs/pnode.h

··· 23 23 #define CL_MAKE_SHARED 0x08 24 24 #define CL_PRIVATE 0x10 25 25 #define CL_SHARED_TO_SLAVE 0x20 26 + #define CL_UNPRIVILEGED 0x40 26 27 27 28 static inline void set_mnt_shared(struct mount *mnt) 28 29 {

+4

fs/proc/root.c

··· 16 16 #include <linux/sched.h> 17 17 #include <linux/module.h> 18 18 #include <linux/bitops.h> 19 + #include <linux/user_namespace.h> 19 20 #include <linux/mount.h> 20 21 #include <linux/pid_namespace.h> 21 22 #include <linux/parser.h> ··· 109 108 } else { 110 109 ns = task_active_pid_ns(current); 111 110 options = data; 111 + 112 + if (!current_user_ns()->may_mount_proc) 113 + return ERR_PTR(-EPERM); 112 114 } 113 115 114 116 sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);

+4

fs/sysfs/mount.c

··· 19 19 #include <linux/module.h> 20 20 #include <linux/magic.h> 21 21 #include <linux/slab.h> 22 + #include <linux/user_namespace.h> 22 23 23 24 #include "sysfs.h" 24 25 ··· 111 110 enum kobj_ns_type type; 112 111 struct super_block *sb; 113 112 int error; 113 + 114 + if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) 115 + return ERR_PTR(-EPERM); 114 116 115 117 info = kzalloc(sizeof(*info), GFP_KERNEL); 116 118 if (!info)

+2

include/linux/fs_struct.h

··· 50 50 spin_unlock(&fs->lock); 51 51 } 52 52 53 + extern bool current_chrooted(void); 54 + 53 55 #endif /* _LINUX_FS_STRUCT_H */

+2

include/linux/mount.h

··· 47 47 48 48 #define MNT_INTERNAL 0x4000 49 49 50 + #define MNT_LOCK_READONLY 0x400000 51 + 50 52 struct vfsmount { 51 53 struct dentry *mnt_root; /* root of the mounted tree */ 52 54 struct super_block *mnt_sb; /* pointer to superblock */

+4

include/linux/user_namespace.h

··· 26 26 kuid_t owner; 27 27 kgid_t group; 28 28 unsigned int proc_inum; 29 + bool may_mount_sysfs; 30 + bool may_mount_proc; 29 31 }; 30 32 31 33 extern struct user_namespace init_user_ns; ··· 83 81 } 84 82 85 83 #endif 84 + 85 + void update_mnt_policy(struct user_namespace *userns); 86 86 87 87 #endif /* _LINUX_USER_H */

+10 -2

ipc/mqueue.c

··· 330 330 int flags, const char *dev_name, 331 331 void *data) 332 332 { 333 - if (!(flags & MS_KERNMOUNT)) 334 - data = current->nsproxy->ipc_ns; 333 + if (!(flags & MS_KERNMOUNT)) { 334 + struct ipc_namespace *ns = current->nsproxy->ipc_ns; 335 + /* Don't allow mounting unless the caller has CAP_SYS_ADMIN 336 + * over the ipc namespace. 337 + */ 338 + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) 339 + return ERR_PTR(-EPERM); 340 + 341 + data = ns; 342 + } 335 343 return mount_ns(fs_type, flags, data, mqueue_fill_super); 336 344 } 337 345

+2 -1

kernel/pid_namespace.c

··· 181 181 int nr; 182 182 int rc; 183 183 struct task_struct *task, *me = current; 184 + int init_pids = thread_group_leader(me) ? 1 : 2; 184 185 185 186 /* Don't allow any more processes into the pid namespace */ 186 187 disable_pid_allocation(pid_ns); ··· 231 230 */ 232 231 for (;;) { 233 232 set_current_state(TASK_UNINTERRUPTIBLE); 234 - if (pid_ns->nr_hashed == 1) 233 + if (pid_ns->nr_hashed == init_pids) 235 234 break; 236 235 schedule(); 237 236 }

+2

kernel/user.c

··· 51 51 .owner = GLOBAL_ROOT_UID, 52 52 .group = GLOBAL_ROOT_GID, 53 53 .proc_inum = PROC_USER_INIT_INO, 54 + .may_mount_sysfs = true, 55 + .may_mount_proc = true, 54 56 }; 55 57 EXPORT_SYMBOL_GPL(init_user_ns); 56 58

+11

kernel/user_namespace.c

··· 61 61 kgid_t group = new->egid; 62 62 int ret; 63 63 64 + /* 65 + * Verify that we can not violate the policy of which files 66 + * may be accessed that is specified by the root directory, 67 + * by verifying that the root directory is at the root of the 68 + * mount namespace which allows all files to be accessed. 69 + */ 70 + if (current_chrooted()) 71 + return -EPERM; 72 + 64 73 /* The creator needs a mapping in the parent user namespace 65 74 * or else we won't be able to reasonably tell userspace who 66 75 * created a user_namespace. ··· 95 86 ns->group = group; 96 87 97 88 set_cred_user_ns(new, ns); 89 + 90 + update_mnt_policy(ns); 98 91 99 92 return 0; 100 93 }

+3 -1

net/core/scm.c

··· 24 24 #include <linux/interrupt.h> 25 25 #include <linux/netdevice.h> 26 26 #include <linux/security.h> 27 + #include <linux/pid_namespace.h> 27 28 #include <linux/pid.h> 28 29 #include <linux/nsproxy.h> 29 30 #include <linux/slab.h> ··· 53 52 if (!uid_valid(uid) || !gid_valid(gid)) 54 53 return -EINVAL; 55 54 56 - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && 55 + if ((creds->pid == task_tgid_vnr(current) || 56 + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && 57 57 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || 58 58 uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && 59 59 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||

+1 -3

security/yama/yama_lsm.c

··· 347 347 /* Only disallow PTRACE_TRACEME on more aggressive settings. */ 348 348 switch (ptrace_scope) { 349 349 case YAMA_SCOPE_CAPABILITY: 350 - rcu_read_lock(); 351 - if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE)) 350 + if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE)) 352 351 rc = -EPERM; 353 - rcu_read_unlock(); 354 352 break; 355 353 case YAMA_SCOPE_NO_ATTACH: 356 354 rc = -EPERM;