Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

tjh.dev / kernel

fork atom

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork atom

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull namespace changes from Eric Biederman:
"This is an assorted mishmash of small cleanups, enhancements and bug
fixes.

The major theme is user namespace mount restrictions. nsown_capable
is killed as it encourages not thinking about details that need to be
considered. A very hard-to-hit pid namespace exit bug was finally
tracked down and fixed. There are also a couple of cleanups to the basic
namespace infrastructure.

Finally, there is an enhancement that makes per-user-namespace
capabilities usable as capabilities, and an enhancement that allows
the per userns root to nice other processes in the user namespace."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
userns: Kill nsown_capable it makes the wrong thing easy
capabilities: allow nice if we are privileged
pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD
userns: Allow PR_CAPBSET_DROP in a user namespace.
namespaces: Simplify copy_namespaces so it is clear what is going on.
pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup
sysfs: Restrict mounting sysfs
userns: Better restrictions on when proc and sysfs can be mounted
vfs: Don't copy mount bind mounts of /proc/<pid>/ns/mnt between namespaces
kernel/nsproxy.c: Improving a snippet of code.
proc: Restrict mounting the proc filesystem
vfs: Lock in place mounts from more privileged users

Linus Torvalds 12 years ago c7c4591d 11c7b03d

+177 -104

27 changed files

expand all collapse all

namespace.c

open.c

pnode.h

proc

root.c

sysfs

mount.c

include

linux

capability.h

fs.h

kobject_ns.h

mount.h

user_namespace.h

ipc

namespace.c

kernel

capability.c

fork.c

groups.c

nsproxy.c

pid.c

pid_namespace.c

sys.c

uid16.c

user.c

user_namespace.c

utsname.c

lib

kobject.c

net

core

net-sysfs.c

net_namespace.c

scm.c

security

commoncap.c

+95 -26

fs/namespace.c

reviewed

··· 831 831 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) 832 832 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; 833 833 834 834 + /* Don't allow unprivileged users to reveal what is under a mount */ 835 835 + if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) 836 836 + mnt->mnt.mnt_flags |= MNT_LOCKED; 837 837 + 834 838 atomic_inc(&sb->s_active); 835 839 mnt->mnt.mnt_sb = sb; 836 840 mnt->mnt.mnt_root = dget(root); ··· 1331 1327 goto dput_and_out; 1332 1328 if (!check_mnt(mnt)) 1333 1329 goto dput_and_out; 1330 1330 + if (mnt->mnt.mnt_flags & MNT_LOCKED) 1331 1331 + goto dput_and_out; 1334 1332 1335 1333 retval = do_umount(mnt, flags); 1336 1334 dput_and_out: ··· 1355 1349 1356 1350 #endif 1357 1351 1358 1358 - static bool mnt_ns_loop(struct path *path) 1352 1352 + static bool is_mnt_ns_file(struct dentry *dentry) 1359 1353 { 1360 1360 - /* Could bind mounting the mount namespace inode cause a 1361 1361 - * mount namespace loop? 1362 1362 - */ 1363 1363 - struct inode *inode = path->dentry->d_inode; 1354 1354 + /* Is this a proxy for a mount namespace? */ 1355 1355 + struct inode *inode = dentry->d_inode; 1364 1356 struct proc_ns *ei; 1365 1365 - struct mnt_namespace *mnt_ns; 1366 1357 1367 1358 if (!proc_ns_inode(inode)) 1368 1359 return false; ··· 1368 1365 if (ei->ns_ops != &mntns_operations) 1369 1366 return false; 1370 1367 1371 1371 - mnt_ns = ei->ns; 1368 1368 + return true; 1369 1369 + } 1370 1370 + 1371 1371 + static bool mnt_ns_loop(struct dentry *dentry) 1372 1372 + { 1373 1373 + /* Could bind mounting the mount namespace inode cause a 1374 1374 + * mount namespace loop? 
1375 1375 + */ 1376 1376 + struct mnt_namespace *mnt_ns; 1377 1377 + if (!is_mnt_ns_file(dentry)) 1378 1378 + return false; 1379 1379 + 1380 1380 + mnt_ns = get_proc_ns(dentry->d_inode)->ns; 1372 1381 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; 1373 1382 } 1374 1383 ··· 1389 1374 { 1390 1375 struct mount *res, *p, *q, *r, *parent; 1391 1376 1392 1392 - if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1377 1377 + if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt)) 1378 1378 + return ERR_PTR(-EINVAL); 1379 1379 + 1380 1380 + if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry)) 1393 1381 return ERR_PTR(-EINVAL); 1394 1382 1395 1383 res = q = clone_mnt(mnt, dentry, flag); 1396 1384 if (IS_ERR(q)) 1397 1385 return q; 1398 1386 1387 1387 + q->mnt.mnt_flags &= ~MNT_LOCKED; 1399 1388 q->mnt_mountpoint = mnt->mnt_mountpoint; 1400 1389 1401 1390 p = mnt; ··· 1409 1390 continue; 1410 1391 1411 1392 for (s = r; s; s = next_mnt(s, r)) { 1412 1412 - if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) { 1393 1393 + if (!(flag & CL_COPY_UNBINDABLE) && 1394 1394 + IS_MNT_UNBINDABLE(s)) { 1395 1395 + s = skip_mnt_tree(s); 1396 1396 + continue; 1397 1397 + } 1398 1398 + if (!(flag & CL_COPY_MNT_NS_FILE) && 1399 1399 + is_mnt_ns_file(s->mnt.mnt_root)) { 1413 1400 s = skip_mnt_tree(s); 1414 1401 continue; 1415 1402 } ··· 1721 1696 return err; 1722 1697 } 1723 1698 1699 1699 + static bool has_locked_children(struct mount *mnt, struct dentry *dentry) 1700 1700 + { 1701 1701 + struct mount *child; 1702 1702 + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { 1703 1703 + if (!is_subdir(child->mnt_mountpoint, dentry)) 1704 1704 + continue; 1705 1705 + 1706 1706 + if (child->mnt.mnt_flags & MNT_LOCKED) 1707 1707 + return true; 1708 1708 + } 1709 1709 + return false; 1710 1710 + } 1711 1711 + 1724 1712 /* 1725 1713 * do loopback mount. 
1726 1714 */ ··· 1751 1713 return err; 1752 1714 1753 1715 err = -EINVAL; 1754 1754 - if (mnt_ns_loop(&old_path)) 1716 1716 + if (mnt_ns_loop(old_path.dentry)) 1755 1717 goto out; 1756 1718 1757 1719 mp = lock_mount(path); ··· 1769 1731 if (!check_mnt(parent) || !check_mnt(old)) 1770 1732 goto out2; 1771 1733 1734 1734 + if (!recurse && has_locked_children(old, old_path.dentry)) 1735 1735 + goto out2; 1736 1736 + 1772 1737 if (recurse) 1773 1773 - mnt = copy_tree(old, old_path.dentry, 0); 1738 1738 + mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE); 1774 1739 else 1775 1740 mnt = clone_mnt(old, old_path.dentry, 0); 1776 1741 ··· 1781 1740 err = PTR_ERR(mnt); 1782 1741 goto out2; 1783 1742 } 1743 1743 + 1744 1744 + mnt->mnt.mnt_flags &= ~MNT_LOCKED; 1784 1745 1785 1746 err = graft_tree(mnt, parent, mp); 1786 1747 if (err) { ··· 1894 1851 1895 1852 err = -EINVAL; 1896 1853 if (!check_mnt(p) || !check_mnt(old)) 1854 1854 + goto out1; 1855 1855 + 1856 1856 + if (old->mnt.mnt_flags & MNT_LOCKED) 1897 1857 goto out1; 1898 1858 1899 1859 err = -EINVAL; ··· 2435 2389 2436 2390 namespace_lock(); 2437 2391 /* First pass: copy the tree topology */ 2438 2438 - copy_flags = CL_COPY_ALL | CL_EXPIRE; 2392 2392 + copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 2439 2393 if (user_ns != mnt_ns->user_ns) 2440 2394 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; 2441 2395 new = copy_tree(old, old->mnt.mnt_root, copy_flags); ··· 2470 2424 } 2471 2425 p = next_mnt(p, old); 2472 2426 q = next_mnt(q, new); 2427 2427 + if (!q) 2428 2428 + break; 2429 2429 + while (p->mnt.mnt_root != q->mnt.mnt_root) 2430 2430 + p = next_mnt(p, old); 2473 2431 } 2474 2432 namespace_unlock(); 2475 2433 ··· 2680 2630 goto out4; 2681 2631 if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) 2682 2632 goto out4; 2633 2633 + if (new_mnt->mnt.mnt_flags & MNT_LOCKED) 2634 2634 + goto out4; 2683 2635 error = -ENOENT; 2684 2636 if (d_unlinked(new.dentry)) 2685 2637 goto out4; ··· 2705 2653 
br_write_lock(&vfsmount_lock); 2706 2654 detach_mnt(new_mnt, &parent_path); 2707 2655 detach_mnt(root_mnt, &root_parent); 2656 2656 + if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { 2657 2657 + new_mnt->mnt.mnt_flags |= MNT_LOCKED; 2658 2658 + root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; 2659 2659 + } 2708 2660 /* mount old root on put_old */ 2709 2661 attach_mnt(root_mnt, old_mnt, old_mp); 2710 2662 /* mount new_root on / */ ··· 2867 2811 return chrooted; 2868 2812 } 2869 2813 2870 2870 - void update_mnt_policy(struct user_namespace *userns) 2814 2814 + bool fs_fully_visible(struct file_system_type *type) 2871 2815 { 2872 2816 struct mnt_namespace *ns = current->nsproxy->mnt_ns; 2873 2817 struct mount *mnt; 2818 2818 + bool visible = false; 2874 2819 2875 2875 - down_read(&namespace_sem); 2820 2820 + if (unlikely(!ns)) 2821 2821 + return false; 2822 2822 + 2823 2823 + namespace_lock(); 2876 2824 list_for_each_entry(mnt, &ns->list, mnt_list) { 2877 2877 - switch (mnt->mnt.mnt_sb->s_magic) { 2878 2878 - case SYSFS_MAGIC: 2879 2879 - userns->may_mount_sysfs = true; 2880 2880 - break; 2881 2881 - case PROC_SUPER_MAGIC: 2882 2882 - userns->may_mount_proc = true; 2883 2883 - break; 2825 2825 + struct mount *child; 2826 2826 + if (mnt->mnt.mnt_sb->s_type != type) 2827 2827 + continue; 2828 2828 + 2829 2829 + /* This mount is not fully visible if there are any child mounts 2830 2830 + * that cover anything except for empty directories. 
2831 2831 + */ 2832 2832 + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { 2833 2833 + struct inode *inode = child->mnt_mountpoint->d_inode; 2834 2834 + if (!S_ISDIR(inode->i_mode)) 2835 2835 + goto next; 2836 2836 + if (inode->i_nlink != 2) 2837 2837 + goto next; 2884 2838 } 2885 2885 - if (userns->may_mount_sysfs && userns->may_mount_proc) 2886 2886 - break; 2839 2839 + visible = true; 2840 2840 + goto found; 2841 2841 + next: ; 2887 2842 } 2888 2888 - up_read(&namespace_sem); 2843 2843 + found: 2844 2844 + namespace_unlock(); 2845 2845 + return visible; 2889 2846 } 2890 2847 2891 2848 static void *mntns_get(struct task_struct *task) ··· 2929 2860 struct path root; 2930 2861 2931 2862 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || 2932 2932 - !nsown_capable(CAP_SYS_CHROOT) || 2933 2933 - !nsown_capable(CAP_SYS_ADMIN)) 2863 2863 + !ns_capable(current_user_ns(), CAP_SYS_CHROOT) || 2864 2864 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 2934 2865 return -EPERM; 2935 2866 2936 2867 if (fs->users != 1)

+1 -1

fs/open.c

reviewed

··· 443 443 goto dput_and_out; 444 444 445 445 error = -EPERM; 446 446 - if (!nsown_capable(CAP_SYS_CHROOT)) 446 446 + if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) 447 447 goto dput_and_out; 448 448 error = security_path_chroot(&path); 449 449 if (error)

+4 -1

fs/pnode.h

reviewed

··· 19 19 20 20 #define CL_EXPIRE 0x01 21 21 #define CL_SLAVE 0x02 22 22 - #define CL_COPY_ALL 0x04 22 22 + #define CL_COPY_UNBINDABLE 0x04 23 23 #define CL_MAKE_SHARED 0x08 24 24 #define CL_PRIVATE 0x10 25 25 #define CL_SHARED_TO_SLAVE 0x20 26 26 #define CL_UNPRIVILEGED 0x40 27 27 + #define CL_COPY_MNT_NS_FILE 0x80 28 28 + 29 29 + #define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) 27 30 28 31 static inline void set_mnt_shared(struct mount *mnt) 29 32 {

+5 -1

fs/proc/root.c

reviewed

··· 110 110 ns = task_active_pid_ns(current); 111 111 options = data; 112 112 113 113 - if (!current_user_ns()->may_mount_proc) 113 113 + if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) 114 114 + return ERR_PTR(-EPERM); 115 115 + 116 116 + /* Does the mounter have privilege over the pid namespace? */ 117 117 + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) 114 118 return ERR_PTR(-EPERM); 115 119 } 116 120

+9 -2

fs/sysfs/mount.c

reviewed

··· 112 112 struct super_block *sb; 113 113 int error; 114 114 115 115 - if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) 116 116 - return ERR_PTR(-EPERM); 115 115 + if (!(flags & MS_KERNMOUNT)) { 116 116 + if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) 117 117 + return ERR_PTR(-EPERM); 118 118 + 119 119 + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { 120 120 + if (!kobj_ns_current_may_mount(type)) 121 121 + return ERR_PTR(-EPERM); 122 122 + } 123 123 + } 117 124 118 125 info = kzalloc(sizeof(*info), GFP_KERNEL); 119 126 if (!info)

-1

include/linux/capability.h

reviewed

··· 210 210 struct user_namespace *ns, int cap); 211 211 extern bool capable(int cap); 212 212 extern bool ns_capable(struct user_namespace *ns, int cap); 213 213 - extern bool nsown_capable(int cap); 214 213 extern bool inode_capable(const struct inode *inode, int cap); 215 214 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); 216 215

include/linux/fs.h

reviewed

··· 1900 1900 extern int freeze_super(struct super_block *super); 1901 1901 extern int thaw_super(struct super_block *super); 1902 1902 extern bool our_mnt(struct vfsmount *mnt); 1903 1903 + extern bool fs_fully_visible(struct file_system_type *); 1903 1904 1904 1905 extern int current_umask(void); 1905 1906

include/linux/kobject_ns.h

reviewed

··· 39 39 */ 40 40 struct kobj_ns_type_operations { 41 41 enum kobj_ns_type type; 42 42 + bool (*current_may_mount)(void); 42 43 void *(*grab_current_ns)(void); 43 44 const void *(*netlink_ns)(struct sock *sk); 44 45 const void *(*initial_ns)(void); ··· 51 50 const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); 52 51 const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); 53 52 53 53 + bool kobj_ns_current_may_mount(enum kobj_ns_type type); 54 54 void *kobj_ns_grab_current(enum kobj_ns_type type); 55 55 const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); 56 56 const void *kobj_ns_initial(enum kobj_ns_type type);

include/linux/mount.h

reviewed

··· 48 48 #define MNT_INTERNAL 0x4000 49 49 50 50 #define MNT_LOCK_READONLY 0x400000 51 51 + #define MNT_LOCKED 0x800000 51 52 52 53 struct vfsmount { 53 54 struct dentry *mnt_root; /* root of the mounted tree */

-4

include/linux/user_namespace.h

reviewed

··· 27 27 kuid_t owner; 28 28 kgid_t group; 29 29 unsigned int proc_inum; 30 30 - bool may_mount_sysfs; 31 31 - bool may_mount_proc; 32 30 }; 33 31 34 32 extern struct user_namespace init_user_ns; ··· 82 84 } 83 85 84 86 #endif 85 85 - 86 86 - void update_mnt_policy(struct user_namespace *userns); 87 87 88 88 #endif /* _LINUX_USER_H */

+1 -1

ipc/namespace.c

reviewed

··· 171 171 { 172 172 struct ipc_namespace *ns = new; 173 173 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || 174 174 - !nsown_capable(CAP_SYS_ADMIN)) 174 174 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 175 175 return -EPERM; 176 176 177 177 /* Ditch state from the old ipc namespace */

-12

kernel/capability.c

reviewed

··· 433 433 EXPORT_SYMBOL(capable); 434 434 435 435 /** 436 436 - * nsown_capable - Check superior capability to one's own user_ns 437 437 - * @cap: The capability in question 438 438 - * 439 439 - * Return true if the current task has the given superior capability 440 440 - * targeted at its own user namespace. 441 441 - */ 442 442 - bool nsown_capable(int cap) 443 443 - { 444 444 - return ns_capable(current_user_ns(), cap); 445 445 - } 446 446 - 447 447 - /** 448 436 * inode_capable - Check superior capability over inode 449 437 * @inode: The inode in question 450 438 * @cap: The capability in question

-5

kernel/fork.c

reviewed

··· 1825 1825 if (unshare_flags & CLONE_NEWUSER) 1826 1826 unshare_flags |= CLONE_THREAD | CLONE_FS; 1827 1827 /* 1828 1828 - * If unsharing a pid namespace must also unshare the thread. 1829 1829 - */ 1830 1830 - if (unshare_flags & CLONE_NEWPID) 1831 1831 - unshare_flags |= CLONE_THREAD; 1832 1832 - /* 1833 1828 * If unsharing a thread from a thread group, must also unshare vm. 1834 1829 */ 1835 1830 if (unshare_flags & CLONE_THREAD)

+1 -1

kernel/groups.c

reviewed

··· 233 233 struct group_info *group_info; 234 234 int retval; 235 235 236 236 - if (!nsown_capable(CAP_SETGID)) 236 236 + if (!ns_capable(current_user_ns(), CAP_SETGID)) 237 237 return -EPERM; 238 238 if ((unsigned)gidsetsize > NGROUPS_MAX) 239 239 return -EINVAL;

+12 -24

kernel/nsproxy.c

reviewed

··· 126 126 struct nsproxy *old_ns = tsk->nsproxy; 127 127 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); 128 128 struct nsproxy *new_ns; 129 129 - int err = 0; 130 129 131 131 - if (!old_ns) 130 130 + if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 131 131 + CLONE_NEWPID | CLONE_NEWNET)))) { 132 132 + get_nsproxy(old_ns); 132 133 return 0; 133 133 - 134 134 - get_nsproxy(old_ns); 135 135 - 136 136 - if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 137 137 - CLONE_NEWPID | CLONE_NEWNET))) 138 138 - return 0; 139 139 - 140 140 - if (!ns_capable(user_ns, CAP_SYS_ADMIN)) { 141 141 - err = -EPERM; 142 142 - goto out; 143 134 } 135 135 + 136 136 + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 137 137 + return -EPERM; 144 138 145 139 /* 146 140 * CLONE_NEWIPC must detach from the undolist: after switching ··· 143 149 * means share undolist with parent, so we must forbid using 144 150 * it along with CLONE_NEWIPC. 145 151 */ 146 146 - if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { 147 147 - err = -EINVAL; 148 148 - goto out; 149 149 - } 152 152 + if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) == 153 153 + (CLONE_NEWIPC | CLONE_SYSVSEM)) 154 154 + return -EINVAL; 150 155 151 156 new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs); 152 152 - if (IS_ERR(new_ns)) { 153 153 - err = PTR_ERR(new_ns); 154 154 - goto out; 155 155 - } 157 157 + if (IS_ERR(new_ns)) 158 158 + return PTR_ERR(new_ns); 156 159 157 160 tsk->nsproxy = new_ns; 158 158 - 159 159 - out: 160 160 - put_nsproxy(old_ns); 161 161 - return err; 161 161 + return 0; 162 162 } 163 163 164 164 void free_nsproxy(struct nsproxy *ns)

kernel/pid.c

reviewed

··· 265 265 struct pid_namespace *ns = upid->ns; 266 266 hlist_del_rcu(&upid->pid_chain); 267 267 switch(--ns->nr_hashed) { 268 268 + case 2: 268 269 case 1: 269 270 /* When all that is left in the pid namespace 270 271 * is the reaper wake up the reaper. The reaper

+1 -1

kernel/pid_namespace.c

reviewed

··· 329 329 struct pid_namespace *ancestor, *new = ns; 330 330 331 331 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || 332 332 - !nsown_capable(CAP_SYS_ADMIN)) 332 332 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 333 333 return -EPERM; 334 334 335 335 /*

+10 -10

kernel/sys.c

reviewed

··· 337 337 if (rgid != (gid_t) -1) { 338 338 if (gid_eq(old->gid, krgid) || 339 339 gid_eq(old->egid, krgid) || 340 340 - nsown_capable(CAP_SETGID)) 340 340 + ns_capable(old->user_ns, CAP_SETGID)) 341 341 new->gid = krgid; 342 342 else 343 343 goto error; ··· 346 346 if (gid_eq(old->gid, kegid) || 347 347 gid_eq(old->egid, kegid) || 348 348 gid_eq(old->sgid, kegid) || 349 349 - nsown_capable(CAP_SETGID)) 349 349 + ns_capable(old->user_ns, CAP_SETGID)) 350 350 new->egid = kegid; 351 351 else 352 352 goto error; ··· 387 387 old = current_cred(); 388 388 389 389 retval = -EPERM; 390 390 - if (nsown_capable(CAP_SETGID)) 390 390 + if (ns_capable(old->user_ns, CAP_SETGID)) 391 391 new->gid = new->egid = new->sgid = new->fsgid = kgid; 392 392 else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) 393 393 new->egid = new->fsgid = kgid; ··· 471 471 new->uid = kruid; 472 472 if (!uid_eq(old->uid, kruid) && 473 473 !uid_eq(old->euid, kruid) && 474 474 - !nsown_capable(CAP_SETUID)) 474 474 + !ns_capable(old->user_ns, CAP_SETUID)) 475 475 goto error; 476 476 } 477 477 ··· 480 480 if (!uid_eq(old->uid, keuid) && 481 481 !uid_eq(old->euid, keuid) && 482 482 !uid_eq(old->suid, keuid) && 483 483 - !nsown_capable(CAP_SETUID)) 483 483 + !ns_capable(old->user_ns, CAP_SETUID)) 484 484 goto error; 485 485 } 486 486 ··· 534 534 old = current_cred(); 535 535 536 536 retval = -EPERM; 537 537 - if (nsown_capable(CAP_SETUID)) { 537 537 + if (ns_capable(old->user_ns, CAP_SETUID)) { 538 538 new->suid = new->uid = kuid; 539 539 if (!uid_eq(kuid, old->uid)) { 540 540 retval = set_user(new); ··· 591 591 old = current_cred(); 592 592 593 593 retval = -EPERM; 594 594 - if (!nsown_capable(CAP_SETUID)) { 594 594 + if (!ns_capable(old->user_ns, CAP_SETUID)) { 595 595 if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && 596 596 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) 597 597 goto error; ··· 673 673 old = current_cred(); 674 674 675 675 retval = -EPERM; 676 676 - if 
(!nsown_capable(CAP_SETGID)) { 676 676 + if (!ns_capable(old->user_ns, CAP_SETGID)) { 677 677 if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && 678 678 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) 679 679 goto error; ··· 744 744 745 745 if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || 746 746 uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || 747 747 - nsown_capable(CAP_SETUID)) { 747 747 + ns_capable(old->user_ns, CAP_SETUID)) { 748 748 if (!uid_eq(kuid, old->fsuid)) { 749 749 new->fsuid = kuid; 750 750 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) ··· 783 783 784 784 if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || 785 785 gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || 786 786 - nsown_capable(CAP_SETGID)) { 786 786 + ns_capable(old->user_ns, CAP_SETGID)) { 787 787 if (!gid_eq(kgid, old->fsgid)) { 788 788 new->fsgid = kgid; 789 789 goto change_okay;

+1 -1

kernel/uid16.c

reviewed

··· 176 176 struct group_info *group_info; 177 177 int retval; 178 178 179 179 - if (!nsown_capable(CAP_SETGID)) 179 179 + if (!ns_capable(current_user_ns(), CAP_SETGID)) 180 180 return -EPERM; 181 181 if ((unsigned)gidsetsize > NGROUPS_MAX) 182 182 return -EINVAL;

-2

kernel/user.c

reviewed

··· 51 51 .owner = GLOBAL_ROOT_UID, 52 52 .group = GLOBAL_ROOT_GID, 53 53 .proc_inum = PROC_USER_INIT_INO, 54 54 - .may_mount_sysfs = true, 55 55 - .may_mount_proc = true, 56 54 }; 57 55 EXPORT_SYMBOL_GPL(init_user_ns); 58 56

-2

kernel/user_namespace.c

reviewed

··· 101 101 102 102 set_cred_user_ns(new, ns); 103 103 104 104 - update_mnt_policy(ns); 105 105 - 106 104 return 0; 107 105 } 108 106

+1 -1

kernel/utsname.c

reviewed

··· 114 114 struct uts_namespace *ns = new; 115 115 116 116 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || 117 117 - !nsown_capable(CAP_SYS_ADMIN)) 117 117 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 118 118 return -EPERM; 119 119 120 120 get_uts_ns(ns);

+15

lib/kobject.c

reviewed

··· 931 931 return kobj_child_ns_ops(kobj->parent); 932 932 } 933 933 934 934 + bool kobj_ns_current_may_mount(enum kobj_ns_type type) 935 935 + { 936 936 + bool may_mount = false; 937 937 + 938 938 + if (type == KOBJ_NS_TYPE_NONE) 939 939 + return true; 940 940 + 941 941 + spin_lock(&kobj_ns_type_lock); 942 942 + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && 943 943 + kobj_ns_ops_tbl[type]) 944 944 + may_mount = kobj_ns_ops_tbl[type]->current_may_mount(); 945 945 + spin_unlock(&kobj_ns_type_lock); 946 946 + 947 947 + return may_mount; 948 948 + } 934 949 935 950 void *kobj_ns_grab_current(enum kobj_ns_type type) 936 951 {

net/core/net-sysfs.c

reviewed

··· 1196 1196 #endif 1197 1197 } 1198 1198 1199 1199 + static bool net_current_may_mount(void) 1200 1200 + { 1201 1201 + struct net *net = current->nsproxy->net_ns; 1202 1202 + 1203 1203 + return ns_capable(net->user_ns, CAP_SYS_ADMIN); 1204 1204 + } 1205 1205 + 1199 1206 static void *net_grab_current_ns(void) 1200 1207 { 1201 1208 struct net *ns = current->nsproxy->net_ns; ··· 1225 1218 1226 1219 struct kobj_ns_type_operations net_ns_type_operations = { 1227 1220 .type = KOBJ_NS_TYPE_NET, 1221 1221 + .current_may_mount = net_current_may_mount, 1228 1222 .grab_current_ns = net_grab_current_ns, 1229 1223 .netlink_ns = net_netlink_ns, 1230 1224 .initial_ns = net_initial_ns,

+1 -1

net/core/net_namespace.c

reviewed

··· 651 651 struct net *net = ns; 652 652 653 653 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || 654 654 - !nsown_capable(CAP_SYS_ADMIN)) 654 654 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 655 655 return -EPERM; 656 656 657 657 put_net(nsproxy->net_ns);

+2 -2

net/core/scm.c

reviewed

··· 56 56 if ((creds->pid == task_tgid_vnr(current) || 57 57 ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && 58 58 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || 59 59 - uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && 59 59 + uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) && 60 60 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || 61 61 - gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { 61 61 + gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) { 62 62 return 0; 63 63 } 64 64 return -EPERM;

+5 -5

security/commoncap.c

reviewed

··· 768 768 */ 769 769 static int cap_safe_nice(struct task_struct *p) 770 770 { 771 771 - int is_subset; 771 771 + int is_subset, ret = 0; 772 772 773 773 rcu_read_lock(); 774 774 is_subset = cap_issubset(__task_cred(p)->cap_permitted, 775 775 current_cred()->cap_permitted); 776 776 + if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) 777 777 + ret = -EPERM; 776 778 rcu_read_unlock(); 777 779 778 778 - if (!is_subset && !capable(CAP_SYS_NICE)) 779 779 - return -EPERM; 780 780 - return 0; 780 780 + return ret; 781 781 } 782 782 783 783 /** ··· 824 824 */ 825 825 static long cap_prctl_drop(struct cred *new, unsigned long cap) 826 826 { 827 827 - if (!capable(CAP_SETPCAP)) 827 827 + if (!ns_capable(current_user_ns(), CAP_SETPCAP)) 828 828 return -EPERM; 829 829 if (!cap_valid(cap)) 830 830 return -EINVAL;