Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull namespace changes from Eric Biederman:
"This is an assorted mishmash of small cleanups, enhancements and bug
fixes.

The major theme is user namespace mount restrictions. nsown_capable
is killed as it encourages not thinking about details that need to be
considered. A very hard-to-hit pid namespace exiting bug was finally
tracked down and fixed. There are also a couple of cleanups to the
basic namespace infrastructure.

Finally, there is an enhancement that makes per-user-namespace
capabilities usable as capabilities, and an enhancement that allows
the per-userns root to nice other processes in the user namespace."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
userns: Kill nsown_capable it makes the wrong thing easy
capabilities: allow nice if we are privileged
pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD
userns: Allow PR_CAPBSET_DROP in a user namespace.
namespaces: Simplify copy_namespaces so it is clear what is going on.
pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup
sysfs: Restrict mounting sysfs
userns: Better restrictions on when proc and sysfs can be mounted
vfs: Don't copy mount bind mounts of /proc/<pid>/ns/mnt between namespaces
kernel/nsproxy.c: Improving a snippet of code.
proc: Restrict mounting the proc filesystem
vfs: Lock in place mounts from more privileged users

+177 -104
+95 -26
fs/namespace.c
··· 831 831 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) 832 832 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; 833 833 834 + /* Don't allow unprivileged users to reveal what is under a mount */ 835 + if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) 836 + mnt->mnt.mnt_flags |= MNT_LOCKED; 837 + 834 838 atomic_inc(&sb->s_active); 835 839 mnt->mnt.mnt_sb = sb; 836 840 mnt->mnt.mnt_root = dget(root); ··· 1331 1327 goto dput_and_out; 1332 1328 if (!check_mnt(mnt)) 1333 1329 goto dput_and_out; 1330 + if (mnt->mnt.mnt_flags & MNT_LOCKED) 1331 + goto dput_and_out; 1334 1332 1335 1333 retval = do_umount(mnt, flags); 1336 1334 dput_and_out: ··· 1355 1349 1356 1350 #endif 1357 1351 1358 - static bool mnt_ns_loop(struct path *path) 1352 + static bool is_mnt_ns_file(struct dentry *dentry) 1359 1353 { 1360 - /* Could bind mounting the mount namespace inode cause a 1361 - * mount namespace loop? 1362 - */ 1363 - struct inode *inode = path->dentry->d_inode; 1354 + /* Is this a proxy for a mount namespace? */ 1355 + struct inode *inode = dentry->d_inode; 1364 1356 struct proc_ns *ei; 1365 - struct mnt_namespace *mnt_ns; 1366 1357 1367 1358 if (!proc_ns_inode(inode)) 1368 1359 return false; ··· 1368 1365 if (ei->ns_ops != &mntns_operations) 1369 1366 return false; 1370 1367 1371 - mnt_ns = ei->ns; 1368 + return true; 1369 + } 1370 + 1371 + static bool mnt_ns_loop(struct dentry *dentry) 1372 + { 1373 + /* Could bind mounting the mount namespace inode cause a 1374 + * mount namespace loop? 
1375 + */ 1376 + struct mnt_namespace *mnt_ns; 1377 + if (!is_mnt_ns_file(dentry)) 1378 + return false; 1379 + 1380 + mnt_ns = get_proc_ns(dentry->d_inode)->ns; 1372 1381 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; 1373 1382 } 1374 1383 ··· 1389 1374 { 1390 1375 struct mount *res, *p, *q, *r, *parent; 1391 1376 1392 - if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1377 + if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt)) 1378 + return ERR_PTR(-EINVAL); 1379 + 1380 + if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry)) 1393 1381 return ERR_PTR(-EINVAL); 1394 1382 1395 1383 res = q = clone_mnt(mnt, dentry, flag); 1396 1384 if (IS_ERR(q)) 1397 1385 return q; 1398 1386 1387 + q->mnt.mnt_flags &= ~MNT_LOCKED; 1399 1388 q->mnt_mountpoint = mnt->mnt_mountpoint; 1400 1389 1401 1390 p = mnt; ··· 1409 1390 continue; 1410 1391 1411 1392 for (s = r; s; s = next_mnt(s, r)) { 1412 - if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) { 1393 + if (!(flag & CL_COPY_UNBINDABLE) && 1394 + IS_MNT_UNBINDABLE(s)) { 1395 + s = skip_mnt_tree(s); 1396 + continue; 1397 + } 1398 + if (!(flag & CL_COPY_MNT_NS_FILE) && 1399 + is_mnt_ns_file(s->mnt.mnt_root)) { 1413 1400 s = skip_mnt_tree(s); 1414 1401 continue; 1415 1402 } ··· 1721 1696 return err; 1722 1697 } 1723 1698 1699 + static bool has_locked_children(struct mount *mnt, struct dentry *dentry) 1700 + { 1701 + struct mount *child; 1702 + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { 1703 + if (!is_subdir(child->mnt_mountpoint, dentry)) 1704 + continue; 1705 + 1706 + if (child->mnt.mnt_flags & MNT_LOCKED) 1707 + return true; 1708 + } 1709 + return false; 1710 + } 1711 + 1724 1712 /* 1725 1713 * do loopback mount. 
1726 1714 */ ··· 1751 1713 return err; 1752 1714 1753 1715 err = -EINVAL; 1754 - if (mnt_ns_loop(&old_path)) 1716 + if (mnt_ns_loop(old_path.dentry)) 1755 1717 goto out; 1756 1718 1757 1719 mp = lock_mount(path); ··· 1769 1731 if (!check_mnt(parent) || !check_mnt(old)) 1770 1732 goto out2; 1771 1733 1734 + if (!recurse && has_locked_children(old, old_path.dentry)) 1735 + goto out2; 1736 + 1772 1737 if (recurse) 1773 - mnt = copy_tree(old, old_path.dentry, 0); 1738 + mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE); 1774 1739 else 1775 1740 mnt = clone_mnt(old, old_path.dentry, 0); 1776 1741 ··· 1781 1740 err = PTR_ERR(mnt); 1782 1741 goto out2; 1783 1742 } 1743 + 1744 + mnt->mnt.mnt_flags &= ~MNT_LOCKED; 1784 1745 1785 1746 err = graft_tree(mnt, parent, mp); 1786 1747 if (err) { ··· 1894 1851 1895 1852 err = -EINVAL; 1896 1853 if (!check_mnt(p) || !check_mnt(old)) 1854 + goto out1; 1855 + 1856 + if (old->mnt.mnt_flags & MNT_LOCKED) 1897 1857 goto out1; 1898 1858 1899 1859 err = -EINVAL; ··· 2435 2389 2436 2390 namespace_lock(); 2437 2391 /* First pass: copy the tree topology */ 2438 - copy_flags = CL_COPY_ALL | CL_EXPIRE; 2392 + copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 2439 2393 if (user_ns != mnt_ns->user_ns) 2440 2394 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; 2441 2395 new = copy_tree(old, old->mnt.mnt_root, copy_flags); ··· 2470 2424 } 2471 2425 p = next_mnt(p, old); 2472 2426 q = next_mnt(q, new); 2427 + if (!q) 2428 + break; 2429 + while (p->mnt.mnt_root != q->mnt.mnt_root) 2430 + p = next_mnt(p, old); 2473 2431 } 2474 2432 namespace_unlock(); 2475 2433 ··· 2680 2630 goto out4; 2681 2631 if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) 2682 2632 goto out4; 2633 + if (new_mnt->mnt.mnt_flags & MNT_LOCKED) 2634 + goto out4; 2683 2635 error = -ENOENT; 2684 2636 if (d_unlinked(new.dentry)) 2685 2637 goto out4; ··· 2705 2653 br_write_lock(&vfsmount_lock); 2706 2654 detach_mnt(new_mnt, &parent_path); 2707 2655 detach_mnt(root_mnt, 
&root_parent); 2656 + if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { 2657 + new_mnt->mnt.mnt_flags |= MNT_LOCKED; 2658 + root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; 2659 + } 2708 2660 /* mount old root on put_old */ 2709 2661 attach_mnt(root_mnt, old_mnt, old_mp); 2710 2662 /* mount new_root on / */ ··· 2867 2811 return chrooted; 2868 2812 } 2869 2813 2870 - void update_mnt_policy(struct user_namespace *userns) 2814 + bool fs_fully_visible(struct file_system_type *type) 2871 2815 { 2872 2816 struct mnt_namespace *ns = current->nsproxy->mnt_ns; 2873 2817 struct mount *mnt; 2818 + bool visible = false; 2874 2819 2875 - down_read(&namespace_sem); 2820 + if (unlikely(!ns)) 2821 + return false; 2822 + 2823 + namespace_lock(); 2876 2824 list_for_each_entry(mnt, &ns->list, mnt_list) { 2877 - switch (mnt->mnt.mnt_sb->s_magic) { 2878 - case SYSFS_MAGIC: 2879 - userns->may_mount_sysfs = true; 2880 - break; 2881 - case PROC_SUPER_MAGIC: 2882 - userns->may_mount_proc = true; 2883 - break; 2825 + struct mount *child; 2826 + if (mnt->mnt.mnt_sb->s_type != type) 2827 + continue; 2828 + 2829 + /* This mount is not fully visible if there are any child mounts 2830 + * that cover anything except for empty directories. 
2831 + */ 2832 + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { 2833 + struct inode *inode = child->mnt_mountpoint->d_inode; 2834 + if (!S_ISDIR(inode->i_mode)) 2835 + goto next; 2836 + if (inode->i_nlink != 2) 2837 + goto next; 2884 2838 } 2885 - if (userns->may_mount_sysfs && userns->may_mount_proc) 2886 - break; 2839 + visible = true; 2840 + goto found; 2841 + next: ; 2887 2842 } 2888 - up_read(&namespace_sem); 2843 + found: 2844 + namespace_unlock(); 2845 + return visible; 2889 2846 } 2890 2847 2891 2848 static void *mntns_get(struct task_struct *task) ··· 2929 2860 struct path root; 2930 2861 2931 2862 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || 2932 - !nsown_capable(CAP_SYS_CHROOT) || 2933 - !nsown_capable(CAP_SYS_ADMIN)) 2863 + !ns_capable(current_user_ns(), CAP_SYS_CHROOT) || 2864 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 2934 2865 return -EPERM; 2935 2866 2936 2867 if (fs->users != 1)
+1 -1
fs/open.c
··· 443 443 goto dput_and_out; 444 444 445 445 error = -EPERM; 446 - if (!nsown_capable(CAP_SYS_CHROOT)) 446 + if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) 447 447 goto dput_and_out; 448 448 error = security_path_chroot(&path); 449 449 if (error)
+4 -1
fs/pnode.h
··· 19 19 20 20 #define CL_EXPIRE 0x01 21 21 #define CL_SLAVE 0x02 22 - #define CL_COPY_ALL 0x04 22 + #define CL_COPY_UNBINDABLE 0x04 23 23 #define CL_MAKE_SHARED 0x08 24 24 #define CL_PRIVATE 0x10 25 25 #define CL_SHARED_TO_SLAVE 0x20 26 26 #define CL_UNPRIVILEGED 0x40 27 + #define CL_COPY_MNT_NS_FILE 0x80 28 + 29 + #define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) 27 30 28 31 static inline void set_mnt_shared(struct mount *mnt) 29 32 {
+5 -1
fs/proc/root.c
··· 110 110 ns = task_active_pid_ns(current); 111 111 options = data; 112 112 113 - if (!current_user_ns()->may_mount_proc) 113 + if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) 114 + return ERR_PTR(-EPERM); 115 + 116 + /* Does the mounter have privilege over the pid namespace? */ 117 + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) 114 118 return ERR_PTR(-EPERM); 115 119 } 116 120
+9 -2
fs/sysfs/mount.c
··· 112 112 struct super_block *sb; 113 113 int error; 114 114 115 - if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) 116 - return ERR_PTR(-EPERM); 115 + if (!(flags & MS_KERNMOUNT)) { 116 + if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) 117 + return ERR_PTR(-EPERM); 118 + 119 + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { 120 + if (!kobj_ns_current_may_mount(type)) 121 + return ERR_PTR(-EPERM); 122 + } 123 + } 117 124 118 125 info = kzalloc(sizeof(*info), GFP_KERNEL); 119 126 if (!info)
-1
include/linux/capability.h
··· 210 210 struct user_namespace *ns, int cap); 211 211 extern bool capable(int cap); 212 212 extern bool ns_capable(struct user_namespace *ns, int cap); 213 - extern bool nsown_capable(int cap); 214 213 extern bool inode_capable(const struct inode *inode, int cap); 215 214 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); 216 215
+1
include/linux/fs.h
··· 1900 1900 extern int freeze_super(struct super_block *super); 1901 1901 extern int thaw_super(struct super_block *super); 1902 1902 extern bool our_mnt(struct vfsmount *mnt); 1903 + extern bool fs_fully_visible(struct file_system_type *); 1903 1904 1904 1905 extern int current_umask(void); 1905 1906
+2
include/linux/kobject_ns.h
··· 39 39 */ 40 40 struct kobj_ns_type_operations { 41 41 enum kobj_ns_type type; 42 + bool (*current_may_mount)(void); 42 43 void *(*grab_current_ns)(void); 43 44 const void *(*netlink_ns)(struct sock *sk); 44 45 const void *(*initial_ns)(void); ··· 51 50 const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); 52 51 const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); 53 52 53 + bool kobj_ns_current_may_mount(enum kobj_ns_type type); 54 54 void *kobj_ns_grab_current(enum kobj_ns_type type); 55 55 const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); 56 56 const void *kobj_ns_initial(enum kobj_ns_type type);
+1
include/linux/mount.h
··· 48 48 #define MNT_INTERNAL 0x4000 49 49 50 50 #define MNT_LOCK_READONLY 0x400000 51 + #define MNT_LOCKED 0x800000 51 52 52 53 struct vfsmount { 53 54 struct dentry *mnt_root; /* root of the mounted tree */
-4
include/linux/user_namespace.h
··· 27 27 kuid_t owner; 28 28 kgid_t group; 29 29 unsigned int proc_inum; 30 - bool may_mount_sysfs; 31 - bool may_mount_proc; 32 30 }; 33 31 34 32 extern struct user_namespace init_user_ns; ··· 82 84 } 83 85 84 86 #endif 85 - 86 - void update_mnt_policy(struct user_namespace *userns); 87 87 88 88 #endif /* _LINUX_USER_H */
+1 -1
ipc/namespace.c
··· 171 171 { 172 172 struct ipc_namespace *ns = new; 173 173 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || 174 - !nsown_capable(CAP_SYS_ADMIN)) 174 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 175 175 return -EPERM; 176 176 177 177 /* Ditch state from the old ipc namespace */
-12
kernel/capability.c
··· 433 433 EXPORT_SYMBOL(capable); 434 434 435 435 /** 436 - * nsown_capable - Check superior capability to one's own user_ns 437 - * @cap: The capability in question 438 - * 439 - * Return true if the current task has the given superior capability 440 - * targeted at its own user namespace. 441 - */ 442 - bool nsown_capable(int cap) 443 - { 444 - return ns_capable(current_user_ns(), cap); 445 - } 446 - 447 - /** 448 436 * inode_capable - Check superior capability over inode 449 437 * @inode: The inode in question 450 438 * @cap: The capability in question
-5
kernel/fork.c
··· 1825 1825 if (unshare_flags & CLONE_NEWUSER) 1826 1826 unshare_flags |= CLONE_THREAD | CLONE_FS; 1827 1827 /* 1828 - * If unsharing a pid namespace must also unshare the thread. 1829 - */ 1830 - if (unshare_flags & CLONE_NEWPID) 1831 - unshare_flags |= CLONE_THREAD; 1832 - /* 1833 1828 * If unsharing a thread from a thread group, must also unshare vm. 1834 1829 */ 1835 1830 if (unshare_flags & CLONE_THREAD)
+1 -1
kernel/groups.c
··· 233 233 struct group_info *group_info; 234 234 int retval; 235 235 236 - if (!nsown_capable(CAP_SETGID)) 236 + if (!ns_capable(current_user_ns(), CAP_SETGID)) 237 237 return -EPERM; 238 238 if ((unsigned)gidsetsize > NGROUPS_MAX) 239 239 return -EINVAL;
+12 -24
kernel/nsproxy.c
··· 126 126 struct nsproxy *old_ns = tsk->nsproxy; 127 127 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); 128 128 struct nsproxy *new_ns; 129 - int err = 0; 130 129 131 - if (!old_ns) 130 + if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 131 + CLONE_NEWPID | CLONE_NEWNET)))) { 132 + get_nsproxy(old_ns); 132 133 return 0; 133 - 134 - get_nsproxy(old_ns); 135 - 136 - if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 137 - CLONE_NEWPID | CLONE_NEWNET))) 138 - return 0; 139 - 140 - if (!ns_capable(user_ns, CAP_SYS_ADMIN)) { 141 - err = -EPERM; 142 - goto out; 143 134 } 135 + 136 + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 137 + return -EPERM; 144 138 145 139 /* 146 140 * CLONE_NEWIPC must detach from the undolist: after switching ··· 143 149 * means share undolist with parent, so we must forbid using 144 150 * it along with CLONE_NEWIPC. 145 151 */ 146 - if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { 147 - err = -EINVAL; 148 - goto out; 149 - } 152 + if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) == 153 + (CLONE_NEWIPC | CLONE_SYSVSEM)) 154 + return -EINVAL; 150 155 151 156 new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs); 152 - if (IS_ERR(new_ns)) { 153 - err = PTR_ERR(new_ns); 154 - goto out; 155 - } 157 + if (IS_ERR(new_ns)) 158 + return PTR_ERR(new_ns); 156 159 157 160 tsk->nsproxy = new_ns; 158 - 159 - out: 160 - put_nsproxy(old_ns); 161 - return err; 161 + return 0; 162 162 } 163 163 164 164 void free_nsproxy(struct nsproxy *ns)
+1
kernel/pid.c
··· 265 265 struct pid_namespace *ns = upid->ns; 266 266 hlist_del_rcu(&upid->pid_chain); 267 267 switch(--ns->nr_hashed) { 268 + case 2: 268 269 case 1: 269 270 /* When all that is left in the pid namespace 270 271 * is the reaper wake up the reaper. The reaper
+1 -1
kernel/pid_namespace.c
··· 329 329 struct pid_namespace *ancestor, *new = ns; 330 330 331 331 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || 332 - !nsown_capable(CAP_SYS_ADMIN)) 332 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 333 333 return -EPERM; 334 334 335 335 /*
+10 -10
kernel/sys.c
··· 337 337 if (rgid != (gid_t) -1) { 338 338 if (gid_eq(old->gid, krgid) || 339 339 gid_eq(old->egid, krgid) || 340 - nsown_capable(CAP_SETGID)) 340 + ns_capable(old->user_ns, CAP_SETGID)) 341 341 new->gid = krgid; 342 342 else 343 343 goto error; ··· 346 346 if (gid_eq(old->gid, kegid) || 347 347 gid_eq(old->egid, kegid) || 348 348 gid_eq(old->sgid, kegid) || 349 - nsown_capable(CAP_SETGID)) 349 + ns_capable(old->user_ns, CAP_SETGID)) 350 350 new->egid = kegid; 351 351 else 352 352 goto error; ··· 387 387 old = current_cred(); 388 388 389 389 retval = -EPERM; 390 - if (nsown_capable(CAP_SETGID)) 390 + if (ns_capable(old->user_ns, CAP_SETGID)) 391 391 new->gid = new->egid = new->sgid = new->fsgid = kgid; 392 392 else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) 393 393 new->egid = new->fsgid = kgid; ··· 471 471 new->uid = kruid; 472 472 if (!uid_eq(old->uid, kruid) && 473 473 !uid_eq(old->euid, kruid) && 474 - !nsown_capable(CAP_SETUID)) 474 + !ns_capable(old->user_ns, CAP_SETUID)) 475 475 goto error; 476 476 } 477 477 ··· 480 480 if (!uid_eq(old->uid, keuid) && 481 481 !uid_eq(old->euid, keuid) && 482 482 !uid_eq(old->suid, keuid) && 483 - !nsown_capable(CAP_SETUID)) 483 + !ns_capable(old->user_ns, CAP_SETUID)) 484 484 goto error; 485 485 } 486 486 ··· 534 534 old = current_cred(); 535 535 536 536 retval = -EPERM; 537 - if (nsown_capable(CAP_SETUID)) { 537 + if (ns_capable(old->user_ns, CAP_SETUID)) { 538 538 new->suid = new->uid = kuid; 539 539 if (!uid_eq(kuid, old->uid)) { 540 540 retval = set_user(new); ··· 591 591 old = current_cred(); 592 592 593 593 retval = -EPERM; 594 - if (!nsown_capable(CAP_SETUID)) { 594 + if (!ns_capable(old->user_ns, CAP_SETUID)) { 595 595 if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && 596 596 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) 597 597 goto error; ··· 673 673 old = current_cred(); 674 674 675 675 retval = -EPERM; 676 - if (!nsown_capable(CAP_SETGID)) { 676 + if (!ns_capable(old->user_ns, 
CAP_SETGID)) { 677 677 if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && 678 678 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) 679 679 goto error; ··· 744 744 745 745 if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || 746 746 uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || 747 - nsown_capable(CAP_SETUID)) { 747 + ns_capable(old->user_ns, CAP_SETUID)) { 748 748 if (!uid_eq(kuid, old->fsuid)) { 749 749 new->fsuid = kuid; 750 750 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) ··· 783 783 784 784 if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || 785 785 gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || 786 - nsown_capable(CAP_SETGID)) { 786 + ns_capable(old->user_ns, CAP_SETGID)) { 787 787 if (!gid_eq(kgid, old->fsgid)) { 788 788 new->fsgid = kgid; 789 789 goto change_okay;
+1 -1
kernel/uid16.c
··· 176 176 struct group_info *group_info; 177 177 int retval; 178 178 179 - if (!nsown_capable(CAP_SETGID)) 179 + if (!ns_capable(current_user_ns(), CAP_SETGID)) 180 180 return -EPERM; 181 181 if ((unsigned)gidsetsize > NGROUPS_MAX) 182 182 return -EINVAL;
-2
kernel/user.c
··· 51 51 .owner = GLOBAL_ROOT_UID, 52 52 .group = GLOBAL_ROOT_GID, 53 53 .proc_inum = PROC_USER_INIT_INO, 54 - .may_mount_sysfs = true, 55 - .may_mount_proc = true, 56 54 }; 57 55 EXPORT_SYMBOL_GPL(init_user_ns); 58 56
-2
kernel/user_namespace.c
··· 101 101 102 102 set_cred_user_ns(new, ns); 103 103 104 - update_mnt_policy(ns); 105 - 106 104 return 0; 107 105 } 108 106
+1 -1
kernel/utsname.c
··· 114 114 struct uts_namespace *ns = new; 115 115 116 116 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || 117 - !nsown_capable(CAP_SYS_ADMIN)) 117 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 118 118 return -EPERM; 119 119 120 120 get_uts_ns(ns);
+15
lib/kobject.c
··· 931 931 return kobj_child_ns_ops(kobj->parent); 932 932 } 933 933 934 + bool kobj_ns_current_may_mount(enum kobj_ns_type type) 935 + { 936 + bool may_mount = false; 937 + 938 + if (type == KOBJ_NS_TYPE_NONE) 939 + return true; 940 + 941 + spin_lock(&kobj_ns_type_lock); 942 + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && 943 + kobj_ns_ops_tbl[type]) 944 + may_mount = kobj_ns_ops_tbl[type]->current_may_mount(); 945 + spin_unlock(&kobj_ns_type_lock); 946 + 947 + return may_mount; 948 + } 934 949 935 950 void *kobj_ns_grab_current(enum kobj_ns_type type) 936 951 {
+8
net/core/net-sysfs.c
··· 1196 1196 #endif 1197 1197 } 1198 1198 1199 + static bool net_current_may_mount(void) 1200 + { 1201 + struct net *net = current->nsproxy->net_ns; 1202 + 1203 + return ns_capable(net->user_ns, CAP_SYS_ADMIN); 1204 + } 1205 + 1199 1206 static void *net_grab_current_ns(void) 1200 1207 { 1201 1208 struct net *ns = current->nsproxy->net_ns; ··· 1225 1218 1226 1219 struct kobj_ns_type_operations net_ns_type_operations = { 1227 1220 .type = KOBJ_NS_TYPE_NET, 1221 + .current_may_mount = net_current_may_mount, 1228 1222 .grab_current_ns = net_grab_current_ns, 1229 1223 .netlink_ns = net_netlink_ns, 1230 1224 .initial_ns = net_initial_ns,
+1 -1
net/core/net_namespace.c
··· 651 651 struct net *net = ns; 652 652 653 653 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || 654 - !nsown_capable(CAP_SYS_ADMIN)) 654 + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 655 655 return -EPERM; 656 656 657 657 put_net(nsproxy->net_ns);
+2 -2
net/core/scm.c
··· 56 56 if ((creds->pid == task_tgid_vnr(current) || 57 57 ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && 58 58 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || 59 - uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && 59 + uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) && 60 60 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || 61 - gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { 61 + gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) { 62 62 return 0; 63 63 } 64 64 return -EPERM;
+5 -5
security/commoncap.c
··· 768 768 */ 769 769 static int cap_safe_nice(struct task_struct *p) 770 770 { 771 - int is_subset; 771 + int is_subset, ret = 0; 772 772 773 773 rcu_read_lock(); 774 774 is_subset = cap_issubset(__task_cred(p)->cap_permitted, 775 775 current_cred()->cap_permitted); 776 + if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) 777 + ret = -EPERM; 776 778 rcu_read_unlock(); 777 779 778 - if (!is_subset && !capable(CAP_SYS_NICE)) 779 - return -EPERM; 780 - return 0; 780 + return ret; 781 781 } 782 782 783 783 /** ··· 824 824 */ 825 825 static long cap_prctl_drop(struct cred *new, unsigned long cap) 826 826 { 827 - if (!capable(CAP_SETPCAP)) 827 + if (!ns_capable(current_user_ns(), CAP_SETPCAP)) 828 828 return -EPERM; 829 829 if (!cap_valid(cap)) 830 830 return -EINVAL;