Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs: Better permission checking for submounts

To support unprivileged users mounting filesystems two permission
checks have to be performed: a test to see if the user is allowed to
create a mount in the mount namespace, and a test to see if
the user is allowed to access the specified filesystem.

The automount case is special in that mounting the original filesystem
grants permission to mount the sub-filesystems, to any user who
happens to stumble across their mountpoint and satisfies the
ordinary filesystem permission checks.

Attempting to handle the automount case by using override_creds
almost works. It preserves the idea that permission to mount
the original filesystem is permission to mount the sub-filesystem.
Unfortunately using override_creds messes up the filesystem's
ordinary permission checks.

Solve this by being explicit that a mount is a submount by introducing
vfs_submount, and using it where appropriate.

vfs_submount uses a new internal mount flag, MS_SUBMOUNT, to let
sget and friends know that a mount is a submount so they can take appropriate
action.

sget and sget_userns are modified to not perform any permission checks
on submounts.

follow_automount is modified to stop using override_creds as that
has proven problematic.

do_mount is modified to always remove the new MS_SUBMOUNT flag so
that we know userspace will never be able to specify it.

autofs4 is modified to stop using current_real_cred that was put in
there to handle the previous version of submount permission checking.

cifs is modified to pass the mountpoint all of the way down to vfs_submount.

debugfs is modified to pass the mountpoint all of the way down to
trace_automount by adding a new parameter. To make this change easier
a new typedef debugfs_automount_t is introduced to capture the type of
the debugfs automount function.

Cc: stable@vger.kernel.org
Fixes: 069d5ac9ae0d ("autofs: Fix automounts by using current_real_cred()->uid")
Fixes: aeaa4a79ff6a ("fs: Call d_automount with the filesystems creds")
Reviewed-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

+47 -22
+1 -1
fs/afs/mntpt.c
··· 202 202 203 203 /* try and do the mount */ 204 204 _debug("--- attempting mount %s -o %s ---", devname, options); 205 - mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options); 205 + mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); 206 206 _debug("--- mount result %p ---", mnt); 207 207 208 208 free_page((unsigned long) devname);
+2 -2
fs/autofs4/waitq.c
··· 436 436 memcpy(&wq->name, &qstr, sizeof(struct qstr)); 437 437 wq->dev = autofs4_get_dev(sbi); 438 438 wq->ino = autofs4_get_ino(sbi); 439 - wq->uid = current_real_cred()->uid; 440 - wq->gid = current_real_cred()->gid; 439 + wq->uid = current_cred()->uid; 440 + wq->gid = current_cred()->gid; 441 441 wq->pid = pid; 442 442 wq->tgid = tgid; 443 443 wq->status = -EINTR; /* Status return if interrupted */
+4 -3
fs/cifs/cifs_dfs_ref.c
··· 245 245 * @fullpath: full path in UNC format 246 246 * @ref: server's referral 247 247 */ 248 - static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, 248 + static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, 249 + struct cifs_sb_info *cifs_sb, 249 250 const char *fullpath, const struct dfs_info3_param *ref) 250 251 { 251 252 struct vfsmount *mnt; ··· 260 259 if (IS_ERR(mountdata)) 261 260 return (struct vfsmount *)mountdata; 262 261 263 - mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata); 262 + mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata); 264 263 kfree(mountdata); 265 264 kfree(devname); 266 265 return mnt; ··· 335 334 mnt = ERR_PTR(-EINVAL); 336 335 break; 337 336 } 338 - mnt = cifs_dfs_do_refmount(cifs_sb, 337 + mnt = cifs_dfs_do_refmount(mntpt, cifs_sb, 339 338 full_path, referrals + i); 340 339 cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n", 341 340 __func__, referrals[i].node_name, mnt);
+4 -4
fs/debugfs/inode.c
··· 187 187 188 188 static struct vfsmount *debugfs_automount(struct path *path) 189 189 { 190 - struct vfsmount *(*f)(void *); 191 - f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata; 192 - return f(d_inode(path->dentry)->i_private); 190 + debugfs_automount_t f; 191 + f = (debugfs_automount_t)path->dentry->d_fsdata; 192 + return f(path->dentry, d_inode(path->dentry)->i_private); 193 193 } 194 194 195 195 static const struct dentry_operations debugfs_dops = { ··· 504 504 */ 505 505 struct dentry *debugfs_create_automount(const char *name, 506 506 struct dentry *parent, 507 - struct vfsmount *(*f)(void *), 507 + debugfs_automount_t f, 508 508 void *data) 509 509 { 510 510 struct dentry *dentry = start_creating(name, parent);
-3
fs/namei.c
··· 1100 1100 bool *need_mntput) 1101 1101 { 1102 1102 struct vfsmount *mnt; 1103 - const struct cred *old_cred; 1104 1103 int err; 1105 1104 1106 1105 if (!path->dentry->d_op || !path->dentry->d_op->d_automount) ··· 1128 1129 if (nd->total_link_count >= 40) 1129 1130 return -ELOOP; 1130 1131 1131 - old_cred = override_creds(&init_cred); 1132 1132 mnt = path->dentry->d_op->d_automount(path); 1133 - revert_creds(old_cred); 1134 1133 if (IS_ERR(mnt)) { 1135 1134 /* 1136 1135 * The filesystem is allowed to return -EISDIR here to indicate
+16 -1
fs/namespace.c
··· 989 989 } 990 990 EXPORT_SYMBOL_GPL(vfs_kern_mount); 991 991 992 + struct vfsmount * 993 + vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, 994 + const char *name, void *data) 995 + { 996 + /* Until it is worked out how to pass the user namespace 997 + * through from the parent mount to the submount don't support 998 + * unprivileged mounts with submounts. 999 + */ 1000 + if (mountpoint->d_sb->s_user_ns != &init_user_ns) 1001 + return ERR_PTR(-EPERM); 1002 + 1003 + return vfs_kern_mount(type, MS_SUBMOUNT, name, data); 1004 + } 1005 + EXPORT_SYMBOL_GPL(vfs_submount); 1006 + 992 1007 static struct mount *clone_mnt(struct mount *old, struct dentry *root, 993 1008 int flag) 994 1009 { ··· 2809 2794 2810 2795 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | 2811 2796 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | 2812 - MS_STRICTATIME | MS_NOREMOTELOCK); 2797 + MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT); 2813 2798 2814 2799 if (flags & MS_REMOUNT) 2815 2800 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
+1 -1
fs/nfs/namespace.c
··· 226 226 const char *devname, 227 227 struct nfs_clone_mount *mountdata) 228 228 { 229 - return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); 229 + return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata); 230 230 } 231 231 232 232 /**
+1 -1
fs/nfs/nfs4namespace.c
··· 279 279 mountdata->hostname, 280 280 mountdata->mnt_path); 281 281 282 - mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); 282 + mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata); 283 283 if (!IS_ERR(mnt)) 284 284 break; 285 285 }
+10 -3
fs/super.c
··· 469 469 struct super_block *old; 470 470 int err; 471 471 472 - if (!(flags & MS_KERNMOUNT) && 472 + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && 473 473 !(type->fs_flags & FS_USERNS_MOUNT) && 474 474 !capable(CAP_SYS_ADMIN)) 475 475 return ERR_PTR(-EPERM); ··· 499 499 } 500 500 if (!s) { 501 501 spin_unlock(&sb_lock); 502 - s = alloc_super(type, flags, user_ns); 502 + s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns); 503 503 if (!s) 504 504 return ERR_PTR(-ENOMEM); 505 505 goto retry; ··· 540 540 { 541 541 struct user_namespace *user_ns = current_user_ns(); 542 542 543 + /* We don't yet pass the user namespace of the parent 544 + * mount through to here so always use &init_user_ns 545 + * until that changes. 546 + */ 547 + if (flags & MS_SUBMOUNT) 548 + user_ns = &init_user_ns; 549 + 543 550 /* Ensure the requestor has permissions over the target filesystem */ 544 - if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN)) 551 + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN)) 545 552 return ERR_PTR(-EPERM); 546 553 547 554 return sget_userns(type, test, set, flags, user_ns, data);
+2 -1
include/linux/debugfs.h
··· 97 97 struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, 98 98 const char *dest); 99 99 100 + typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); 100 101 struct dentry *debugfs_create_automount(const char *name, 101 102 struct dentry *parent, 102 - struct vfsmount *(*f)(void *), 103 + debugfs_automount_t f, 103 104 void *data); 104 105 105 106 void debugfs_remove(struct dentry *dentry);
+3
include/linux/mount.h
··· 90 90 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, 91 91 int flags, const char *name, 92 92 void *data); 93 + extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, 94 + struct file_system_type *type, 95 + const char *name, void *data); 93 96 94 97 extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); 95 98 extern void mark_mounts_for_expiry(struct list_head *mounts);
+1
include/uapi/linux/fs.h
··· 132 132 #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ 133 133 134 134 /* These sb flags are internal to the kernel */ 135 + #define MS_SUBMOUNT (1<<26) 135 136 #define MS_NOREMOTELOCK (1<<27) 136 137 #define MS_NOSEC (1<<28) 137 138 #define MS_BORN (1<<29)
+2 -2
kernel/trace/trace.c
··· 7503 7503 ftrace_init_tracefs(tr, d_tracer); 7504 7504 } 7505 7505 7506 - static struct vfsmount *trace_automount(void *ingore) 7506 + static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) 7507 7507 { 7508 7508 struct vfsmount *mnt; 7509 7509 struct file_system_type *type; ··· 7516 7516 type = get_fs_type("tracefs"); 7517 7517 if (!type) 7518 7518 return NULL; 7519 - mnt = vfs_kern_mount(type, 0, "tracefs", NULL); 7519 + mnt = vfs_submount(mntpt, type, "tracefs", NULL); 7520 7520 put_filesystem(type); 7521 7521 if (IS_ERR(mnt)) 7522 7522 return NULL;