Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs/coredump: prevent fsuid=0 dumps into user-controlled directories

This commit fixes the following security hole affecting systems where
all of the following conditions are fulfilled:

- The fs.suid_dumpable sysctl is set to 2.
- The kernel.core_pattern sysctl's value starts with "/". (Systems
  where kernel.core_pattern starts with "|/" are not affected.)
- Unprivileged user namespace creation is permitted. (This is
  true on Linux >=3.8, but some distributions disallow it by
  default using a distro patch.)

Under these conditions, a program that executes under secure exec rules
(and therefore runs with the SUID_DUMP_ROOT flag) can unshare its user
namespace, change its root directory and then crash. The coredump will
then be written using fsuid=0 and a path derived from kernel.core_pattern -
but this path is interpreted relative to the root directory of the process,
allowing the attacker to control where a coredump will be written with
root privileges.

To fix the security issue, for dumps that are written under SUID_DUMP_ROOT,
always interpret core_pattern relative to the root directory of init.

Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Jann Horn and committed by Linus Torvalds
378c6520 1333ab03

+32 -12
+1 -1
arch/um/drivers/mconsole_kern.c
··· 133 133 ptr += strlen("proc"); 134 134 ptr = skip_spaces(ptr); 135 135 136 - file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); 136 + file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0); 137 137 if (IS_ERR(file)) { 138 138 mconsole_reply(req, "Failed to open file", 1, 0); 139 139 printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file));
+26 -4
fs/coredump.c
··· 32 32 #include <linux/pipe_fs_i.h> 33 33 #include <linux/oom.h> 34 34 #include <linux/compat.h> 35 + #include <linux/sched.h> 36 + #include <linux/fs.h> 37 + #include <linux/path.h> 35 38 #include <linux/timekeeping.h> 36 39 37 40 #include <asm/uaccess.h> ··· 652 649 } 653 650 } else { 654 651 struct inode *inode; 652 + int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | 653 + O_LARGEFILE | O_EXCL; 655 654 656 655 if (cprm.limit < binfmt->min_coredump) 657 656 goto fail_unlock; ··· 692 687 * what matters is that at least one of the two processes 693 688 * writes its coredump successfully, not which one. 694 689 */ 695 - cprm.file = filp_open(cn.corename, 696 - O_CREAT | 2 | O_NOFOLLOW | 697 - O_LARGEFILE | O_EXCL, 698 - 0600); 690 + if (need_suid_safe) { 691 + /* 692 + * Using user namespaces, normal user tasks can change 693 + * their current->fs->root to point to arbitrary 694 + * directories. Since the intention of the "only dump 695 + * with a fully qualified path" rule is to control where 696 + * coredumps may be placed using root privileges, 697 + * current->fs->root must not be used. Instead, use the 698 + * root directory of init_task. 699 + */ 700 + struct path root; 701 + 702 + task_lock(&init_task); 703 + get_fs_root(init_task.fs, &root); 704 + task_unlock(&init_task); 705 + cprm.file = file_open_root(root.dentry, root.mnt, 706 + cn.corename, open_flags, 0600); 707 + path_put(&root); 708 + } else { 709 + cprm.file = filp_open(cn.corename, open_flags, 0600); 710 + } 699 711 if (IS_ERR(cprm.file)) 700 712 goto fail_unlock; 701 713
+1 -1
fs/fhandle.c
··· 228 228 path_put(&path); 229 229 return fd; 230 230 } 231 - file = file_open_root(path.dentry, path.mnt, "", open_flag); 231 + file = file_open_root(path.dentry, path.mnt, "", open_flag, 0); 232 232 if (IS_ERR(file)) { 233 233 put_unused_fd(fd); 234 234 retval = PTR_ERR(file);
+2 -4
fs/open.c
··· 992 992 EXPORT_SYMBOL(filp_open); 993 993 994 994 struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, 995 - const char *filename, int flags) 995 + const char *filename, int flags, umode_t mode) 996 996 { 997 997 struct open_flags op; 998 - int err = build_open_flags(flags, 0, &op); 998 + int err = build_open_flags(flags, mode, &op); 999 999 if (err) 1000 1000 return ERR_PTR(err); 1001 - if (flags & O_CREAT) 1002 - return ERR_PTR(-EINVAL); 1003 1001 return do_file_open_root(dentry, mnt, filename, &op); 1004 1002 } 1005 1003 EXPORT_SYMBOL(file_open_root);
+1 -1
include/linux/fs.h
··· 2263 2263 extern struct file *file_open_name(struct filename *, int, umode_t); 2264 2264 extern struct file *filp_open(const char *, int, umode_t); 2265 2265 extern struct file *file_open_root(struct dentry *, struct vfsmount *, 2266 - const char *, int); 2266 + const char *, int, umode_t); 2267 2267 extern struct file * dentry_open(const struct path *, int, const struct cred *); 2268 2268 extern int filp_close(struct file *, fl_owner_t id); 2269 2269
+1 -1
kernel/sysctl_binary.c
··· 1321 1321 } 1322 1322 1323 1323 mnt = task_active_pid_ns(current)->proc_mnt; 1324 - file = file_open_root(mnt->mnt_root, mnt, pathname, flags); 1324 + file = file_open_root(mnt->mnt_root, mnt, pathname, flags, 0); 1325 1325 result = PTR_ERR(file); 1326 1326 if (IS_ERR(file)) 1327 1327 goto out_putname;