Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vfs: Commit to never having executables on proc and sysfs.

Today proc and sysfs do not contain any executable files. Several
applications today mount proc or sysfs without noexec and nosuid and
then depend on there being no executable files on proc or sysfs.
Having any executable files show up on proc or sysfs would cause
a user space visible regression, and most likely security problems.

Therefore commit to never allowing executables on proc and sysfs by
adding a new flag to mark them as filesystems without executables and
enforce that flag.

Test the flag where MNT_NOEXEC is tested today, so that the only user
visible effect will be that executables will be treated as if the
execute bit is cleared.

The filesystems proc and sysfs do not currently incorporate any
executable files so this does not result in any user visible effects.

This makes it unnecessary to vet changes to proc and sysfs tightly for
adding executable files or changes to chattr that would modify
existing files, as no matter what the individual files say they will
not be treated as executable files by the vfs.

Not having to vet changes too closely is important as without this we
are only one proc_create call (or another goof up in the
implementation of notify_change) from having problematic executables
on proc. Those mistakes are all too easy to make and would create
a situation where there are security issues or the assumptions of
some program would be broken (causing userspace regressions).

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

+23 -9
+8 -2
fs/exec.c
··· 98 98 module_put(fmt->module); 99 99 } 100 100 101 + bool path_noexec(const struct path *path) 102 + { 103 + return (path->mnt->mnt_flags & MNT_NOEXEC) || 104 + (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); 105 + } 106 + 101 107 #ifdef CONFIG_USELIB 102 108 /* 103 109 * Note that a shared library must be both readable and executable due to ··· 138 132 goto exit; 139 133 140 134 error = -EACCES; 141 - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) 135 + if (path_noexec(&file->f_path)) 142 136 goto exit; 143 137 144 138 fsnotify_open(file); ··· 783 777 if (!S_ISREG(file_inode(file)->i_mode)) 784 778 goto exit; 785 779 786 - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) 780 + if (path_noexec(&file->f_path)) 787 781 goto exit; 788 782 789 783 err = deny_write_access(file);
+1 -1
fs/open.c
··· 377 377 * with the "noexec" flag. 378 378 */ 379 379 res = -EACCES; 380 - if (path.mnt->mnt_flags & MNT_NOEXEC) 380 + if (path_noexec(&path)) 381 381 goto out_path_release; 382 382 } 383 383
+2
fs/proc/root.c
··· 134 134 } 135 135 136 136 sb->s_flags |= MS_ACTIVE; 137 + /* User space would break if executables appear on proc */ 138 + sb->s_iflags |= SB_I_NOEXEC; 137 139 } 138 140 139 141 return dget(sb->s_root);
+4
fs/sysfs/mount.c
··· 40 40 SYSFS_MAGIC, &new_sb, ns); 41 41 if (IS_ERR(root) || !new_sb) 42 42 kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); 43 + else if (new_sb) 44 + /* Userspace would break if executables appear on sysfs */ 45 + root->d_sb->s_iflags |= SB_I_NOEXEC; 46 + 43 47 return root; 44 48 } 45 49
+3
include/linux/fs.h
··· 1244 1244 1245 1245 /* sb->s_iflags */ 1246 1246 #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ 1247 + #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ 1247 1248 1248 1249 /* Possible states of 'frozen' field */ 1249 1250 enum { ··· 3030 3029 mutex_lock(&inode->i_mutex); 3031 3030 return !IS_DEADDIR(inode); 3032 3031 } 3032 + 3033 + extern bool path_noexec(const struct path *path); 3033 3034 3034 3035 #endif /* _LINUX_FS_H */
+1 -2
kernel/sys.c
··· 1668 1668 * overall picture. 1669 1669 */ 1670 1670 err = -EACCES; 1671 - if (!S_ISREG(inode->i_mode) || 1672 - exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1671 + if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path)) 1673 1672 goto exit; 1674 1673 1675 1674 err = inode_permission(inode, MAY_EXEC);
+2 -2
mm/mmap.c
··· 1268 1268 * mounted, in which case we dont add PROT_EXEC.) 1269 1269 */ 1270 1270 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) 1271 - if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC))) 1271 + if (!(file && path_noexec(&file->f_path))) 1272 1272 prot |= PROT_EXEC; 1273 1273 1274 1274 if (!(flags & MAP_FIXED)) ··· 1337 1337 case MAP_PRIVATE: 1338 1338 if (!(file->f_mode & FMODE_READ)) 1339 1339 return -EACCES; 1340 - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { 1340 + if (path_noexec(&file->f_path)) { 1341 1341 if (vm_flags & VM_EXEC) 1342 1342 return -EPERM; 1343 1343 vm_flags &= ~VM_MAYEXEC;
+1 -1
mm/nommu.c
··· 1035 1035 1036 1036 /* handle executable mappings and implied executable 1037 1037 * mappings */ 1038 - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { 1038 + if (path_noexec(&file->f_path)) { 1039 1039 if (prot & PROT_EXEC) 1040 1040 return -EPERM; 1041 1041 } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
+1 -1
security/security.c
··· 776 776 * ditto if it's not on noexec mount, except that on !MMU we need 777 777 * NOMMU_MAP_EXEC (== VM_MAYEXEC) in this case 778 778 */ 779 - if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { 779 + if (!path_noexec(&file->f_path)) { 780 780 #ifndef CONFIG_MMU 781 781 if (file->f_op->mmap_capabilities) { 782 782 unsigned caps = file->f_op->mmap_capabilities(file);