Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fanotify: support watching filesystems and mounts inside userns

An unprivileged user is allowed to create an fanotify group and add
inode marks, but not filesystem, mntns and mount marks.

Add limited support for setting up filesystem, mntns and mount marks by
an unprivileged user under the following conditions:

1. User has CAP_SYS_ADMIN in the user ns where the group was created
2.a. User has CAP_SYS_ADMIN in the user ns where the sb was created
OR (in case of setting up a mntns mark)
2.b. User has CAP_SYS_ADMIN in the user ns associated with the mntns

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250516192803.838659-3-amir73il@gmail.com

Authored by Amir Goldstein and committed by Jan Kara
58f5fbeb 90d12380

+31 -15
+1
fs/notify/fanotify/fanotify.c
··· 1009 1009 1010 1010 static void fanotify_free_group_priv(struct fsnotify_group *group) 1011 1011 { 1012 + put_user_ns(group->user_ns); 1012 1013 kfree(group->fanotify_data.merge_hash); 1013 1014 if (group->fanotify_data.ucounts) 1014 1015 dec_ucount(group->fanotify_data.ucounts,
+27 -12
fs/notify/fanotify/fanotify_user.c
··· 1499 1499 /* fanotify syscalls */ 1500 1500 SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) 1501 1501 { 1502 + struct user_namespace *user_ns = current_user_ns(); 1502 1503 struct fsnotify_group *group; 1503 1504 int f_flags, fd; 1504 1505 unsigned int fid_mode = flags & FANOTIFY_FID_BITS; ··· 1514 1513 /* 1515 1514 * An unprivileged user can setup an fanotify group with 1516 1515 * limited functionality - an unprivileged group is limited to 1517 - * notification events with file handles and it cannot use 1518 - * unlimited queue/marks. 1516 + * notification events with file handles or mount ids and it 1517 + * cannot use unlimited queue/marks. 1519 1518 */ 1520 - if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) 1519 + if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || 1520 + !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT))) 1521 1521 return -EPERM; 1522 1522 1523 1523 /* ··· 1597 1595 } 1598 1596 1599 1597 /* Enforce groups limits per user in all containing user ns */ 1600 - group->fanotify_data.ucounts = inc_ucount(current_user_ns(), 1601 - current_euid(), 1598 + group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(), 1602 1599 UCOUNT_FANOTIFY_GROUPS); 1603 1600 if (!group->fanotify_data.ucounts) { 1604 1601 fd = -EMFILE; ··· 1606 1605 1607 1606 group->fanotify_data.flags = flags | internal_flags; 1608 1607 group->memcg = get_mem_cgroup_from_mm(current->mm); 1608 + group->user_ns = get_user_ns(user_ns); 1609 1609 1610 1610 group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); 1611 1611 if (!group->fanotify_data.merge_hash) { ··· 1806 1804 struct fsnotify_group *group; 1807 1805 struct path path; 1808 1806 struct fan_fsid __fsid, *fsid = NULL; 1807 + struct user_namespace *user_ns = NULL; 1808 + struct mnt_namespace *mntns; 1809 1809 u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; 1810 1810 unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; 1811 1811 unsigned int mark_cmd = flags & 
FANOTIFY_MARK_CMD_BITS; ··· 1901 1897 } 1902 1898 1903 1899 /* 1904 - * An unprivileged user is not allowed to setup mount nor filesystem 1905 - * marks. This also includes setting up such marks by a group that 1906 - * was initialized by an unprivileged user. 1900 + * A user is allowed to setup sb/mount/mntns marks only if it is 1901 + * capable in the user ns where the group was created. 1907 1902 */ 1908 - if ((!capable(CAP_SYS_ADMIN) || 1909 - FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && 1903 + if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) && 1910 1904 mark_type != FAN_MARK_INODE) 1911 1905 return -EPERM; 1912 1906 ··· 1988 1986 fsid = &__fsid; 1989 1987 } 1990 1988 1991 - /* inode held in place by reference to path; group by fget on fd */ 1989 + /* 1990 + * In addition to being capable in the user ns where group was created, 1991 + * the user also needs to be capable in the user ns associated with 1992 + * the filesystem or in the user ns associated with the mntns 1993 + * (when marking mntns). 1994 + */ 1992 1995 if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { 1993 1996 inode = path.dentry->d_inode; 1994 1997 obj = inode; 1995 1998 } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { 1999 + user_ns = path.mnt->mnt_sb->s_user_ns; 1996 2000 obj = path.mnt; 1997 2001 } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { 2002 + user_ns = path.mnt->mnt_sb->s_user_ns; 1998 2003 obj = path.mnt->mnt_sb; 1999 2004 } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { 2000 - obj = mnt_ns_from_dentry(path.dentry); 2005 + mntns = mnt_ns_from_dentry(path.dentry); 2006 + user_ns = mntns->user_ns; 2007 + obj = mntns; 2001 2008 } 2009 + 2010 + ret = -EPERM; 2011 + if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN)) 2012 + goto path_put_and_out; 2002 2013 2003 2014 ret = -EINVAL; 2004 2015 if (!obj)
+2 -3
include/linux/fanotify.h
··· 38 38 FAN_REPORT_PIDFD | \ 39 39 FAN_REPORT_FD_ERROR | \ 40 40 FAN_UNLIMITED_QUEUE | \ 41 - FAN_UNLIMITED_MARKS | \ 42 - FAN_REPORT_MNT) 41 + FAN_UNLIMITED_MARKS) 43 42 44 43 /* 45 44 * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. ··· 47 48 * so one of the flags for reporting file handles is required. 48 49 */ 49 50 #define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \ 50 - FANOTIFY_FID_BITS | \ 51 + FANOTIFY_FID_BITS | FAN_REPORT_MNT | \ 51 52 FAN_CLOEXEC | FAN_NONBLOCK) 52 53 53 54 #define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \
+1
include/linux/fsnotify_backend.h
··· 250 250 * full */ 251 251 252 252 struct mem_cgroup *memcg; /* memcg to charge allocations */ 253 + struct user_namespace *user_ns; /* user ns where group was created */ 253 254 254 255 /* groups can define private fields here or use the void *private */ 255 256 union {