Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

pidfs: support xattrs on pidfds

Now that we have a way to persist information for pidfs dentries, we can
start supporting extended attributes on pidfds. This will allow
userspace to attach metadata to tasks.

One natural extension would be to introduce a custom pidfs.* extended
attribute namespace and allow for the inheritance of extended attributes
across fork() and exec().

The first simple scheme will allow privileged userspace to set trusted.*
extended attributes on pidfs inodes.

Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-12-98f3456fd552@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>

+103 -4
+103 -4
fs/pidfs.c
··· 21 21 #include <linux/utsname.h> 22 22 #include <net/net_namespace.h> 23 23 #include <linux/coredump.h> 24 + #include <linux/xattr.h> 24 25 25 26 #include "internal.h" 26 27 #include "mount.h" ··· 29 28 #define PIDFS_PID_DEAD ERR_PTR(-ESRCH) 30 29 31 30 static struct kmem_cache *pidfs_attr_cachep __ro_after_init; 31 + static struct kmem_cache *pidfs_xattr_cachep __ro_after_init; 32 32 33 33 /* 34 34 * Stashes information that userspace needs to access even after the ··· 42 40 }; 43 41 44 42 struct pidfs_attr { 43 + struct simple_xattrs *xattrs; 45 44 struct pidfs_exit_info __pei; 46 45 struct pidfs_exit_info *exit_info; 47 46 }; ··· 141 138 142 139 void pidfs_free_pid(struct pid *pid) 143 140 { 141 + struct pidfs_attr *attr __free(kfree) = no_free_ptr(pid->attr); 142 + struct simple_xattrs *xattrs __free(kfree) = NULL; 143 + 144 144 /* 145 145 * Any dentry must've been wiped from the pid by now. 146 146 * Otherwise there's a reference count bug. 147 147 */ 148 148 VFS_WARN_ON_ONCE(pid->stashed); 149 149 150 - if (!IS_ERR(pid->attr)) 151 - kfree(pid->attr); 150 + if (IS_ERR(attr)) 151 + return; 152 + 153 + /* 154 + * Any dentry must've been wiped from the pid by now. Otherwise 155 + * there's a reference count bug. 
156 + */ 157 + VFS_WARN_ON_ONCE(pid->stashed); 158 + 159 + xattrs = attr->xattrs; 160 + if (xattrs) 161 + simple_xattrs_free(attr->xattrs, NULL); 152 162 } 153 163 154 164 #ifdef CONFIG_PROC_FS ··· 679 663 return anon_inode_getattr(idmap, path, stat, request_mask, query_flags); 680 664 } 681 665 666 + static ssize_t pidfs_listxattr(struct dentry *dentry, char *buf, size_t size) 667 + { 668 + struct inode *inode = d_inode(dentry); 669 + struct pid *pid = inode->i_private; 670 + struct pidfs_attr *attr = pid->attr; 671 + struct simple_xattrs *xattrs; 672 + 673 + xattrs = READ_ONCE(attr->xattrs); 674 + if (!xattrs) 675 + return 0; 676 + 677 + return simple_xattr_list(inode, xattrs, buf, size); 678 + } 679 + 682 680 static const struct inode_operations pidfs_inode_operations = { 683 - .getattr = pidfs_getattr, 684 - .setattr = pidfs_setattr, 681 + .getattr = pidfs_getattr, 682 + .setattr = pidfs_setattr, 683 + .listxattr = pidfs_listxattr, 685 684 }; 686 685 687 686 static void pidfs_evict_inode(struct inode *inode) ··· 936 905 .put_data = pidfs_put_data, 937 906 }; 938 907 908 + static int pidfs_xattr_get(const struct xattr_handler *handler, 909 + struct dentry *unused, struct inode *inode, 910 + const char *suffix, void *value, size_t size) 911 + { 912 + struct pid *pid = inode->i_private; 913 + struct pidfs_attr *attr = pid->attr; 914 + const char *name; 915 + struct simple_xattrs *xattrs; 916 + 917 + xattrs = READ_ONCE(attr->xattrs); 918 + if (!xattrs) 919 + return 0; 920 + 921 + name = xattr_full_name(handler, suffix); 922 + return simple_xattr_get(xattrs, name, value, size); 923 + } 924 + 925 + static int pidfs_xattr_set(const struct xattr_handler *handler, 926 + struct mnt_idmap *idmap, struct dentry *unused, 927 + struct inode *inode, const char *suffix, 928 + const void *value, size_t size, int flags) 929 + { 930 + struct pid *pid = inode->i_private; 931 + struct pidfs_attr *attr = pid->attr; 932 + const char *name; 933 + struct simple_xattrs *xattrs; 934 + 
struct simple_xattr *old_xattr; 935 + 936 + /* Ensure we're the only one to set @attr->xattrs. */ 937 + WARN_ON_ONCE(!inode_is_locked(inode)); 938 + 939 + xattrs = READ_ONCE(attr->xattrs); 940 + if (!xattrs) { 941 + xattrs = kmem_cache_zalloc(pidfs_xattr_cachep, GFP_KERNEL); 942 + if (!xattrs) 943 + return -ENOMEM; 944 + 945 + simple_xattrs_init(xattrs); 946 + smp_store_release(&pid->attr->xattrs, xattrs); 947 + } 948 + 949 + name = xattr_full_name(handler, suffix); 950 + old_xattr = simple_xattr_set(xattrs, name, value, size, flags); 951 + if (IS_ERR(old_xattr)) 952 + return PTR_ERR(old_xattr); 953 + 954 + simple_xattr_free(old_xattr); 955 + return 0; 956 + } 957 + 958 + static const struct xattr_handler pidfs_trusted_xattr_handler = { 959 + .prefix = XATTR_TRUSTED_PREFIX, 960 + .get = pidfs_xattr_get, 961 + .set = pidfs_xattr_set, 962 + }; 963 + 964 + static const struct xattr_handler *const pidfs_xattr_handlers[] = { 965 + &pidfs_trusted_xattr_handler, 966 + NULL 967 + }; 968 + 939 969 static int pidfs_init_fs_context(struct fs_context *fc) 940 970 { 941 971 struct pseudo_fs_context *ctx; ··· 1010 918 ctx->ops = &pidfs_sops; 1011 919 ctx->eops = &pidfs_export_operations; 1012 920 ctx->dops = &pidfs_dentry_operations; 921 + ctx->xattr = pidfs_xattr_handlers; 1013 922 fc->s_fs_info = (void *)&pidfs_stashed_ops; 1014 923 return 0; 1015 924 } ··· 1053 960 pidfs_attr_cachep = kmem_cache_create("pidfs_attr_cache", sizeof(struct pidfs_attr), 0, 1054 961 (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | 1055 962 SLAB_ACCOUNT | SLAB_PANIC), NULL); 963 + 964 + pidfs_xattr_cachep = kmem_cache_create("pidfs_xattr_cache", 965 + sizeof(struct simple_xattrs), 0, 966 + (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | 967 + SLAB_ACCOUNT | SLAB_PANIC), NULL); 968 + 1056 969 pidfs_mnt = kern_mount(&pidfs_type); 1057 970 if (IS_ERR(pidfs_mnt)) 1058 971 panic("Failed to mount pidfs pseudo filesystem");