Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

file: reclaim 24 bytes from f_owner

We currently embed struct fown_struct into struct file, letting it take up 32
bytes in total. We could tweak struct fown_struct to be more compact but
really it shouldn't even be embedded in struct file in the first place.

Instead, actual users of struct fown_struct should allocate the struct
on demand. This frees up 24 bytes in struct file.

That will have some potentially user-visible changes for the ownership
fcntl()s. Some of them can now fail due to allocation failures.
Practically, that probably will almost never happen as the allocations
are small and they only happen once per file.

The fown_struct is used during kill_fasync() which is used by e.g.,
pipes to generate a SIGIO signal. Sending of such signals is conditional
on userspace having set an owner for the file using one of the F_OWNER
fcntl()s. Such users will be unaffected if struct fown_struct is
allocated during the fcntl() call.

There are a few subsystems that call __f_setown() expecting
file->f_owner to be allocated:

(1) tun devices
file->f_op->fasync::tun_chr_fasync()
-> __f_setown()

There are no additional callers of tun_chr_fasync().

(2) tty devices

file->f_op->fasync::tty_fasync()
-> __tty_fasync()
-> __f_setown()

tty_fasync() has no additional callers but __tty_fasync() has. Note
that __tty_fasync() only calls __f_setown() if the @on argument is
true. It's called from:

file->f_op->release::tty_release()
-> tty_release()
-> __tty_fasync()
-> __f_setown()

tty_release() calls __tty_fasync() with @on false
=> __f_setown() is never called from tty_release().
=> All callers of tty_release() are safe as well.

file->f_op->open::tty_open()
-> tty_release()
-> __tty_fasync()
-> __f_setown()

__tty_hangup() calls __tty_fasync() with @on false
=> __f_setown() is never called from __tty_hangup().
=> All callers of __tty_hangup() are safe as well.

From the callchains it's obvious that (1) and (2) end up getting called
via file->f_op->fasync(). That can happen either through the F_SETFL
fcntl() with the FASYNC flag raised or via the FIOASYNC ioctl(). If
FASYNC is requested and the file isn't already FASYNC then
file->f_op->fasync() is called with @on true which ends up causing both
(1) and (2) to call __f_setown().

(1) and (2) are the only subsystems that call __f_setown() from the
file->f_op->fasync() handler. So both (1) and (2) have been updated to
allocate a struct fown_struct prior to calling fasync_helper() to
register with the fasync infrastructure. That's safe as they both call
fasync_helper() which also does allocations if @on is true.

The other interesting case is file leases:

(3) file leases
lease_manager_ops->lm_setup::lease_setup()
-> __f_setown()

Which in turn is called from:

generic_add_lease()
-> lease_manager_ops->lm_setup::lease_setup()
-> __f_setown()

So here again we can simply make generic_add_lease() allocate struct
fown_struct prior to calling lease_manager_ops->lm_setup::lease_setup(),
which happens under a spinlock and therefore cannot perform the
(potentially sleeping) GFP_KERNEL allocation itself.

With that the two remaining subsystems that call __f_setown() are:

(4) dnotify
(5) sockets

Both have their own custom ioctls to set struct fown_struct and both
have been converted to allocate a struct fown_struct on demand from
their respective ioctls.

Interactions with O_PATH are fine as well e.g., when opening a /dev/tty
as O_PATH then no file->f_op->open() happens thus no file->f_owner is
allocated. That's fine as no file operation will be set for those and
the device has never been opened. fcntl()s called on such things will
just allocate a ->f_owner on demand. Although I have zero idea why you'd
care about f_owner on an O_PATH fd.

Link: https://lore.kernel.org/r/20240813-work-f_owner-v2-1-4e9343a79f9f@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>

+168 -43
+6
drivers/net/tun.c
··· 3451 3451 struct tun_file *tfile = file->private_data; 3452 3452 int ret; 3453 3453 3454 + if (on) { 3455 + ret = file_f_owner_allocate(file); 3456 + if (ret) 3457 + goto out; 3458 + } 3459 + 3454 3460 if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0) 3455 3461 goto out; 3456 3462
+6
drivers/tty/tty_io.c
··· 2225 2225 if (tty_paranoia_check(tty, file_inode(filp), "tty_fasync")) 2226 2226 goto out; 2227 2227 2228 + if (on) { 2229 + retval = file_f_owner_allocate(filp); 2230 + if (retval) 2231 + goto out; 2232 + } 2233 + 2228 2234 retval = fasync_helper(fd, filp, on, &tty->fasync); 2229 2235 if (retval <= 0) 2230 2236 goto out;
+132 -34
fs/fcntl.c
··· 33 33 #include <asm/siginfo.h> 34 34 #include <linux/uaccess.h> 35 35 36 + #include "internal.h" 37 + 36 38 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) 37 39 38 40 static int setfl(int fd, struct file * filp, unsigned int arg) ··· 89 87 return error; 90 88 } 91 89 90 + /* 91 + * Allocate an file->f_owner struct if it doesn't exist, handling racing 92 + * allocations correctly. 93 + */ 94 + int file_f_owner_allocate(struct file *file) 95 + { 96 + struct fown_struct *f_owner; 97 + 98 + f_owner = file_f_owner(file); 99 + if (f_owner) 100 + return 0; 101 + 102 + f_owner = kzalloc(sizeof(struct fown_struct), GFP_KERNEL); 103 + if (!f_owner) 104 + return -ENOMEM; 105 + 106 + rwlock_init(&f_owner->lock); 107 + f_owner->file = file; 108 + /* If someone else raced us, drop our allocation. */ 109 + if (unlikely(cmpxchg(&file->f_owner, NULL, f_owner))) 110 + kfree(f_owner); 111 + return 0; 112 + } 113 + EXPORT_SYMBOL(file_f_owner_allocate); 114 + 115 + void file_f_owner_release(struct file *file) 116 + { 117 + struct fown_struct *f_owner; 118 + 119 + f_owner = file_f_owner(file); 120 + if (f_owner) { 121 + put_pid(f_owner->pid); 122 + kfree(f_owner); 123 + } 124 + } 125 + 92 126 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, 93 127 int force) 94 128 { 95 - write_lock_irq(&filp->f_owner.lock); 96 - if (force || !filp->f_owner.pid) { 97 - put_pid(filp->f_owner.pid); 98 - filp->f_owner.pid = get_pid(pid); 99 - filp->f_owner.pid_type = type; 129 + struct fown_struct *f_owner; 130 + 131 + f_owner = file_f_owner(filp); 132 + if (WARN_ON_ONCE(!f_owner)) 133 + return; 134 + 135 + write_lock_irq(&f_owner->lock); 136 + if (force || !f_owner->pid) { 137 + put_pid(f_owner->pid); 138 + f_owner->pid = get_pid(pid); 139 + f_owner->pid_type = type; 100 140 101 141 if (pid) { 102 142 const struct cred *cred = current_cred(); 103 - filp->f_owner.uid = cred->uid; 104 - filp->f_owner.euid = cred->euid; 143 + f_owner->uid = 
cred->uid; 144 + f_owner->euid = cred->euid; 105 145 } 106 146 } 107 - write_unlock_irq(&filp->f_owner.lock); 147 + write_unlock_irq(&f_owner->lock); 108 148 } 109 149 110 150 void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, ··· 163 119 struct pid *pid = NULL; 164 120 int ret = 0; 165 121 122 + might_sleep(); 123 + 166 124 type = PIDTYPE_TGID; 167 125 if (who < 0) { 168 126 /* avoid overflow below */ ··· 174 128 type = PIDTYPE_PGID; 175 129 who = -who; 176 130 } 131 + 132 + ret = file_f_owner_allocate(filp); 133 + if (ret) 134 + return ret; 177 135 178 136 rcu_read_lock(); 179 137 if (who) { ··· 202 152 pid_t f_getown(struct file *filp) 203 153 { 204 154 pid_t pid = 0; 155 + struct fown_struct *f_owner; 205 156 206 - read_lock_irq(&filp->f_owner.lock); 157 + f_owner = file_f_owner(filp); 158 + if (!f_owner) 159 + return pid; 160 + 161 + read_lock_irq(&f_owner->lock); 207 162 rcu_read_lock(); 208 - if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { 209 - pid = pid_vnr(filp->f_owner.pid); 210 - if (filp->f_owner.pid_type == PIDTYPE_PGID) 163 + if (pid_task(f_owner->pid, f_owner->pid_type)) { 164 + pid = pid_vnr(f_owner->pid); 165 + if (f_owner->pid_type == PIDTYPE_PGID) 211 166 pid = -pid; 212 167 } 213 168 rcu_read_unlock(); 214 - read_unlock_irq(&filp->f_owner.lock); 169 + read_unlock_irq(&f_owner->lock); 215 170 return pid; 216 171 } 217 172 ··· 249 194 return -EINVAL; 250 195 } 251 196 197 + ret = file_f_owner_allocate(filp); 198 + if (ret) 199 + return ret; 200 + 252 201 rcu_read_lock(); 253 202 pid = find_vpid(owner.pid); 254 203 if (owner.pid && !pid) ··· 269 210 struct f_owner_ex __user *owner_p = (void __user *)arg; 270 211 struct f_owner_ex owner = {}; 271 212 int ret = 0; 213 + struct fown_struct *f_owner; 214 + enum pid_type pid_type = PIDTYPE_PID; 272 215 273 - read_lock_irq(&filp->f_owner.lock); 274 - rcu_read_lock(); 275 - if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) 276 - owner.pid = 
pid_vnr(filp->f_owner.pid); 277 - rcu_read_unlock(); 278 - switch (filp->f_owner.pid_type) { 216 + f_owner = file_f_owner(filp); 217 + if (f_owner) { 218 + read_lock_irq(&f_owner->lock); 219 + rcu_read_lock(); 220 + if (pid_task(f_owner->pid, f_owner->pid_type)) 221 + owner.pid = pid_vnr(f_owner->pid); 222 + rcu_read_unlock(); 223 + pid_type = f_owner->pid_type; 224 + } 225 + 226 + switch (pid_type) { 279 227 case PIDTYPE_PID: 280 228 owner.type = F_OWNER_TID; 281 229 break; ··· 300 234 ret = -EINVAL; 301 235 break; 302 236 } 303 - read_unlock_irq(&filp->f_owner.lock); 237 + if (f_owner) 238 + read_unlock_irq(&f_owner->lock); 304 239 305 240 if (!ret) { 306 241 ret = copy_to_user(owner_p, &owner, sizeof(owner)); ··· 315 248 static int f_getowner_uids(struct file *filp, unsigned long arg) 316 249 { 317 250 struct user_namespace *user_ns = current_user_ns(); 251 + struct fown_struct *f_owner; 318 252 uid_t __user *dst = (void __user *)arg; 319 - uid_t src[2]; 253 + uid_t src[2] = {0, 0}; 320 254 int err; 321 255 322 - read_lock_irq(&filp->f_owner.lock); 323 - src[0] = from_kuid(user_ns, filp->f_owner.uid); 324 - src[1] = from_kuid(user_ns, filp->f_owner.euid); 325 - read_unlock_irq(&filp->f_owner.lock); 256 + f_owner = file_f_owner(filp); 257 + if (f_owner) { 258 + read_lock_irq(&f_owner->lock); 259 + src[0] = from_kuid(user_ns, f_owner->uid); 260 + src[1] = from_kuid(user_ns, f_owner->euid); 261 + read_unlock_irq(&f_owner->lock); 262 + } 326 263 327 264 err = put_user(src[0], &dst[0]); 328 265 err |= put_user(src[1], &dst[1]); ··· 414 343 return f.file == filp; 415 344 } 416 345 346 + static int f_owner_sig(struct file *filp, int signum, bool setsig) 347 + { 348 + int ret = 0; 349 + struct fown_struct *f_owner; 350 + 351 + might_sleep(); 352 + 353 + if (setsig) { 354 + if (!valid_signal(signum)) 355 + return -EINVAL; 356 + 357 + ret = file_f_owner_allocate(filp); 358 + if (ret) 359 + return ret; 360 + } 361 + 362 + f_owner = file_f_owner(filp); 363 + if (setsig) 364 
+ f_owner->signum = signum; 365 + else if (f_owner) 366 + ret = f_owner->signum; 367 + return ret; 368 + } 369 + 417 370 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 418 371 struct file *filp) 419 372 { ··· 516 421 err = f_getowner_uids(filp, arg); 517 422 break; 518 423 case F_GETSIG: 519 - err = filp->f_owner.signum; 424 + err = f_owner_sig(filp, 0, false); 520 425 break; 521 426 case F_SETSIG: 522 - /* arg == 0 restores default behaviour. */ 523 - if (!valid_signal(argi)) { 524 - break; 525 - } 526 - err = 0; 527 - filp->f_owner.signum = argi; 427 + err = f_owner_sig(filp, argi, true); 528 428 break; 529 429 case F_GETLEASE: 530 430 err = fcntl_getlease(filp); ··· 934 844 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type); 935 845 } 936 846 937 - int send_sigurg(struct fown_struct *fown) 847 + int send_sigurg(struct file *file) 938 848 { 849 + struct fown_struct *fown; 939 850 struct task_struct *p; 940 851 enum pid_type type; 941 852 struct pid *pid; 942 853 unsigned long flags; 943 854 int ret = 0; 944 855 856 + fown = file_f_owner(file); 857 + if (!fown) 858 + return 0; 859 + 945 860 read_lock_irqsave(&fown->lock, flags); 946 861 947 862 type = fown->pid_type; ··· 1122 1027 } 1123 1028 read_lock_irqsave(&fa->fa_lock, flags); 1124 1029 if (fa->fa_file) { 1125 - fown = &fa->fa_file->f_owner; 1030 + fown = file_f_owner(fa->fa_file); 1031 + if (!fown) 1032 + goto next; 1126 1033 /* Don't send SIGURG to processes which have not set a 1127 1034 queued signum: SIGURG has its own default signalling 1128 1035 mechanism. */ 1129 1036 if (!(sig == SIGURG && fown->signum == 0)) 1130 1037 send_sigio(fown, fa->fa_fd, band); 1131 1038 } 1039 + next: 1132 1040 read_unlock_irqrestore(&fa->fa_lock, flags); 1133 1041 fa = rcu_dereference(fa->fa_next); 1134 1042 }
+1 -2
fs/file_table.c
··· 155 155 return error; 156 156 } 157 157 158 - rwlock_init(&f->f_owner.lock); 159 158 spin_lock_init(&f->f_lock); 160 159 mutex_init(&f->f_pos_lock); 161 160 f->f_flags = flags; ··· 424 425 cdev_put(inode->i_cdev); 425 426 } 426 427 fops_put(file->f_op); 427 - put_pid(file->f_owner.pid); 428 + file_f_owner_release(file); 428 429 put_file_access(file); 429 430 dput(dentry); 430 431 if (unlikely(mode & FMODE_NEED_UNMOUNT))
+1
fs/internal.h
··· 337 337 { 338 338 return path->mnt->mnt_root == path->dentry; 339 339 } 340 + void file_f_owner_release(struct file *file);
+5 -1
fs/locks.c
··· 1451 1451 struct file *filp = fl->c.flc_file; 1452 1452 1453 1453 f_delown(filp); 1454 - filp->f_owner.signum = 0; 1454 + file_f_owner(filp)->signum = 0; 1455 1455 fasync_helper(0, fl->c.flc_file, 0, &fl->fl_fasync); 1456 1456 if (fl->fl_fasync != NULL) { 1457 1457 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); ··· 1782 1782 1783 1783 lease = *flp; 1784 1784 trace_generic_add_lease(inode, lease); 1785 + 1786 + error = file_f_owner_allocate(filp); 1787 + if (error) 1788 + return error; 1785 1789 1786 1790 /* Note that arg is never F_UNLCK here */ 1787 1791 ctx = locks_get_lock_context(inode, arg);
+5 -1
fs/notify/dnotify/dnotify.c
··· 110 110 prev = &dn->dn_next; 111 111 continue; 112 112 } 113 - fown = &dn->dn_filp->f_owner; 113 + fown = file_f_owner(dn->dn_filp); 114 114 send_sigio(fown, dn->dn_fd, POLL_MSG); 115 115 if (dn->dn_mask & FS_DN_MULTISHOT) 116 116 prev = &dn->dn_next; ··· 315 315 error = -ENOMEM; 316 316 goto out_err; 317 317 } 318 + 319 + error = file_f_owner_allocate(filp); 320 + if (error) 321 + goto out_err; 318 322 319 323 /* set up the new_fsn_mark and new_dn_mark */ 320 324 new_fsn_mark = &new_dn_mark->fsn_mark;
+9 -2
include/linux/fs.h
··· 947 947 } 948 948 949 949 struct fown_struct { 950 + struct file *file; /* backpointer for security modules */ 950 951 rwlock_t lock; /* protects pid, uid, euid fields */ 951 952 struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ 952 953 enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ ··· 1012 1011 struct mutex f_pos_lock; 1013 1012 loff_t f_pos; 1014 1013 unsigned int f_flags; 1015 - struct fown_struct f_owner; 1014 + struct fown_struct *f_owner; 1016 1015 const struct cred *f_cred; 1017 1016 struct file_ra_state f_ra; 1018 1017 struct path f_path; ··· 1077 1076 #define OFFT_OFFSET_MAX type_max(off_t) 1078 1077 #endif 1079 1078 1079 + int file_f_owner_allocate(struct file *file); 1080 + static inline struct fown_struct *file_f_owner(const struct file *file) 1081 + { 1082 + return READ_ONCE(file->f_owner); 1083 + } 1084 + 1080 1085 extern void send_sigio(struct fown_struct *fown, int fd, int band); 1081 1086 1082 1087 static inline struct inode *file_inode(const struct file *f) ··· 1131 1124 extern int f_setown(struct file *filp, int who, int force); 1132 1125 extern void f_delown(struct file *filp); 1133 1126 extern pid_t f_getown(struct file *filp); 1134 - extern int send_sigurg(struct fown_struct *fown); 1127 + extern int send_sigurg(struct file *file); 1135 1128 1136 1129 /* 1137 1130 * sb->s_flags. Note that these mirror the equivalent MS_* flags where
+1 -1
net/core/sock.c
··· 3429 3429 void sk_send_sigurg(struct sock *sk) 3430 3430 { 3431 3431 if (sk->sk_socket && sk->sk_socket->file) 3432 - if (send_sigurg(&sk->sk_socket->file->f_owner)) 3432 + if (send_sigurg(sk->sk_socket->file)) 3433 3433 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); 3434 3434 } 3435 3435 EXPORT_SYMBOL(sk_send_sigurg);
+1 -1
security/selinux/hooks.c
··· 3950 3950 struct file_security_struct *fsec; 3951 3951 3952 3952 /* struct fown_struct is never outside the context of a struct file */ 3953 - file = container_of(fown, struct file, f_owner); 3953 + file = fown->file; 3954 3954 3955 3955 fsec = selinux_file(file); 3956 3956
+1 -1
security/smack/smack_lsm.c
··· 1950 1950 /* 1951 1951 * struct fown_struct is never outside the context of a struct file 1952 1952 */ 1953 - file = container_of(fown, struct file, f_owner); 1953 + file = fown->file; 1954 1954 1955 1955 /* we don't log here as rc can be overriden */ 1956 1956 blob = smack_file(file);