Merge branch 'from-miklos' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

tjh.dev / kernel

fork atom

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork atom

Merge branch 'from-miklos' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs updates from Al Viro:
"Assorted patches from Miklos.

An interesting part here is /proc/mounts stuff..."

The "/proc/mounts stuff" is using a cursor for keeping the location
data while traversing the mount listing.

Also probably worth noting is the addition of faccessat2(), which takes
an additional set of flags to specify how the lookup is done
(AT_EACCESS, AT_SYMLINK_NOFOLLOW, AT_EMPTY_PATH).

* 'from-miklos' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
vfs: add faccessat2 syscall
vfs: don't parse "silent" option
vfs: don't parse "posixacl" option
vfs: don't parse forbidden flags
statx: add mount_root
statx: add mount ID
statx: don't clear STATX_ATIME on SB_RDONLY
uapi: deprecate STATX_ALL
utimensat: AT_EMPTY_PATH support
vfs: split out access_override_creds()
proc/mounts: add cursor
aio: fix async fsync creds
vfs: allow unprivileged whiteout creation

Linus Torvalds 5 years ago f3592877 8b39a57e

+234 -96

39 changed files

expand all collapse all

arch

alpha

kernel

syscalls

syscall.tbl

arm

tools

syscall.tbl

arm64

include

asm

unistd.h

unistd32.h

ia64

kernel

syscalls

syscall.tbl

m68k

kernel

syscalls

syscall.tbl

microblaze

kernel

syscalls

syscall.tbl

mips

kernel

syscalls

syscall_n32.tbl

syscall_n64.tbl

syscall_o32.tbl

parisc

kernel

syscalls

syscall.tbl

powerpc

kernel

syscalls

syscall.tbl

s390

kernel

syscalls

syscall.tbl

sh

kernel

syscalls

syscall.tbl

sparc

kernel

syscalls

syscall.tbl

x86

entry

syscalls

syscall_32.tbl

syscall_64.tbl

xtensa

kernel

syscalls

syscall.tbl

fs

aio.c

char_dev.c

fs_context.c

internal.h

mount.h

namei.c

namespace.c

open.c

proc_namespace.c

stat.c

utimes.c

include

linux

device_cgroup.h

fs.h

mount.h

stat.h

syscalls.h

uapi

asm-generic

unistd.h

linux

fcntl.h

stat.h

samples

vfs

test-statx.c

tools

include

uapi

linux

stat.h

arch/alpha/kernel/syscalls/syscall.tbl

reviewed

··· 477 477 # 545 reserved for clone3 478 478 547 common openat2 sys_openat2 479 479 548 common pidfd_getfd sys_pidfd_getfd 480 480 + 549 common faccessat2 sys_faccessat2

arch/arm/tools/syscall.tbl

reviewed

··· 451 451 435 common clone3 sys_clone3 452 452 437 common openat2 sys_openat2 453 453 438 common pidfd_getfd sys_pidfd_getfd 454 454 + 439 common faccessat2 sys_faccessat2

+1 -1

arch/arm64/include/asm/unistd.h

reviewed

··· 38 38 #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) 39 39 #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) 40 40 41 41 - #define __NR_compat_syscalls 439 41 41 + #define __NR_compat_syscalls 440 42 42 #endif 43 43 44 44 #define __ARCH_WANT_SYS_CLONE

arch/arm64/include/asm/unistd32.h

reviewed

··· 883 883 __SYSCALL(__NR_openat2, sys_openat2) 884 884 #define __NR_pidfd_getfd 438 885 885 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) 886 886 + #define __NR_faccessat2 439 887 887 + __SYSCALL(__NR_faccessat2, sys_faccessat2) 886 888 887 889 /* 888 890 * Please add new compat syscalls above this comment and update

arch/ia64/kernel/syscalls/syscall.tbl

reviewed

··· 358 358 # 435 reserved for clone3 359 359 437 common openat2 sys_openat2 360 360 438 common pidfd_getfd sys_pidfd_getfd 361 361 + 439 common faccessat2 sys_faccessat2

arch/m68k/kernel/syscalls/syscall.tbl

reviewed

··· 437 437 435 common clone3 __sys_clone3 438 438 437 common openat2 sys_openat2 439 439 438 common pidfd_getfd sys_pidfd_getfd 440 440 + 439 common faccessat2 sys_faccessat2

arch/microblaze/kernel/syscalls/syscall.tbl

reviewed

··· 443 443 435 common clone3 sys_clone3 444 444 437 common openat2 sys_openat2 445 445 438 common pidfd_getfd sys_pidfd_getfd 446 446 + 439 common faccessat2 sys_faccessat2

arch/mips/kernel/syscalls/syscall_n32.tbl

reviewed

··· 376 376 435 n32 clone3 __sys_clone3 377 377 437 n32 openat2 sys_openat2 378 378 438 n32 pidfd_getfd sys_pidfd_getfd 379 379 + 439 n32 faccessat2 sys_faccessat2

arch/mips/kernel/syscalls/syscall_n64.tbl

reviewed

··· 352 352 435 n64 clone3 __sys_clone3 353 353 437 n64 openat2 sys_openat2 354 354 438 n64 pidfd_getfd sys_pidfd_getfd 355 355 + 439 n64 faccessat2 sys_faccessat2

arch/mips/kernel/syscalls/syscall_o32.tbl

reviewed

··· 425 425 435 o32 clone3 __sys_clone3 426 426 437 o32 openat2 sys_openat2 427 427 438 o32 pidfd_getfd sys_pidfd_getfd 428 428 + 439 o32 faccessat2 sys_faccessat2

arch/parisc/kernel/syscalls/syscall.tbl

reviewed

··· 435 435 435 common clone3 sys_clone3_wrapper 436 436 437 common openat2 sys_openat2 437 437 438 common pidfd_getfd sys_pidfd_getfd 438 438 + 439 common faccessat2 sys_faccessat2

arch/powerpc/kernel/syscalls/syscall.tbl

reviewed

··· 527 527 435 spu clone3 sys_ni_syscall 528 528 437 common openat2 sys_openat2 529 529 438 common pidfd_getfd sys_pidfd_getfd 530 530 + 439 common faccessat2 sys_faccessat2

arch/s390/kernel/syscalls/syscall.tbl

reviewed

··· 440 440 435 common clone3 sys_clone3 sys_clone3 441 441 437 common openat2 sys_openat2 sys_openat2 442 442 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd 443 443 + 439 common faccessat2 sys_faccessat2 sys_faccessat2

arch/sh/kernel/syscalls/syscall.tbl

reviewed

··· 440 440 # 435 reserved for clone3 441 441 437 common openat2 sys_openat2 442 442 438 common pidfd_getfd sys_pidfd_getfd 443 443 + 439 common faccessat2 sys_faccessat2

arch/sparc/kernel/syscalls/syscall.tbl

reviewed

··· 483 483 # 435 reserved for clone3 484 484 437 common openat2 sys_openat2 485 485 438 common pidfd_getfd sys_pidfd_getfd 486 486 + 439 common faccessat2 sys_faccessat2

arch/x86/entry/syscalls/syscall_32.tbl

reviewed

··· 442 442 435 i386 clone3 sys_clone3 443 443 437 i386 openat2 sys_openat2 444 444 438 i386 pidfd_getfd sys_pidfd_getfd 445 445 + 439 i386 faccessat2 sys_faccessat2

arch/x86/entry/syscalls/syscall_64.tbl

reviewed

··· 359 359 435 common clone3 sys_clone3 360 360 437 common openat2 sys_openat2 361 361 438 common pidfd_getfd sys_pidfd_getfd 362 362 + 439 common faccessat2 sys_faccessat2 362 363 363 364 # 364 365 # x32-specific system call numbers start at 512 to avoid cache impact

arch/xtensa/kernel/syscalls/syscall.tbl

reviewed

··· 408 408 435 common clone3 sys_clone3 409 409 437 common openat2 sys_openat2 410 410 438 common pidfd_getfd sys_pidfd_getfd 411 411 + 439 common faccessat2 sys_faccessat2

fs/aio.c

reviewed

··· 176 176 struct file *file; 177 177 struct work_struct work; 178 178 bool datasync; 179 179 + struct cred *creds; 179 180 }; 180 181 181 182 struct poll_iocb { ··· 1590 1589 static void aio_fsync_work(struct work_struct *work) 1591 1590 { 1592 1591 struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); 1592 1592 + const struct cred *old_cred = override_creds(iocb->fsync.creds); 1593 1593 1594 1594 iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); 1595 1595 + revert_creds(old_cred); 1596 1596 + put_cred(iocb->fsync.creds); 1595 1597 iocb_put(iocb); 1596 1598 } 1597 1599 ··· 1607 1603 1608 1604 if (unlikely(!req->file->f_op->fsync)) 1609 1605 return -EINVAL; 1606 1606 + 1607 1607 + req->creds = prepare_creds(); 1608 1608 + if (!req->creds) 1609 1609 + return -ENOMEM; 1610 1610 1611 1611 req->datasync = datasync; 1612 1612 INIT_WORK(&req->work, aio_fsync_work);

fs/char_dev.c

reviewed

··· 483 483 p->dev = dev; 484 484 p->count = count; 485 485 486 486 + if (WARN_ON(dev == WHITEOUT_DEV)) 487 487 + return -EBUSY; 488 488 + 486 489 error = kobj_map(cdev_map, dev, count, NULL, 487 490 exact_match, exact_lock, p); 488 491 if (error)

-30

fs/fs_context.c

reviewed

··· 42 42 { "dirsync", SB_DIRSYNC }, 43 43 { "lazytime", SB_LAZYTIME }, 44 44 { "mand", SB_MANDLOCK }, 45 45 - { "posixacl", SB_POSIXACL }, 46 45 { "ro", SB_RDONLY }, 47 46 { "sync", SB_SYNCHRONOUS }, 48 47 { }, ··· 52 53 { "nolazytime", SB_LAZYTIME }, 53 54 { "nomand", SB_MANDLOCK }, 54 55 { "rw", SB_RDONLY }, 55 55 - { "silent", SB_SILENT }, 56 56 { }, 57 57 - }; 58 58 - 59 59 - static const char *const forbidden_sb_flag[] = { 60 60 - "bind", 61 61 - "dev", 62 62 - "exec", 63 63 - "move", 64 64 - "noatime", 65 65 - "nodev", 66 66 - "nodiratime", 67 67 - "noexec", 68 68 - "norelatime", 69 69 - "nostrictatime", 70 70 - "nosuid", 71 71 - "private", 72 72 - "rec", 73 73 - "relatime", 74 74 - "remount", 75 75 - "shared", 76 76 - "slave", 77 77 - "strictatime", 78 78 - "suid", 79 79 - "unbindable", 80 57 }; 81 58 82 59 /* ··· 61 86 static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) 62 87 { 63 88 unsigned int token; 64 64 - unsigned int i; 65 65 - 66 66 - for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++) 67 67 - if (strcmp(key, forbidden_sb_flag[i]) == 0) 68 68 - return -EINVAL; 69 89 70 90 token = lookup_constant(common_set_sb_flag, key, 0); 71 91 if (token) {

-1

fs/internal.h

reviewed

··· 126 126 extern int build_open_flags(const struct open_how *how, struct open_flags *op); 127 127 128 128 long do_sys_ftruncate(unsigned int fd, loff_t length, int small); 129 129 - long do_faccessat(int dfd, const char __user *filename, int mode); 130 129 int do_fchmodat(int dfd, const char __user *filename, umode_t mode); 131 130 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, 132 131 int flag);

+9 -3

fs/mount.h

reviewed

··· 9 9 atomic_t count; 10 10 struct ns_common ns; 11 11 struct mount * root; 12 12 + /* 13 13 + * Traversal and modification of .list is protected by either 14 14 + * - taking namespace_sem for write, OR 15 15 + * - taking namespace_sem for read AND taking .ns_lock. 16 16 + */ 12 17 struct list_head list; 18 18 + spinlock_t ns_lock; 13 19 struct user_namespace *user_ns; 14 20 struct ucounts *ucounts; 15 21 u64 seq; /* Sequence number to prevent loops */ ··· 139 133 struct mnt_namespace *ns; 140 134 struct path root; 141 135 int (*show)(struct seq_file *, struct vfsmount *); 142 142 - void *cached_mount; 143 143 - u64 cached_event; 144 144 - loff_t cached_index; 136 136 + struct mount cursor; 145 137 }; 146 138 147 139 extern const struct seq_operations mounts_op; ··· 157 153 { 158 154 return ns->seq == 0; 159 155 } 156 156 + 157 157 + extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);

+3 -18

fs/namei.c

reviewed

··· 3505 3505 3506 3506 int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) 3507 3507 { 3508 3508 + bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; 3508 3509 int error = may_create(dir, dentry); 3509 3510 3510 3511 if (error) 3511 3512 return error; 3512 3513 3513 3513 - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 3514 3514 + if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout && 3515 3515 + !capable(CAP_MKNOD)) 3514 3516 return -EPERM; 3515 3517 3516 3518 if (!dir->i_op->mknod) ··· 4347 4345 (flags & RENAME_EXCHANGE)) 4348 4346 return -EINVAL; 4349 4347 4350 4350 - if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) 4351 4351 - return -EPERM; 4352 4352 - 4353 4348 if (flags & RENAME_EXCHANGE) 4354 4349 target_flags = 0; 4355 4350 ··· 4481 4482 { 4482 4483 return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); 4483 4484 } 4484 4484 - 4485 4485 - int vfs_whiteout(struct inode *dir, struct dentry *dentry) 4486 4486 - { 4487 4487 - int error = may_create(dir, dentry); 4488 4488 - if (error) 4489 4489 - return error; 4490 4490 - 4491 4491 - if (!dir->i_op->mknod) 4492 4492 - return -EPERM; 4493 4493 - 4494 4494 - return dir->i_op->mknod(dir, dentry, 4495 4495 - S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); 4496 4496 - } 4497 4497 - EXPORT_SYMBOL(vfs_whiteout); 4498 4485 4499 4486 int readlink_copy(char __user *buffer, int buflen, const char *link) 4500 4487 {

+75 -16

fs/namespace.c

reviewed

··· 648 648 return m; 649 649 } 650 650 651 651 + static inline void lock_ns_list(struct mnt_namespace *ns) 652 652 + { 653 653 + spin_lock(&ns->ns_lock); 654 654 + } 655 655 + 656 656 + static inline void unlock_ns_list(struct mnt_namespace *ns) 657 657 + { 658 658 + spin_unlock(&ns->ns_lock); 659 659 + } 660 660 + 661 661 + static inline bool mnt_is_cursor(struct mount *mnt) 662 662 + { 663 663 + return mnt->mnt.mnt_flags & MNT_CURSOR; 664 664 + } 665 665 + 651 666 /* 652 667 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the 653 668 * current mount namespace. ··· 688 673 goto out; 689 674 690 675 down_read(&namespace_sem); 676 676 + lock_ns_list(ns); 691 677 list_for_each_entry(mnt, &ns->list, mnt_list) { 678 678 + if (mnt_is_cursor(mnt)) 679 679 + continue; 692 680 is_covered = (mnt->mnt_mountpoint == dentry); 693 681 if (is_covered) 694 682 break; 695 683 } 684 684 + unlock_ns_list(ns); 696 685 up_read(&namespace_sem); 697 686 out: 698 687 return is_covered; ··· 1264 1245 } 1265 1246 1266 1247 #ifdef CONFIG_PROC_FS 1248 1248 + static struct mount *mnt_list_next(struct mnt_namespace *ns, 1249 1249 + struct list_head *p) 1250 1250 + { 1251 1251 + struct mount *mnt, *ret = NULL; 1252 1252 + 1253 1253 + lock_ns_list(ns); 1254 1254 + list_for_each_continue(p, &ns->list) { 1255 1255 + mnt = list_entry(p, typeof(*mnt), mnt_list); 1256 1256 + if (!mnt_is_cursor(mnt)) { 1257 1257 + ret = mnt; 1258 1258 + break; 1259 1259 + } 1260 1260 + } 1261 1261 + unlock_ns_list(ns); 1262 1262 + 1263 1263 + return ret; 1264 1264 + } 1265 1265 + 1267 1266 /* iterator; we want it to have access to namespace_sem, thus here... 
*/ 1268 1267 static void *m_start(struct seq_file *m, loff_t *pos) 1269 1268 { 1270 1269 struct proc_mounts *p = m->private; 1270 1270 + struct list_head *prev; 1271 1271 1272 1272 down_read(&namespace_sem); 1273 1273 - if (p->cached_event == p->ns->event) { 1274 1274 - void *v = p->cached_mount; 1275 1275 - if (*pos == p->cached_index) 1276 1276 - return v; 1277 1277 - if (*pos == p->cached_index + 1) { 1278 1278 - v = seq_list_next(v, &p->ns->list, &p->cached_index); 1279 1279 - return p->cached_mount = v; 1280 1280 - } 1273 1273 + if (!*pos) { 1274 1274 + prev = &p->ns->list; 1275 1275 + } else { 1276 1276 + prev = &p->cursor.mnt_list; 1277 1277 + 1278 1278 + /* Read after we'd reached the end? */ 1279 1279 + if (list_empty(prev)) 1280 1280 + return NULL; 1281 1281 } 1282 1282 1283 1283 - p->cached_event = p->ns->event; 1284 1284 - p->cached_mount = seq_list_start(&p->ns->list, *pos); 1285 1285 - p->cached_index = *pos; 1286 1286 - return p->cached_mount; 1283 1283 + return mnt_list_next(p->ns, prev); 1287 1284 } 1288 1285 1289 1286 static void *m_next(struct seq_file *m, void *v, loff_t *pos) 1290 1287 { 1291 1288 struct proc_mounts *p = m->private; 1289 1289 + struct mount *mnt = v; 1292 1290 1293 1293 - p->cached_mount = seq_list_next(v, &p->ns->list, pos); 1294 1294 - p->cached_index = *pos; 1295 1295 - return p->cached_mount; 1291 1291 + ++*pos; 1292 1292 + return mnt_list_next(p->ns, &mnt->mnt_list); 1296 1293 } 1297 1294 1298 1295 static void m_stop(struct seq_file *m, void *v) 1299 1296 { 1297 1297 + struct proc_mounts *p = m->private; 1298 1298 + struct mount *mnt = v; 1299 1299 + 1300 1300 + lock_ns_list(p->ns); 1301 1301 + if (mnt) 1302 1302 + list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list); 1303 1303 + else 1304 1304 + list_del_init(&p->cursor.mnt_list); 1305 1305 + unlock_ns_list(p->ns); 1300 1306 up_read(&namespace_sem); 1301 1307 } 1302 1308 1303 1309 static int m_show(struct seq_file *m, void *v) 1304 1310 { 1305 1311 struct proc_mounts *p = 
m->private; 1306 1306 - struct mount *r = list_entry(v, struct mount, mnt_list); 1312 1312 + struct mount *r = v; 1307 1313 return p->show(m, &r->mnt); 1308 1314 } 1309 1315 ··· 1338 1294 .stop = m_stop, 1339 1295 .show = m_show, 1340 1296 }; 1297 1297 + 1298 1298 + void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor) 1299 1299 + { 1300 1300 + down_read(&namespace_sem); 1301 1301 + lock_ns_list(ns); 1302 1302 + list_del(&cursor->mnt_list); 1303 1303 + unlock_ns_list(ns); 1304 1304 + up_read(&namespace_sem); 1305 1305 + } 1341 1306 #endif /* CONFIG_PROC_FS */ 1342 1307 1343 1308 /** ··· 3255 3202 atomic_set(&new_ns->count, 1); 3256 3203 INIT_LIST_HEAD(&new_ns->list); 3257 3204 init_waitqueue_head(&new_ns->poll); 3205 3205 + spin_lock_init(&new_ns->ns_lock); 3258 3206 new_ns->user_ns = get_user_ns(user_ns); 3259 3207 new_ns->ucounts = ucounts; 3260 3208 return new_ns; ··· 3896 3842 bool visible = false; 3897 3843 3898 3844 down_read(&namespace_sem); 3845 3845 + lock_ns_list(ns); 3899 3846 list_for_each_entry(mnt, &ns->list, mnt_list) { 3900 3847 struct mount *child; 3901 3848 int mnt_flags; 3849 3849 + 3850 3850 + if (mnt_is_cursor(mnt)) 3851 3851 + continue; 3902 3852 3903 3853 if (mnt->mnt.mnt_sb->s_type != sb->s_type) 3904 3854 continue; ··· 3951 3893 next: ; 3952 3894 } 3953 3895 found: 3896 3896 + unlock_ns_list(ns); 3954 3897 up_read(&namespace_sem); 3955 3898 return visible; 3956 3899 }

+45 -13

fs/open.c

reviewed

··· 345 345 * We do this by temporarily clearing all FS-related capabilities and 346 346 * switching the fsuid/fsgid around to the real ones. 347 347 */ 348 348 - long do_faccessat(int dfd, const char __user *filename, int mode) 348 348 + static const struct cred *access_override_creds(void) 349 349 { 350 350 const struct cred *old_cred; 351 351 struct cred *override_cred; 352 352 - struct path path; 353 353 - struct inode *inode; 354 354 - int res; 355 355 - unsigned int lookup_flags = LOOKUP_FOLLOW; 356 356 - 357 357 - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 358 358 - return -EINVAL; 359 352 360 353 override_cred = prepare_creds(); 361 354 if (!override_cred) 362 362 - return -ENOMEM; 355 355 + return NULL; 363 356 364 357 override_cred->fsuid = override_cred->uid; 365 358 override_cred->fsgid = override_cred->gid; ··· 387 394 override_cred->non_rcu = 1; 388 395 389 396 old_cred = override_creds(override_cred); 397 397 + 398 398 + /* override_cred() gets its own ref */ 399 399 + put_cred(override_cred); 400 400 + 401 401 + return old_cred; 402 402 + } 403 403 + 404 404 + long do_faccessat(int dfd, const char __user *filename, int mode, int flags) 405 405 + { 406 406 + struct path path; 407 407 + struct inode *inode; 408 408 + int res; 409 409 + unsigned int lookup_flags = LOOKUP_FOLLOW; 410 410 + const struct cred *old_cred = NULL; 411 411 + 412 412 + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ 413 413 + return -EINVAL; 414 414 + 415 415 + if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) 416 416 + return -EINVAL; 417 417 + 418 418 + if (flags & AT_SYMLINK_NOFOLLOW) 419 419 + lookup_flags &= ~LOOKUP_FOLLOW; 420 420 + if (flags & AT_EMPTY_PATH) 421 421 + lookup_flags |= LOOKUP_EMPTY; 422 422 + 423 423 + if (!(flags & AT_EACCESS)) { 424 424 + old_cred = access_override_creds(); 425 425 + if (!old_cred) 426 426 + return -ENOMEM; 427 427 + } 428 428 + 390 429 retry: 391 430 res = user_path_at(dfd, filename, lookup_flags, &path); 392 431 if (res) ··· 460 435 goto retry; 461 436 } 462 437 out: 463 463 - revert_creds(old_cred); 464 464 - put_cred(override_cred); 438 438 + if (old_cred) 439 439 + revert_creds(old_cred); 440 440 + 465 441 return res; 466 442 } 467 443 468 444 SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) 469 445 { 470 470 - return do_faccessat(dfd, filename, mode); 446 446 + return do_faccessat(dfd, filename, mode, 0); 447 447 + } 448 448 + 449 449 + SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode, 450 450 + int, flags) 451 451 + { 452 452 + return do_faccessat(dfd, filename, mode, flags); 471 453 } 472 454 473 455 SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) 474 456 { 475 475 - return do_faccessat(AT_FDCWD, filename, mode); 457 457 + return do_faccessat(AT_FDCWD, filename, mode, 0); 476 458 } 477 459 478 460 int ksys_chdir(const char __user *filename)

+3 -1

fs/proc_namespace.c

reviewed

··· 279 279 p->ns = ns; 280 280 p->root = root; 281 281 p->show = show; 282 282 - p->cached_event = ~0ULL; 282 282 + INIT_LIST_HEAD(&p->cursor.mnt_list); 283 283 + p->cursor.mnt.mnt_flags = MNT_CURSOR; 283 284 284 285 return 0; 285 286 ··· 297 296 struct seq_file *m = file->private_data; 298 297 struct proc_mounts *p = m->private; 299 298 path_put(&p->root); 299 299 + mnt_cursor_del(p->ns, &p->cursor); 300 300 put_mnt_ns(p->ns); 301 301 return seq_release_private(inode, file); 302 302 }

+9 -2

fs/stat.c

reviewed

··· 22 22 #include <asm/unistd.h> 23 23 24 24 #include "internal.h" 25 25 + #include "mount.h" 25 26 26 27 /** 27 28 * generic_fillattr - Fill in the basic attributes from the inode struct ··· 71 70 72 71 memset(stat, 0, sizeof(*stat)); 73 72 stat->result_mask |= STATX_BASIC_STATS; 74 74 - request_mask &= STATX_ALL; 75 73 query_flags &= KSTAT_QUERY_FLAGS; 76 74 77 75 /* allow the fs to override these if it really wants to */ 78 78 - if (IS_NOATIME(inode)) 76 76 + /* SB_NOATIME means filesystem supplies dummy atime value */ 77 77 + if (inode->i_sb->s_flags & SB_NOATIME) 79 78 stat->result_mask &= ~STATX_ATIME; 80 79 if (IS_AUTOMOUNT(inode)) 81 80 stat->attributes |= STATX_ATTR_AUTOMOUNT; ··· 200 199 goto out; 201 200 202 201 error = vfs_getattr(&path, stat, request_mask, flags); 202 202 + stat->mnt_id = real_mount(path.mnt)->mnt_id; 203 203 + stat->result_mask |= STATX_MNT_ID; 204 204 + if (path.mnt->mnt_root == path.dentry) 205 205 + stat->attributes |= STATX_ATTR_MOUNT_ROOT; 206 206 + stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; 203 207 path_put(&path); 204 208 if (retry_estale(error, lookup_flags)) { 205 209 lookup_flags |= LOOKUP_REVAL; ··· 569 563 tmp.stx_rdev_minor = MINOR(stat->rdev); 570 564 tmp.stx_dev_major = MAJOR(stat->dev); 571 565 tmp.stx_dev_minor = MINOR(stat->dev); 566 566 + tmp.stx_mnt_id = stat->mnt_id; 572 567 573 568 return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; 574 569 }

+4 -2

fs/utimes.c

reviewed

··· 95 95 goto out; 96 96 } 97 97 98 98 - if (flags & ~AT_SYMLINK_NOFOLLOW) 98 98 + if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) 99 99 goto out; 100 100 101 101 if (filename == NULL && dfd != AT_FDCWD) { 102 102 struct fd f; 103 103 104 104 - if (flags & AT_SYMLINK_NOFOLLOW) 104 104 + if (flags) 105 105 goto out; 106 106 107 107 f = fdget(dfd); ··· 117 117 118 118 if (!(flags & AT_SYMLINK_NOFOLLOW)) 119 119 lookup_flags |= LOOKUP_FOLLOW; 120 120 + if (flags & AT_EMPTY_PATH) 121 121 + lookup_flags |= LOOKUP_EMPTY; 120 122 retry: 121 123 error = user_path_at(dfd, filename, lookup_flags, &path); 122 124 if (error)

include/linux/device_cgroup.h

reviewed

··· 44 44 if (!S_ISBLK(mode) && !S_ISCHR(mode)) 45 45 return 0; 46 46 47 47 + if (S_ISCHR(mode) && dev == WHITEOUT_DEV) 48 48 + return 0; 49 49 + 47 50 if (S_ISBLK(mode)) 48 51 type = DEVCG_DEV_BLOCK; 49 52 else

+5 -1

include/linux/fs.h

reviewed

··· 1721 1721 extern int vfs_rmdir(struct inode *, struct dentry *); 1722 1722 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); 1723 1723 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); 1724 1724 - extern int vfs_whiteout(struct inode *, struct dentry *); 1724 1724 + 1725 1725 + static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry) 1726 1726 + { 1727 1727 + return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); 1728 1728 + } 1725 1729 1726 1730 extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, 1727 1731 int open_flag);

+3 -1

include/linux/mount.h

reviewed

··· 50 50 #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) 51 51 52 52 #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ 53 53 - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) 53 53 + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \ 54 54 + MNT_CURSOR) 54 55 55 56 #define MNT_INTERNAL 0x4000 56 57 ··· 65 64 #define MNT_SYNC_UMOUNT 0x2000000 66 65 #define MNT_MARKED 0x4000000 67 66 #define MNT_UMOUNT 0x8000000 67 67 + #define MNT_CURSOR 0x10000000 68 68 69 69 struct vfsmount { 70 70 struct dentry *mnt_root; /* root of the mounted tree */

include/linux/stat.h

reviewed

··· 47 47 struct timespec64 ctime; 48 48 struct timespec64 btime; /* File creation time */ 49 49 u64 blocks; 50 50 + u64 mnt_id; 50 51 }; 51 52 52 53 #endif

+4 -2

include/linux/syscalls.h

reviewed

··· 428 428 #endif 429 429 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); 430 430 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); 431 431 + asmlinkage long sys_faccessat2(int dfd, const char __user *filename, int mode, 432 432 + int flags); 431 433 asmlinkage long sys_chdir(const char __user *filename); 432 434 asmlinkage long sys_fchdir(unsigned int fd); 433 435 asmlinkage long sys_chroot(const char __user *filename); ··· 1335 1333 return do_fchmodat(AT_FDCWD, filename, mode); 1336 1334 } 1337 1335 1338 1338 - extern long do_faccessat(int dfd, const char __user *filename, int mode); 1336 1336 + long do_faccessat(int dfd, const char __user *filename, int mode, int flags); 1339 1337 1340 1338 static inline long ksys_access(const char __user *filename, int mode) 1341 1339 { 1342 1342 - return do_faccessat(AT_FDCWD, filename, mode); 1340 1340 + return do_faccessat(AT_FDCWD, filename, mode, 0); 1343 1341 } 1344 1342 1345 1343 extern int do_fchownat(int dfd, const char __user *filename, uid_t user,

+3 -1

include/uapi/asm-generic/unistd.h

reviewed

··· 855 855 __SYSCALL(__NR_openat2, sys_openat2) 856 856 #define __NR_pidfd_getfd 438 857 857 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) 858 858 + #define __NR_faccessat2 439 859 859 + __SYSCALL(__NR_faccessat2, sys_faccessat2) 858 860 859 861 #undef __NR_syscalls 860 860 - #define __NR_syscalls 439 862 862 + #define __NR_syscalls 440 861 863 862 864 /* 863 865 * 32 bit systems traditionally used different

+10

include/uapi/linux/fcntl.h

reviewed

··· 84 84 #define DN_ATTRIB 0x00000020 /* File changed attibutes */ 85 85 #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ 86 86 87 87 + /* 88 88 + * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is 89 89 + * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to 90 90 + * unlinkat. The two functions do completely different things and therefore, 91 91 + * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to 92 92 + * faccessat would be undefined behavior and thus treating it equivalent to 93 93 + * AT_EACCESS is valid undefined behavior. 94 94 + */ 87 95 #define AT_FDCWD -100 /* Special value used to indicate 88 96 openat should use the current 89 97 working directory. */ 90 98 #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ 99 99 + #define AT_EACCESS 0x200 /* Test access permitted for 100 100 + effective IDs, not real IDs. */ 91 101 #define AT_REMOVEDIR 0x200 /* Remove directory instead of 92 102 unlinking file. */ 93 103 #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */

+16 -2

include/uapi/linux/stat.h

reviewed

··· 123 123 __u32 stx_dev_major; /* ID of device containing file [uncond] */ 124 124 __u32 stx_dev_minor; 125 125 /* 0x90 */ 126 126 - __u64 __spare2[14]; /* Spare space for future expansion */ 126 126 + __u64 stx_mnt_id; 127 127 + __u64 __spare2; 128 128 + /* 0xa0 */ 129 129 + __u64 __spare3[12]; /* Spare space for future expansion */ 127 130 /* 0x100 */ 128 131 }; 129 132 ··· 151 148 #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ 152 149 #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ 153 150 #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ 154 154 - #define STATX_ALL 0x00000fffU /* All currently supported flags */ 151 151 + #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ 152 152 + 155 153 #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ 154 154 + 155 155 + #ifndef __KERNEL__ 156 156 + /* 157 157 + * This is deprecated, and shall remain the same value in the future. To avoid 158 158 + * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME) 159 159 + * instead. 160 160 + */ 161 161 + #define STATX_ALL 0x00000fffU 162 162 + #endif 156 163 157 164 /* 158 165 * Attributes to be found in stx_attributes and masked in stx_attributes_mask. ··· 181 168 #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ 182 169 #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ 183 170 #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ 171 171 + #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ 184 172 #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ 185 173 186 174

+1 -1

samples/vfs/test-statx.c

reviewed

··· 216 216 struct statx stx; 217 217 int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW; 218 218 219 219 - unsigned int mask = STATX_ALL; 219 219 + unsigned int mask = STATX_BASIC_STATS | STATX_BTIME; 220 220 221 221 for (argv++; *argv; argv++) { 222 222 if (strcmp(*argv, "-F") == 0) {

+10 -1

tools/include/uapi/linux/stat.h

reviewed

··· 148 148 #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ 149 149 #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ 150 150 #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ 151 151 - #define STATX_ALL 0x00000fffU /* All currently supported flags */ 151 151 + 152 152 #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ 153 153 + 154 154 + #ifndef __KERNEL__ 155 155 + /* 156 156 + * This is deprecated, and shall remain the same value in the future. To avoid 157 157 + * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME) 158 158 + * instead. 159 159 + */ 160 160 + #define STATX_ALL 0x00000fffU 161 161 + #endif 153 162 154 163 /* 155 164 * Attributes to be found in stx_attributes and masked in stx_attributes_mask.