Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'from-miklos' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs updates from Al Viro:
"Assorted patches from Miklos.

An interesting part here is /proc/mounts stuff..."

The "/proc/mounts stuff" is using a cursor for keeping the location
data while traversing the mount listing.

Also probably worth noting is the addition of faccessat2(), which takes
an additional set of flags to specify how the lookup is done
(AT_EACCESS, AT_SYMLINK_NOFOLLOW, AT_EMPTY_PATH).

* 'from-miklos' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
vfs: add faccessat2 syscall
vfs: don't parse "silent" option
vfs: don't parse "posixacl" option
vfs: don't parse forbidden flags
statx: add mount_root
statx: add mount ID
statx: don't clear STATX_ATIME on SB_RDONLY
uapi: deprecate STATX_ALL
utimensat: AT_EMPTY_PATH support
vfs: split out access_override_creds()
proc/mounts: add cursor
aio: fix async fsync creds
vfs: allow unprivileged whiteout creation

+234 -96
+1
arch/alpha/kernel/syscalls/syscall.tbl
··· 477 477 # 545 reserved for clone3 478 478 547 common openat2 sys_openat2 479 479 548 common pidfd_getfd sys_pidfd_getfd 480 + 549 common faccessat2 sys_faccessat2
+1
arch/arm/tools/syscall.tbl
··· 451 451 435 common clone3 sys_clone3 452 452 437 common openat2 sys_openat2 453 453 438 common pidfd_getfd sys_pidfd_getfd 454 + 439 common faccessat2 sys_faccessat2
+1 -1
arch/arm64/include/asm/unistd.h
··· 38 38 #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) 39 39 #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) 40 40 41 - #define __NR_compat_syscalls 439 41 + #define __NR_compat_syscalls 440 42 42 #endif 43 43 44 44 #define __ARCH_WANT_SYS_CLONE
+2
arch/arm64/include/asm/unistd32.h
··· 883 883 __SYSCALL(__NR_openat2, sys_openat2) 884 884 #define __NR_pidfd_getfd 438 885 885 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) 886 + #define __NR_faccessat2 439 887 + __SYSCALL(__NR_faccessat2, sys_faccessat2) 886 888 887 889 /* 888 890 * Please add new compat syscalls above this comment and update
+1
arch/ia64/kernel/syscalls/syscall.tbl
··· 358 358 # 435 reserved for clone3 359 359 437 common openat2 sys_openat2 360 360 438 common pidfd_getfd sys_pidfd_getfd 361 + 439 common faccessat2 sys_faccessat2
+1
arch/m68k/kernel/syscalls/syscall.tbl
··· 437 437 435 common clone3 __sys_clone3 438 438 437 common openat2 sys_openat2 439 439 438 common pidfd_getfd sys_pidfd_getfd 440 + 439 common faccessat2 sys_faccessat2
+1
arch/microblaze/kernel/syscalls/syscall.tbl
··· 443 443 435 common clone3 sys_clone3 444 444 437 common openat2 sys_openat2 445 445 438 common pidfd_getfd sys_pidfd_getfd 446 + 439 common faccessat2 sys_faccessat2
+1
arch/mips/kernel/syscalls/syscall_n32.tbl
··· 376 376 435 n32 clone3 __sys_clone3 377 377 437 n32 openat2 sys_openat2 378 378 438 n32 pidfd_getfd sys_pidfd_getfd 379 + 439 n32 faccessat2 sys_faccessat2
+1
arch/mips/kernel/syscalls/syscall_n64.tbl
··· 352 352 435 n64 clone3 __sys_clone3 353 353 437 n64 openat2 sys_openat2 354 354 438 n64 pidfd_getfd sys_pidfd_getfd 355 + 439 n64 faccessat2 sys_faccessat2
+1
arch/mips/kernel/syscalls/syscall_o32.tbl
··· 425 425 435 o32 clone3 __sys_clone3 426 426 437 o32 openat2 sys_openat2 427 427 438 o32 pidfd_getfd sys_pidfd_getfd 428 + 439 o32 faccessat2 sys_faccessat2
+1
arch/parisc/kernel/syscalls/syscall.tbl
··· 435 435 435 common clone3 sys_clone3_wrapper 436 436 437 common openat2 sys_openat2 437 437 438 common pidfd_getfd sys_pidfd_getfd 438 + 439 common faccessat2 sys_faccessat2
+1
arch/powerpc/kernel/syscalls/syscall.tbl
··· 527 527 435 spu clone3 sys_ni_syscall 528 528 437 common openat2 sys_openat2 529 529 438 common pidfd_getfd sys_pidfd_getfd 530 + 439 common faccessat2 sys_faccessat2
+1
arch/s390/kernel/syscalls/syscall.tbl
··· 440 440 435 common clone3 sys_clone3 sys_clone3 441 441 437 common openat2 sys_openat2 sys_openat2 442 442 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd 443 + 439 common faccessat2 sys_faccessat2 sys_faccessat2
+1
arch/sh/kernel/syscalls/syscall.tbl
··· 440 440 # 435 reserved for clone3 441 441 437 common openat2 sys_openat2 442 442 438 common pidfd_getfd sys_pidfd_getfd 443 + 439 common faccessat2 sys_faccessat2
+1
arch/sparc/kernel/syscalls/syscall.tbl
··· 483 483 # 435 reserved for clone3 484 484 437 common openat2 sys_openat2 485 485 438 common pidfd_getfd sys_pidfd_getfd 486 + 439 common faccessat2 sys_faccessat2
+1
arch/x86/entry/syscalls/syscall_32.tbl
··· 442 442 435 i386 clone3 sys_clone3 443 443 437 i386 openat2 sys_openat2 444 444 438 i386 pidfd_getfd sys_pidfd_getfd 445 + 439 i386 faccessat2 sys_faccessat2
+1
arch/x86/entry/syscalls/syscall_64.tbl
··· 359 359 435 common clone3 sys_clone3 360 360 437 common openat2 sys_openat2 361 361 438 common pidfd_getfd sys_pidfd_getfd 362 + 439 common faccessat2 sys_faccessat2 362 363 363 364 # 364 365 # x32-specific system call numbers start at 512 to avoid cache impact
+1
arch/xtensa/kernel/syscalls/syscall.tbl
··· 408 408 435 common clone3 sys_clone3 409 409 437 common openat2 sys_openat2 410 410 438 common pidfd_getfd sys_pidfd_getfd 411 + 439 common faccessat2 sys_faccessat2
+8
fs/aio.c
··· 176 176 struct file *file; 177 177 struct work_struct work; 178 178 bool datasync; 179 + struct cred *creds; 179 180 }; 180 181 181 182 struct poll_iocb { ··· 1590 1589 static void aio_fsync_work(struct work_struct *work) 1591 1590 { 1592 1591 struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); 1592 + const struct cred *old_cred = override_creds(iocb->fsync.creds); 1593 1593 1594 1594 iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); 1595 + revert_creds(old_cred); 1596 + put_cred(iocb->fsync.creds); 1595 1597 iocb_put(iocb); 1596 1598 } 1597 1599 ··· 1607 1603 1608 1604 if (unlikely(!req->file->f_op->fsync)) 1609 1605 return -EINVAL; 1606 + 1607 + req->creds = prepare_creds(); 1608 + if (!req->creds) 1609 + return -ENOMEM; 1610 1610 1611 1611 req->datasync = datasync; 1612 1612 INIT_WORK(&req->work, aio_fsync_work);
+3
fs/char_dev.c
··· 483 483 p->dev = dev; 484 484 p->count = count; 485 485 486 + if (WARN_ON(dev == WHITEOUT_DEV)) 487 + return -EBUSY; 488 + 486 489 error = kobj_map(cdev_map, dev, count, NULL, 487 490 exact_match, exact_lock, p); 488 491 if (error)
-30
fs/fs_context.c
··· 42 42 { "dirsync", SB_DIRSYNC }, 43 43 { "lazytime", SB_LAZYTIME }, 44 44 { "mand", SB_MANDLOCK }, 45 - { "posixacl", SB_POSIXACL }, 46 45 { "ro", SB_RDONLY }, 47 46 { "sync", SB_SYNCHRONOUS }, 48 47 { }, ··· 52 53 { "nolazytime", SB_LAZYTIME }, 53 54 { "nomand", SB_MANDLOCK }, 54 55 { "rw", SB_RDONLY }, 55 - { "silent", SB_SILENT }, 56 56 { }, 57 - }; 58 - 59 - static const char *const forbidden_sb_flag[] = { 60 - "bind", 61 - "dev", 62 - "exec", 63 - "move", 64 - "noatime", 65 - "nodev", 66 - "nodiratime", 67 - "noexec", 68 - "norelatime", 69 - "nostrictatime", 70 - "nosuid", 71 - "private", 72 - "rec", 73 - "relatime", 74 - "remount", 75 - "shared", 76 - "slave", 77 - "strictatime", 78 - "suid", 79 - "unbindable", 80 57 }; 81 58 82 59 /* ··· 61 86 static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) 62 87 { 63 88 unsigned int token; 64 - unsigned int i; 65 - 66 - for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++) 67 - if (strcmp(key, forbidden_sb_flag[i]) == 0) 68 - return -EINVAL; 69 89 70 90 token = lookup_constant(common_set_sb_flag, key, 0); 71 91 if (token) {
-1
fs/internal.h
··· 126 126 extern int build_open_flags(const struct open_how *how, struct open_flags *op); 127 127 128 128 long do_sys_ftruncate(unsigned int fd, loff_t length, int small); 129 - long do_faccessat(int dfd, const char __user *filename, int mode); 130 129 int do_fchmodat(int dfd, const char __user *filename, umode_t mode); 131 130 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, 132 131 int flag);
+9 -3
fs/mount.h
··· 9 9 atomic_t count; 10 10 struct ns_common ns; 11 11 struct mount * root; 12 + /* 13 + * Traversal and modification of .list is protected by either 14 + * - taking namespace_sem for write, OR 15 + * - taking namespace_sem for read AND taking .ns_lock. 16 + */ 12 17 struct list_head list; 18 + spinlock_t ns_lock; 13 19 struct user_namespace *user_ns; 14 20 struct ucounts *ucounts; 15 21 u64 seq; /* Sequence number to prevent loops */ ··· 139 133 struct mnt_namespace *ns; 140 134 struct path root; 141 135 int (*show)(struct seq_file *, struct vfsmount *); 142 - void *cached_mount; 143 - u64 cached_event; 144 - loff_t cached_index; 136 + struct mount cursor; 145 137 }; 146 138 147 139 extern const struct seq_operations mounts_op; ··· 157 153 { 158 154 return ns->seq == 0; 159 155 } 156 + 157 + extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
+3 -18
fs/namei.c
··· 3505 3505 3506 3506 int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) 3507 3507 { 3508 + bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; 3508 3509 int error = may_create(dir, dentry); 3509 3510 3510 3511 if (error) 3511 3512 return error; 3512 3513 3513 - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 3514 + if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout && 3515 + !capable(CAP_MKNOD)) 3514 3516 return -EPERM; 3515 3517 3516 3518 if (!dir->i_op->mknod) ··· 4347 4345 (flags & RENAME_EXCHANGE)) 4348 4346 return -EINVAL; 4349 4347 4350 - if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) 4351 - return -EPERM; 4352 - 4353 4348 if (flags & RENAME_EXCHANGE) 4354 4349 target_flags = 0; 4355 4350 ··· 4481 4482 { 4482 4483 return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); 4483 4484 } 4484 - 4485 - int vfs_whiteout(struct inode *dir, struct dentry *dentry) 4486 - { 4487 - int error = may_create(dir, dentry); 4488 - if (error) 4489 - return error; 4490 - 4491 - if (!dir->i_op->mknod) 4492 - return -EPERM; 4493 - 4494 - return dir->i_op->mknod(dir, dentry, 4495 - S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); 4496 - } 4497 - EXPORT_SYMBOL(vfs_whiteout); 4498 4485 4499 4486 int readlink_copy(char __user *buffer, int buflen, const char *link) 4500 4487 {
+75 -16
fs/namespace.c
··· 648 648 return m; 649 649 } 650 650 651 + static inline void lock_ns_list(struct mnt_namespace *ns) 652 + { 653 + spin_lock(&ns->ns_lock); 654 + } 655 + 656 + static inline void unlock_ns_list(struct mnt_namespace *ns) 657 + { 658 + spin_unlock(&ns->ns_lock); 659 + } 660 + 661 + static inline bool mnt_is_cursor(struct mount *mnt) 662 + { 663 + return mnt->mnt.mnt_flags & MNT_CURSOR; 664 + } 665 + 651 666 /* 652 667 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the 653 668 * current mount namespace. ··· 688 673 goto out; 689 674 690 675 down_read(&namespace_sem); 676 + lock_ns_list(ns); 691 677 list_for_each_entry(mnt, &ns->list, mnt_list) { 678 + if (mnt_is_cursor(mnt)) 679 + continue; 692 680 is_covered = (mnt->mnt_mountpoint == dentry); 693 681 if (is_covered) 694 682 break; 695 683 } 684 + unlock_ns_list(ns); 696 685 up_read(&namespace_sem); 697 686 out: 698 687 return is_covered; ··· 1264 1245 } 1265 1246 1266 1247 #ifdef CONFIG_PROC_FS 1248 + static struct mount *mnt_list_next(struct mnt_namespace *ns, 1249 + struct list_head *p) 1250 + { 1251 + struct mount *mnt, *ret = NULL; 1252 + 1253 + lock_ns_list(ns); 1254 + list_for_each_continue(p, &ns->list) { 1255 + mnt = list_entry(p, typeof(*mnt), mnt_list); 1256 + if (!mnt_is_cursor(mnt)) { 1257 + ret = mnt; 1258 + break; 1259 + } 1260 + } 1261 + unlock_ns_list(ns); 1262 + 1263 + return ret; 1264 + } 1265 + 1267 1266 /* iterator; we want it to have access to namespace_sem, thus here... 
*/ 1268 1267 static void *m_start(struct seq_file *m, loff_t *pos) 1269 1268 { 1270 1269 struct proc_mounts *p = m->private; 1270 + struct list_head *prev; 1271 1271 1272 1272 down_read(&namespace_sem); 1273 - if (p->cached_event == p->ns->event) { 1274 - void *v = p->cached_mount; 1275 - if (*pos == p->cached_index) 1276 - return v; 1277 - if (*pos == p->cached_index + 1) { 1278 - v = seq_list_next(v, &p->ns->list, &p->cached_index); 1279 - return p->cached_mount = v; 1280 - } 1273 + if (!*pos) { 1274 + prev = &p->ns->list; 1275 + } else { 1276 + prev = &p->cursor.mnt_list; 1277 + 1278 + /* Read after we'd reached the end? */ 1279 + if (list_empty(prev)) 1280 + return NULL; 1281 1281 } 1282 1282 1283 - p->cached_event = p->ns->event; 1284 - p->cached_mount = seq_list_start(&p->ns->list, *pos); 1285 - p->cached_index = *pos; 1286 - return p->cached_mount; 1283 + return mnt_list_next(p->ns, prev); 1287 1284 } 1288 1285 1289 1286 static void *m_next(struct seq_file *m, void *v, loff_t *pos) 1290 1287 { 1291 1288 struct proc_mounts *p = m->private; 1289 + struct mount *mnt = v; 1292 1290 1293 - p->cached_mount = seq_list_next(v, &p->ns->list, pos); 1294 - p->cached_index = *pos; 1295 - return p->cached_mount; 1291 + ++*pos; 1292 + return mnt_list_next(p->ns, &mnt->mnt_list); 1296 1293 } 1297 1294 1298 1295 static void m_stop(struct seq_file *m, void *v) 1299 1296 { 1297 + struct proc_mounts *p = m->private; 1298 + struct mount *mnt = v; 1299 + 1300 + lock_ns_list(p->ns); 1301 + if (mnt) 1302 + list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list); 1303 + else 1304 + list_del_init(&p->cursor.mnt_list); 1305 + unlock_ns_list(p->ns); 1300 1306 up_read(&namespace_sem); 1301 1307 } 1302 1308 1303 1309 static int m_show(struct seq_file *m, void *v) 1304 1310 { 1305 1311 struct proc_mounts *p = m->private; 1306 - struct mount *r = list_entry(v, struct mount, mnt_list); 1312 + struct mount *r = v; 1307 1313 return p->show(m, &r->mnt); 1308 1314 } 1309 1315 ··· 1338 1294 .stop = 
m_stop, 1339 1295 .show = m_show, 1340 1296 }; 1297 + 1298 + void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor) 1299 + { 1300 + down_read(&namespace_sem); 1301 + lock_ns_list(ns); 1302 + list_del(&cursor->mnt_list); 1303 + unlock_ns_list(ns); 1304 + up_read(&namespace_sem); 1305 + } 1341 1306 #endif /* CONFIG_PROC_FS */ 1342 1307 1343 1308 /** ··· 3255 3202 atomic_set(&new_ns->count, 1); 3256 3203 INIT_LIST_HEAD(&new_ns->list); 3257 3204 init_waitqueue_head(&new_ns->poll); 3205 + spin_lock_init(&new_ns->ns_lock); 3258 3206 new_ns->user_ns = get_user_ns(user_ns); 3259 3207 new_ns->ucounts = ucounts; 3260 3208 return new_ns; ··· 3896 3842 bool visible = false; 3897 3843 3898 3844 down_read(&namespace_sem); 3845 + lock_ns_list(ns); 3899 3846 list_for_each_entry(mnt, &ns->list, mnt_list) { 3900 3847 struct mount *child; 3901 3848 int mnt_flags; 3849 + 3850 + if (mnt_is_cursor(mnt)) 3851 + continue; 3902 3852 3903 3853 if (mnt->mnt.mnt_sb->s_type != sb->s_type) 3904 3854 continue; ··· 3951 3893 next: ; 3952 3894 } 3953 3895 found: 3896 + unlock_ns_list(ns); 3954 3897 up_read(&namespace_sem); 3955 3898 return visible; 3956 3899 }
+45 -13
fs/open.c
··· 345 345 * We do this by temporarily clearing all FS-related capabilities and 346 346 * switching the fsuid/fsgid around to the real ones. 347 347 */ 348 - long do_faccessat(int dfd, const char __user *filename, int mode) 348 + static const struct cred *access_override_creds(void) 349 349 { 350 350 const struct cred *old_cred; 351 351 struct cred *override_cred; 352 - struct path path; 353 - struct inode *inode; 354 - int res; 355 - unsigned int lookup_flags = LOOKUP_FOLLOW; 356 - 357 - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 358 - return -EINVAL; 359 352 360 353 override_cred = prepare_creds(); 361 354 if (!override_cred) 362 - return -ENOMEM; 355 + return NULL; 363 356 364 357 override_cred->fsuid = override_cred->uid; 365 358 override_cred->fsgid = override_cred->gid; ··· 387 394 override_cred->non_rcu = 1; 388 395 389 396 old_cred = override_creds(override_cred); 397 + 398 + /* override_cred() gets its own ref */ 399 + put_cred(override_cred); 400 + 401 + return old_cred; 402 + } 403 + 404 + long do_faccessat(int dfd, const char __user *filename, int mode, int flags) 405 + { 406 + struct path path; 407 + struct inode *inode; 408 + int res; 409 + unsigned int lookup_flags = LOOKUP_FOLLOW; 410 + const struct cred *old_cred = NULL; 411 + 412 + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ 413 + return -EINVAL; 414 + 415 + if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) 416 + return -EINVAL; 417 + 418 + if (flags & AT_SYMLINK_NOFOLLOW) 419 + lookup_flags &= ~LOOKUP_FOLLOW; 420 + if (flags & AT_EMPTY_PATH) 421 + lookup_flags |= LOOKUP_EMPTY; 422 + 423 + if (!(flags & AT_EACCESS)) { 424 + old_cred = access_override_creds(); 425 + if (!old_cred) 426 + return -ENOMEM; 427 + } 428 + 390 429 retry: 391 430 res = user_path_at(dfd, filename, lookup_flags, &path); 392 431 if (res) ··· 460 435 goto retry; 461 436 } 462 437 out: 463 - revert_creds(old_cred); 464 - put_cred(override_cred); 438 + if (old_cred) 439 + revert_creds(old_cred); 440 + 465 441 return res; 466 442 } 467 443 468 444 SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) 469 445 { 470 - return do_faccessat(dfd, filename, mode); 446 + return do_faccessat(dfd, filename, mode, 0); 447 + } 448 + 449 + SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode, 450 + int, flags) 451 + { 452 + return do_faccessat(dfd, filename, mode, flags); 471 453 } 472 454 473 455 SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) 474 456 { 475 - return do_faccessat(AT_FDCWD, filename, mode); 457 + return do_faccessat(AT_FDCWD, filename, mode, 0); 476 458 } 477 459 478 460 int ksys_chdir(const char __user *filename)
+3 -1
fs/proc_namespace.c
··· 279 279 p->ns = ns; 280 280 p->root = root; 281 281 p->show = show; 282 - p->cached_event = ~0ULL; 282 + INIT_LIST_HEAD(&p->cursor.mnt_list); 283 + p->cursor.mnt.mnt_flags = MNT_CURSOR; 283 284 284 285 return 0; 285 286 ··· 297 296 struct seq_file *m = file->private_data; 298 297 struct proc_mounts *p = m->private; 299 298 path_put(&p->root); 299 + mnt_cursor_del(p->ns, &p->cursor); 300 300 put_mnt_ns(p->ns); 301 301 return seq_release_private(inode, file); 302 302 }
+9 -2
fs/stat.c
··· 22 22 #include <asm/unistd.h> 23 23 24 24 #include "internal.h" 25 + #include "mount.h" 25 26 26 27 /** 27 28 * generic_fillattr - Fill in the basic attributes from the inode struct ··· 71 70 72 71 memset(stat, 0, sizeof(*stat)); 73 72 stat->result_mask |= STATX_BASIC_STATS; 74 - request_mask &= STATX_ALL; 75 73 query_flags &= KSTAT_QUERY_FLAGS; 76 74 77 75 /* allow the fs to override these if it really wants to */ 78 - if (IS_NOATIME(inode)) 76 + /* SB_NOATIME means filesystem supplies dummy atime value */ 77 + if (inode->i_sb->s_flags & SB_NOATIME) 79 78 stat->result_mask &= ~STATX_ATIME; 80 79 if (IS_AUTOMOUNT(inode)) 81 80 stat->attributes |= STATX_ATTR_AUTOMOUNT; ··· 200 199 goto out; 201 200 202 201 error = vfs_getattr(&path, stat, request_mask, flags); 202 + stat->mnt_id = real_mount(path.mnt)->mnt_id; 203 + stat->result_mask |= STATX_MNT_ID; 204 + if (path.mnt->mnt_root == path.dentry) 205 + stat->attributes |= STATX_ATTR_MOUNT_ROOT; 206 + stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; 203 207 path_put(&path); 204 208 if (retry_estale(error, lookup_flags)) { 205 209 lookup_flags |= LOOKUP_REVAL; ··· 569 563 tmp.stx_rdev_minor = MINOR(stat->rdev); 570 564 tmp.stx_dev_major = MAJOR(stat->dev); 571 565 tmp.stx_dev_minor = MINOR(stat->dev); 566 + tmp.stx_mnt_id = stat->mnt_id; 572 567 573 568 return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; 574 569 }
+4 -2
fs/utimes.c
··· 95 95 goto out; 96 96 } 97 97 98 - if (flags & ~AT_SYMLINK_NOFOLLOW) 98 + if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) 99 99 goto out; 100 100 101 101 if (filename == NULL && dfd != AT_FDCWD) { 102 102 struct fd f; 103 103 104 - if (flags & AT_SYMLINK_NOFOLLOW) 104 + if (flags) 105 105 goto out; 106 106 107 107 f = fdget(dfd); ··· 117 117 118 118 if (!(flags & AT_SYMLINK_NOFOLLOW)) 119 119 lookup_flags |= LOOKUP_FOLLOW; 120 + if (flags & AT_EMPTY_PATH) 121 + lookup_flags |= LOOKUP_EMPTY; 120 122 retry: 121 123 error = user_path_at(dfd, filename, lookup_flags, &path); 122 124 if (error)
+3
include/linux/device_cgroup.h
··· 44 44 if (!S_ISBLK(mode) && !S_ISCHR(mode)) 45 45 return 0; 46 46 47 + if (S_ISCHR(mode) && dev == WHITEOUT_DEV) 48 + return 0; 49 + 47 50 if (S_ISBLK(mode)) 48 51 type = DEVCG_DEV_BLOCK; 49 52 else
+5 -1
include/linux/fs.h
··· 1721 1721 extern int vfs_rmdir(struct inode *, struct dentry *); 1722 1722 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); 1723 1723 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); 1724 - extern int vfs_whiteout(struct inode *, struct dentry *); 1724 + 1725 + static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry) 1726 + { 1727 + return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); 1728 + } 1725 1729 1726 1730 extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, 1727 1731 int open_flag);
+3 -1
include/linux/mount.h
··· 50 50 #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) 51 51 52 52 #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ 53 - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) 53 + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \ 54 + MNT_CURSOR) 54 55 55 56 #define MNT_INTERNAL 0x4000 56 57 ··· 65 64 #define MNT_SYNC_UMOUNT 0x2000000 66 65 #define MNT_MARKED 0x4000000 67 66 #define MNT_UMOUNT 0x8000000 67 + #define MNT_CURSOR 0x10000000 68 68 69 69 struct vfsmount { 70 70 struct dentry *mnt_root; /* root of the mounted tree */
+1
include/linux/stat.h
··· 47 47 struct timespec64 ctime; 48 48 struct timespec64 btime; /* File creation time */ 49 49 u64 blocks; 50 + u64 mnt_id; 50 51 }; 51 52 52 53 #endif
+4 -2
include/linux/syscalls.h
··· 428 428 #endif 429 429 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); 430 430 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); 431 + asmlinkage long sys_faccessat2(int dfd, const char __user *filename, int mode, 432 + int flags); 431 433 asmlinkage long sys_chdir(const char __user *filename); 432 434 asmlinkage long sys_fchdir(unsigned int fd); 433 435 asmlinkage long sys_chroot(const char __user *filename); ··· 1335 1333 return do_fchmodat(AT_FDCWD, filename, mode); 1336 1334 } 1337 1335 1338 - extern long do_faccessat(int dfd, const char __user *filename, int mode); 1336 + long do_faccessat(int dfd, const char __user *filename, int mode, int flags); 1339 1337 1340 1338 static inline long ksys_access(const char __user *filename, int mode) 1341 1339 { 1342 - return do_faccessat(AT_FDCWD, filename, mode); 1340 + return do_faccessat(AT_FDCWD, filename, mode, 0); 1343 1341 } 1344 1342 1345 1343 extern int do_fchownat(int dfd, const char __user *filename, uid_t user,
+3 -1
include/uapi/asm-generic/unistd.h
··· 855 855 __SYSCALL(__NR_openat2, sys_openat2) 856 856 #define __NR_pidfd_getfd 438 857 857 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) 858 + #define __NR_faccessat2 439 859 + __SYSCALL(__NR_faccessat2, sys_faccessat2) 858 860 859 861 #undef __NR_syscalls 860 - #define __NR_syscalls 439 862 + #define __NR_syscalls 440 861 863 862 864 /* 863 865 * 32 bit systems traditionally used different
+10
include/uapi/linux/fcntl.h
··· 84 84 #define DN_ATTRIB 0x00000020 /* File changed attibutes */ 85 85 #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ 86 86 87 + /* 88 + * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is 89 + * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to 90 + * unlinkat. The two functions do completely different things and therefore, 91 + * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to 92 + * faccessat would be undefined behavior and thus treating it equivalent to 93 + * AT_EACCESS is valid undefined behavior. 94 + */ 87 95 #define AT_FDCWD -100 /* Special value used to indicate 88 96 openat should use the current 89 97 working directory. */ 90 98 #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ 99 + #define AT_EACCESS 0x200 /* Test access permitted for 100 + effective IDs, not real IDs. */ 91 101 #define AT_REMOVEDIR 0x200 /* Remove directory instead of 92 102 unlinking file. */ 93 103 #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
+16 -2
include/uapi/linux/stat.h
··· 123 123 __u32 stx_dev_major; /* ID of device containing file [uncond] */ 124 124 __u32 stx_dev_minor; 125 125 /* 0x90 */ 126 - __u64 __spare2[14]; /* Spare space for future expansion */ 126 + __u64 stx_mnt_id; 127 + __u64 __spare2; 128 + /* 0xa0 */ 129 + __u64 __spare3[12]; /* Spare space for future expansion */ 127 130 /* 0x100 */ 128 131 }; 129 132 ··· 151 148 #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ 152 149 #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ 153 150 #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ 154 - #define STATX_ALL 0x00000fffU /* All currently supported flags */ 151 + #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ 152 + 155 153 #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ 154 + 155 + #ifndef __KERNEL__ 156 + /* 157 + * This is deprecated, and shall remain the same value in the future. To avoid 158 + * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME) 159 + * instead. 160 + */ 161 + #define STATX_ALL 0x00000fffU 162 + #endif 156 163 157 164 /* 158 165 * Attributes to be found in stx_attributes and masked in stx_attributes_mask. ··· 181 168 #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ 182 169 #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ 183 170 #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ 171 + #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ 184 172 #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ 185 173 186 174
+1 -1
samples/vfs/test-statx.c
··· 216 216 struct statx stx; 217 217 int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW; 218 218 219 - unsigned int mask = STATX_ALL; 219 + unsigned int mask = STATX_BASIC_STATS | STATX_BTIME; 220 220 221 221 for (argv++; *argv; argv++) { 222 222 if (strcmp(*argv, "-F") == 0) {
+10 -1
tools/include/uapi/linux/stat.h
··· 148 148 #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ 149 149 #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ 150 150 #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ 151 - #define STATX_ALL 0x00000fffU /* All currently supported flags */ 151 + 152 152 #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ 153 + 154 + #ifndef __KERNEL__ 155 + /* 156 + * This is deprecated, and shall remain the same value in the future. To avoid 157 + * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME) 158 + * instead. 159 + */ 160 + #define STATX_ALL 0x00000fffU 161 + #endif 153 162 154 163 /* 155 164 * Attributes to be found in stx_attributes and masked in stx_attributes_mask.