statx: optimize copy of struct statx to userspace

I found that statx() was significantly slower than stat(). As a
microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs
file against the same number of statx() calls passed a NULL path:

$ time ./stat_benchmark

real 0m1.464s
user 0m0.275s
sys 0m1.187s

$ time ./statx_benchmark

real 0m5.530s
user 0m0.281s
sys 0m5.247s
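
The benchmark sources are not included in this commit; the following is
a minimal sketch of what statx_benchmark might look like (hypothetical
file name and path; it assumes this kernel's statx() convention, where
a NULL path means "operate on dfd", the case handled by vfs_statx_fd()):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/stat.h>

	int main(void)
	{
		/* A file on a tmpfs mount, so no disk I/O is measured. */
		int fd = open("/tmp/testfile", O_RDONLY | O_CREAT, 0644);
		struct statx stx;
		long i;

		if (fd < 0)
			return 1;
		for (i = 0; i < 10000000; i++) {
			/* flags == 0 is AT_STATX_SYNC_AS_STAT */
			syscall(__NR_statx, fd, NULL, 0,
				STATX_BASIC_STATS, &stx);
		}
		return 0;
	}

stat_benchmark is the same with the loop body replaced by a call to
fstat(fd, &st) on a struct stat.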

statx is expected to be a little slower than stat because struct statx
is larger than struct stat, but not by *that* much. It turns out that
most of the overhead was in copying struct statx to userspace, mostly in
all the stac/clac instructions that got generated for each __put_user()
call. (This was on x86_64, but some other architectures, e.g. arm64,
have something similar now too.)
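
To make the cost concrete, here is a kernel-style sketch of the two
uaccess patterns (hypothetical helper names; the real before and after
versions are in the diff below). With SMAP, each __put_user() brackets
its single store with a stac/clac pair, so writing N fields toggles
user access N times, whereas one bulk copy_to_user() pays for a single
pair no matter how large the structure is:

	/* One stac/clac pair per field; relies on a prior access_ok(). */
	static int put_fields_one_by_one(const struct kstat *stat,
					 struct statx __user *buffer)
	{
		if (__put_user(stat->ino,  &buffer->stx_ino) ||	/* stac; mov; clac */
		    __put_user(stat->size, &buffer->stx_size))	/* stac; mov; clac */
			return -EFAULT;
		return 0;
	}

	/* One stac/clac pair around the whole copy. */
	static int put_fields_in_bulk(const struct statx *tmp,
				      struct statx __user *buffer)
	{
		return copy_to_user(buffer, tmp, sizeof(*tmp)) ? -EFAULT : 0;
	}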

stat() instead initializes its struct on the stack and copies it to
userspace with a single call to copy_to_user(). This turns out to be
much faster, and changing statx to do this makes it almost as fast as
stat:

$ time ./statx_benchmark

real 0m1.624s
user 0m0.270s
sys 0m1.354s

For zeroing the reserved fields, start by zeroing the full struct with
memset. This makes it clear that every byte copied to userspace is
initialized, even implicit padding bytes (though there are none
currently). In the scenarios I tested, it also performed the same as a
designated initializer. Manually initializing each field was still
slightly faster, but would have been more error-prone and less
verifiable.
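
For reference, the designated-initializer alternative mentioned above
would have looked roughly like this (sketch only; the committed code
uses memset(), as shown in the diff below):

	struct statx tmp = {
		.stx_mask	= stat->result_mask,
		.stx_blksize	= stat->blksize,
		/* ... remaining fields, as in the patch ... */
	};

C guarantees that members not named in the initializer (such as the
__spare fields) are zero-initialized, but it makes no promise about
padding bytes, which is why the explicit memset() is the more
verifiable choice.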

Also rename statx_set_result() to cp_statx() for consistency with
cp_old_stat() et al., and make it noinline so that struct statx doesn't
add to the stack usage during the main portion of the syscall execution.
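
The patch spells this as noinline_for_stack, which the kernel defines
as a plain alias for noinline, named to document the intent:

	/* include/linux/compiler.h */
	#define noinline_for_stack noinline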

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Authored by Eric Biggers and committed by Al Viro (64bd7204, b15fb70b)

fs/stat.c | +29 -39
···
 }
 #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
 
-static inline int __put_timestamp(struct timespec *kts,
-				  struct statx_timestamp __user *uts)
+static noinline_for_stack int
+cp_statx(const struct kstat *stat, struct statx __user *buffer)
 {
-	return (__put_user(kts->tv_sec,		&uts->tv_sec	) ||
-		__put_user(kts->tv_nsec,	&uts->tv_nsec	) ||
-		__put_user(0,			&uts->__reserved	));
-}
+	struct statx tmp;
 
-/*
- * Set the statx results.
- */
-static long statx_set_result(struct kstat *stat, struct statx __user *buffer)
-{
-	uid_t uid = from_kuid_munged(current_user_ns(), stat->uid);
-	gid_t gid = from_kgid_munged(current_user_ns(), stat->gid);
+	memset(&tmp, 0, sizeof(tmp));
 
-	if (__put_user(stat->result_mask,	&buffer->stx_mask	) ||
-	    __put_user(stat->mode,		&buffer->stx_mode	) ||
-	    __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) ||
-	    __put_user(stat->nlink,		&buffer->stx_nlink	) ||
-	    __put_user(uid,			&buffer->stx_uid	) ||
-	    __put_user(gid,			&buffer->stx_gid	) ||
-	    __put_user(stat->attributes,	&buffer->stx_attributes	) ||
-	    __put_user(stat->blksize,		&buffer->stx_blksize	) ||
-	    __put_user(MAJOR(stat->rdev),	&buffer->stx_rdev_major	) ||
-	    __put_user(MINOR(stat->rdev),	&buffer->stx_rdev_minor	) ||
-	    __put_user(MAJOR(stat->dev),	&buffer->stx_dev_major	) ||
-	    __put_user(MINOR(stat->dev),	&buffer->stx_dev_minor	) ||
-	    __put_timestamp(&stat->atime,	&buffer->stx_atime	) ||
-	    __put_timestamp(&stat->btime,	&buffer->stx_btime	) ||
-	    __put_timestamp(&stat->ctime,	&buffer->stx_ctime	) ||
-	    __put_timestamp(&stat->mtime,	&buffer->stx_mtime	) ||
-	    __put_user(stat->ino,		&buffer->stx_ino	) ||
-	    __put_user(stat->size,		&buffer->stx_size	) ||
-	    __put_user(stat->blocks,		&buffer->stx_blocks	) ||
-	    __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) ||
-	    __clear_user(&buffer->__spare2, sizeof(buffer->__spare2)))
-		return -EFAULT;
+	tmp.stx_mask = stat->result_mask;
+	tmp.stx_blksize = stat->blksize;
+	tmp.stx_attributes = stat->attributes;
+	tmp.stx_nlink = stat->nlink;
+	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.stx_mode = stat->mode;
+	tmp.stx_ino = stat->ino;
+	tmp.stx_size = stat->size;
+	tmp.stx_blocks = stat->blocks;
+	tmp.stx_atime.tv_sec = stat->atime.tv_sec;
+	tmp.stx_atime.tv_nsec = stat->atime.tv_nsec;
+	tmp.stx_btime.tv_sec = stat->btime.tv_sec;
+	tmp.stx_btime.tv_nsec = stat->btime.tv_nsec;
+	tmp.stx_ctime.tv_sec = stat->ctime.tv_sec;
+	tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec;
+	tmp.stx_mtime.tv_sec = stat->mtime.tv_sec;
+	tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec;
+	tmp.stx_rdev_major = MAJOR(stat->rdev);
+	tmp.stx_rdev_minor = MINOR(stat->rdev);
+	tmp.stx_dev_major = MAJOR(stat->dev);
+	tmp.stx_dev_minor = MINOR(stat->dev);
 
-	return 0;
+	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
 /**
···
 
 	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
 		return -EINVAL;
-	if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer)))
-		return -EFAULT;
 
 	if (filename)
 		error = vfs_statx(dfd, filename, flags, &stat, mask);
···
 		error = vfs_statx_fd(dfd, &stat, mask, flags);
 	if (error)
 		return error;
-	return statx_set_result(&stat, buffer);
+
+	return cp_statx(&stat, buffer);
 }
 
 /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */