Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfs-6.8-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

- Fix performance regression introduced by moving the security
permission hook out of do_clone_file_range() and into its caller
vfs_clone_file_range().

This causes the security hook to be called in situations where it
wasn't called before, as the fast permission checks were left in
do_clone_file_range().

Fix this by merging the two implementations back together and
restoring the old ordering: fast permission checks first, expensive
ones later.

- Tweak mount_setattr() permission checking so that mount properties on
the real rootfs can be changed.

When we added mount_setattr() we added additional checks compared to
legacy mount(2). If the mount had a parent, we verified that the
caller's mount namespace and the mount namespace the mount is attached
to match; if it had no parent, we made sure that it's an anonymous mount.

But the real rootfs falls into neither category. It isn't an
anonymous mount because it is obviously attached to the initial mount
namespace, but it also obviously doesn't have a parent mount. So that
means legacy mount(2) allows changing mount properties on the real
rootfs but mount_setattr(2) blocks this. This causes regressions (See
the commit for details).

Fix this by relaxing the check. If the mount has a parent or if it
isn't a detached mount, verify that the mount namespaces of the
caller and the mount are the same. Technically, we could probably
write this even simpler and check that the mount namespaces match if
it isn't a detached mount. But the slightly longer check makes it
clearer what conditions one needs to think about.

* tag 'vfs-6.8-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
fs: relax mount_setattr() permission checks
remap_range: merge do_clone_file_range() into vfs_clone_file_range()

+23 -36
+8 -3
fs/namespace.c
··· 4472 4472 /* 4473 4473 * If this is an attached mount make sure it's located in the callers 4474 4474 * mount namespace. If it's not don't let the caller interact with it. 4475 - * If this is a detached mount make sure it has an anonymous mount 4476 - * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE. 4475 + * 4476 + * If this mount doesn't have a parent it's most often simply a 4477 + * detached mount with an anonymous mount namespace. IOW, something 4478 + * that's simply not attached yet. But there are apparently also users 4479 + * that do change mount properties on the rootfs itself. That obviously 4480 + * neither has a parent nor is it a detached mount so we cannot 4481 + * unconditionally check for detached mounts. 4477 4482 */ 4478 - if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns))) 4483 + if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) 4479 4484 goto out; 4480 4485 4481 4486 /*
+6 -8
fs/overlayfs/copy_up.c
··· 265 265 if (IS_ERR(old_file)) 266 266 return PTR_ERR(old_file); 267 267 268 + /* Try to use clone_file_range to clone up within the same fs */ 269 + cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0); 270 + if (cloned == len) 271 + goto out_fput; 272 + 273 + /* Couldn't clone, so now we try to copy the data */ 268 274 error = rw_verify_area(READ, old_file, &old_pos, len); 269 275 if (!error) 270 276 error = rw_verify_area(WRITE, new_file, &new_pos, len); 271 277 if (error) 272 278 goto out_fput; 273 - 274 - /* Try to use clone_file_range to clone up within the same fs */ 275 - ovl_start_write(dentry); 276 - cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); 277 - ovl_end_write(dentry); 278 - if (cloned == len) 279 - goto out_fput; 280 - /* Couldn't clone, so now we try to copy the data */ 281 279 282 280 /* Check if lower fs supports seek operation */ 283 281 if (old_file->f_mode & FMODE_LSEEK)
+9 -22
fs/remap_range.c
··· 373 373 } 374 374 EXPORT_SYMBOL(generic_remap_file_range_prep); 375 375 376 - loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, 377 - struct file *file_out, loff_t pos_out, 378 - loff_t len, unsigned int remap_flags) 376 + loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, 377 + struct file *file_out, loff_t pos_out, 378 + loff_t len, unsigned int remap_flags) 379 379 { 380 380 loff_t ret; 381 381 ··· 391 391 if (!file_in->f_op->remap_file_range) 392 392 return -EOPNOTSUPP; 393 393 394 - ret = file_in->f_op->remap_file_range(file_in, pos_in, 395 - file_out, pos_out, len, remap_flags); 396 - if (ret < 0) 397 - return ret; 398 - 399 - fsnotify_access(file_in); 400 - fsnotify_modify(file_out); 401 - return ret; 402 - } 403 - EXPORT_SYMBOL(do_clone_file_range); 404 - 405 - loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, 406 - struct file *file_out, loff_t pos_out, 407 - loff_t len, unsigned int remap_flags) 408 - { 409 - loff_t ret; 410 - 411 394 ret = remap_verify_area(file_in, pos_in, len, false); 412 395 if (ret) 413 396 return ret; ··· 400 417 return ret; 401 418 402 419 file_start_write(file_out); 403 - ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len, 404 - remap_flags); 420 + ret = file_in->f_op->remap_file_range(file_in, pos_in, 421 + file_out, pos_out, len, remap_flags); 405 422 file_end_write(file_out); 423 + if (ret < 0) 424 + return ret; 406 425 426 + fsnotify_access(file_in); 427 + fsnotify_modify(file_out); 407 428 return ret; 408 429 } 409 430 EXPORT_SYMBOL(vfs_clone_file_range);
-3
include/linux/fs.h
··· 2101 2101 int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, 2102 2102 struct file *file_out, loff_t pos_out, 2103 2103 loff_t *count, unsigned int remap_flags); 2104 - extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, 2105 - struct file *file_out, loff_t pos_out, 2106 - loff_t len, unsigned int remap_flags); 2107 2104 extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, 2108 2105 struct file *file_out, loff_t pos_out, 2109 2106 loff_t len, unsigned int remap_flags);