/* Listing metadata: at v4.14, 1243 lines, 30 kB (raw view). */
1/* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10#include <uapi/linux/magic.h> 11#include <linux/fs.h> 12#include <linux/namei.h> 13#include <linux/xattr.h> 14#include <linux/mount.h> 15#include <linux/parser.h> 16#include <linux/module.h> 17#include <linux/statfs.h> 18#include <linux/seq_file.h> 19#include <linux/posix_acl_xattr.h> 20#include "overlayfs.h" 21#include "ovl_entry.h" 22 23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 24MODULE_DESCRIPTION("Overlay filesystem"); 25MODULE_LICENSE("GPL"); 26 27 28struct ovl_dir_cache; 29 30#define OVL_MAX_STACK 500 31 32static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 33module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 34MODULE_PARM_DESC(ovl_redirect_dir_def, 35 "Default to on or off for the redirect_dir feature"); 36 37static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 38module_param_named(index, ovl_index_def, bool, 0644); 39MODULE_PARM_DESC(ovl_index_def, 40 "Default to on or off for the inodes index feature"); 41 42static void ovl_dentry_release(struct dentry *dentry) 43{ 44 struct ovl_entry *oe = dentry->d_fsdata; 45 46 if (oe) { 47 unsigned int i; 48 49 for (i = 0; i < oe->numlower; i++) 50 dput(oe->lowerstack[i].dentry); 51 kfree_rcu(oe, rcu); 52 } 53} 54 55static int ovl_check_append_only(struct inode *inode, int flag) 56{ 57 /* 58 * This test was moot in vfs may_open() because overlay inode does 59 * not have the S_APPEND flag, so re-check on real upper inode 60 */ 61 if (IS_APPEND(inode)) { 62 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) 63 return -EPERM; 64 if (flag & O_TRUNC) 65 return -EPERM; 66 } 67 68 return 0; 69} 70 71static struct dentry *ovl_d_real(struct dentry *dentry, 72 const struct inode *inode, 73 unsigned int open_flags, 
unsigned int flags) 74{ 75 struct dentry *real; 76 int err; 77 78 if (flags & D_REAL_UPPER) 79 return ovl_dentry_upper(dentry); 80 81 if (!d_is_reg(dentry)) { 82 if (!inode || inode == d_inode(dentry)) 83 return dentry; 84 goto bug; 85 } 86 87 if (open_flags) { 88 err = ovl_open_maybe_copy_up(dentry, open_flags); 89 if (err) 90 return ERR_PTR(err); 91 } 92 93 real = ovl_dentry_upper(dentry); 94 if (real && (!inode || inode == d_inode(real))) { 95 if (!inode) { 96 err = ovl_check_append_only(d_inode(real), open_flags); 97 if (err) 98 return ERR_PTR(err); 99 } 100 return real; 101 } 102 103 real = ovl_dentry_lower(dentry); 104 if (!real) 105 goto bug; 106 107 /* Handle recursion */ 108 real = d_real(real, inode, open_flags, 0); 109 110 if (!inode || inode == d_inode(real)) 111 return real; 112bug: 113 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 114 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 115 return dentry; 116} 117 118static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 119{ 120 struct ovl_entry *oe = dentry->d_fsdata; 121 unsigned int i; 122 int ret = 1; 123 124 for (i = 0; i < oe->numlower; i++) { 125 struct dentry *d = oe->lowerstack[i].dentry; 126 127 if (d->d_flags & DCACHE_OP_REVALIDATE) { 128 ret = d->d_op->d_revalidate(d, flags); 129 if (ret < 0) 130 return ret; 131 if (!ret) { 132 if (!(flags & LOOKUP_RCU)) 133 d_invalidate(d); 134 return -ESTALE; 135 } 136 } 137 } 138 return 1; 139} 140 141static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 142{ 143 struct ovl_entry *oe = dentry->d_fsdata; 144 unsigned int i; 145 int ret = 1; 146 147 for (i = 0; i < oe->numlower; i++) { 148 struct dentry *d = oe->lowerstack[i].dentry; 149 150 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 151 ret = d->d_op->d_weak_revalidate(d, flags); 152 if (ret <= 0) 153 break; 154 } 155 } 156 return ret; 157} 158 159static const struct dentry_operations ovl_dentry_operations = { 160 
.d_release = ovl_dentry_release, 161 .d_real = ovl_d_real, 162}; 163 164static const struct dentry_operations ovl_reval_dentry_operations = { 165 .d_release = ovl_dentry_release, 166 .d_real = ovl_d_real, 167 .d_revalidate = ovl_dentry_revalidate, 168 .d_weak_revalidate = ovl_dentry_weak_revalidate, 169}; 170 171static struct kmem_cache *ovl_inode_cachep; 172 173static struct inode *ovl_alloc_inode(struct super_block *sb) 174{ 175 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 176 177 if (!oi) 178 return NULL; 179 180 oi->cache = NULL; 181 oi->redirect = NULL; 182 oi->version = 0; 183 oi->flags = 0; 184 oi->__upperdentry = NULL; 185 oi->lower = NULL; 186 mutex_init(&oi->lock); 187 188 return &oi->vfs_inode; 189} 190 191static void ovl_i_callback(struct rcu_head *head) 192{ 193 struct inode *inode = container_of(head, struct inode, i_rcu); 194 195 kmem_cache_free(ovl_inode_cachep, OVL_I(inode)); 196} 197 198static void ovl_destroy_inode(struct inode *inode) 199{ 200 struct ovl_inode *oi = OVL_I(inode); 201 202 dput(oi->__upperdentry); 203 kfree(oi->redirect); 204 ovl_dir_cache_free(inode); 205 mutex_destroy(&oi->lock); 206 207 call_rcu(&inode->i_rcu, ovl_i_callback); 208} 209 210static void ovl_put_super(struct super_block *sb) 211{ 212 struct ovl_fs *ufs = sb->s_fs_info; 213 unsigned i; 214 215 dput(ufs->indexdir); 216 dput(ufs->workdir); 217 if (ufs->workdir_locked) 218 ovl_inuse_unlock(ufs->workbasedir); 219 dput(ufs->workbasedir); 220 if (ufs->upper_mnt && ufs->upperdir_locked) 221 ovl_inuse_unlock(ufs->upper_mnt->mnt_root); 222 mntput(ufs->upper_mnt); 223 for (i = 0; i < ufs->numlower; i++) 224 mntput(ufs->lower_mnt[i]); 225 kfree(ufs->lower_mnt); 226 227 kfree(ufs->config.lowerdir); 228 kfree(ufs->config.upperdir); 229 kfree(ufs->config.workdir); 230 put_cred(ufs->creator_cred); 231 kfree(ufs); 232} 233 234static int ovl_sync_fs(struct super_block *sb, int wait) 235{ 236 struct ovl_fs *ufs = sb->s_fs_info; 237 struct super_block 
*upper_sb; 238 int ret; 239 240 if (!ufs->upper_mnt) 241 return 0; 242 upper_sb = ufs->upper_mnt->mnt_sb; 243 if (!upper_sb->s_op->sync_fs) 244 return 0; 245 246 /* real inodes have already been synced by sync_filesystem(ovl_sb) */ 247 down_read(&upper_sb->s_umount); 248 ret = upper_sb->s_op->sync_fs(upper_sb, wait); 249 up_read(&upper_sb->s_umount); 250 return ret; 251} 252 253/** 254 * ovl_statfs 255 * @sb: The overlayfs super block 256 * @buf: The struct kstatfs to fill in with stats 257 * 258 * Get the filesystem statistics. As writes always target the upper layer 259 * filesystem pass the statfs to the upper filesystem (if it exists) 260 */ 261static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 262{ 263 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 264 struct dentry *root_dentry = dentry->d_sb->s_root; 265 struct path path; 266 int err; 267 268 ovl_path_real(root_dentry, &path); 269 270 err = vfs_statfs(&path, buf); 271 if (!err) { 272 buf->f_namelen = ofs->namelen; 273 buf->f_type = OVERLAYFS_SUPER_MAGIC; 274 } 275 276 return err; 277} 278 279/* Will this overlay be forced to mount/remount ro? */ 280static bool ovl_force_readonly(struct ovl_fs *ufs) 281{ 282 return (!ufs->upper_mnt || !ufs->workdir); 283} 284 285/** 286 * ovl_show_options 287 * 288 * Prints the mount options for a given superblock. 289 * Returns zero; does not fail. 290 */ 291static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 292{ 293 struct super_block *sb = dentry->d_sb; 294 struct ovl_fs *ufs = sb->s_fs_info; 295 296 seq_show_option(m, "lowerdir", ufs->config.lowerdir); 297 if (ufs->config.upperdir) { 298 seq_show_option(m, "upperdir", ufs->config.upperdir); 299 seq_show_option(m, "workdir", ufs->config.workdir); 300 } 301 if (ufs->config.default_permissions) 302 seq_puts(m, ",default_permissions"); 303 if (ufs->config.redirect_dir != ovl_redirect_dir_def) 304 seq_printf(m, ",redirect_dir=%s", 305 ufs->config.redirect_dir ? 
"on" : "off"); 306 if (ufs->config.index != ovl_index_def) 307 seq_printf(m, ",index=%s", 308 ufs->config.index ? "on" : "off"); 309 return 0; 310} 311 312static int ovl_remount(struct super_block *sb, int *flags, char *data) 313{ 314 struct ovl_fs *ufs = sb->s_fs_info; 315 316 if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs)) 317 return -EROFS; 318 319 return 0; 320} 321 322static const struct super_operations ovl_super_operations = { 323 .alloc_inode = ovl_alloc_inode, 324 .destroy_inode = ovl_destroy_inode, 325 .drop_inode = generic_delete_inode, 326 .put_super = ovl_put_super, 327 .sync_fs = ovl_sync_fs, 328 .statfs = ovl_statfs, 329 .show_options = ovl_show_options, 330 .remount_fs = ovl_remount, 331}; 332 333enum { 334 OPT_LOWERDIR, 335 OPT_UPPERDIR, 336 OPT_WORKDIR, 337 OPT_DEFAULT_PERMISSIONS, 338 OPT_REDIRECT_DIR_ON, 339 OPT_REDIRECT_DIR_OFF, 340 OPT_INDEX_ON, 341 OPT_INDEX_OFF, 342 OPT_ERR, 343}; 344 345static const match_table_t ovl_tokens = { 346 {OPT_LOWERDIR, "lowerdir=%s"}, 347 {OPT_UPPERDIR, "upperdir=%s"}, 348 {OPT_WORKDIR, "workdir=%s"}, 349 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 350 {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, 351 {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, 352 {OPT_INDEX_ON, "index=on"}, 353 {OPT_INDEX_OFF, "index=off"}, 354 {OPT_ERR, NULL} 355}; 356 357static char *ovl_next_opt(char **s) 358{ 359 char *sbegin = *s; 360 char *p; 361 362 if (sbegin == NULL) 363 return NULL; 364 365 for (p = sbegin; *p; p++) { 366 if (*p == '\\') { 367 p++; 368 if (!*p) 369 break; 370 } else if (*p == ',') { 371 *p = '\0'; 372 *s = p + 1; 373 return sbegin; 374 } 375 } 376 *s = NULL; 377 return sbegin; 378} 379 380static int ovl_parse_opt(char *opt, struct ovl_config *config) 381{ 382 char *p; 383 384 while ((p = ovl_next_opt(&opt)) != NULL) { 385 int token; 386 substring_t args[MAX_OPT_ARGS]; 387 388 if (!*p) 389 continue; 390 391 token = match_token(p, ovl_tokens, args); 392 switch (token) { 393 case OPT_UPPERDIR: 394 
kfree(config->upperdir); 395 config->upperdir = match_strdup(&args[0]); 396 if (!config->upperdir) 397 return -ENOMEM; 398 break; 399 400 case OPT_LOWERDIR: 401 kfree(config->lowerdir); 402 config->lowerdir = match_strdup(&args[0]); 403 if (!config->lowerdir) 404 return -ENOMEM; 405 break; 406 407 case OPT_WORKDIR: 408 kfree(config->workdir); 409 config->workdir = match_strdup(&args[0]); 410 if (!config->workdir) 411 return -ENOMEM; 412 break; 413 414 case OPT_DEFAULT_PERMISSIONS: 415 config->default_permissions = true; 416 break; 417 418 case OPT_REDIRECT_DIR_ON: 419 config->redirect_dir = true; 420 break; 421 422 case OPT_REDIRECT_DIR_OFF: 423 config->redirect_dir = false; 424 break; 425 426 case OPT_INDEX_ON: 427 config->index = true; 428 break; 429 430 case OPT_INDEX_OFF: 431 config->index = false; 432 break; 433 434 default: 435 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 436 return -EINVAL; 437 } 438 } 439 440 /* Workdir is useless in non-upper mount */ 441 if (!config->upperdir && config->workdir) { 442 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 443 config->workdir); 444 kfree(config->workdir); 445 config->workdir = NULL; 446 } 447 448 return 0; 449} 450 451#define OVL_WORKDIR_NAME "work" 452#define OVL_INDEXDIR_NAME "index" 453 454static struct dentry *ovl_workdir_create(struct super_block *sb, 455 struct ovl_fs *ufs, 456 struct dentry *dentry, 457 const char *name, bool persist) 458{ 459 struct inode *dir = dentry->d_inode; 460 struct vfsmount *mnt = ufs->upper_mnt; 461 struct dentry *work; 462 int err; 463 bool retried = false; 464 bool locked = false; 465 466 err = mnt_want_write(mnt); 467 if (err) 468 goto out_err; 469 470 inode_lock_nested(dir, I_MUTEX_PARENT); 471 locked = true; 472 473retry: 474 work = lookup_one_len(name, dentry, strlen(name)); 475 476 if (!IS_ERR(work)) { 477 struct iattr attr = { 478 .ia_valid = ATTR_MODE, 479 .ia_mode = S_IFDIR | 0, 480 }; 481 482 if 
(work->d_inode) { 483 err = -EEXIST; 484 if (retried) 485 goto out_dput; 486 487 if (persist) 488 goto out_unlock; 489 490 retried = true; 491 ovl_workdir_cleanup(dir, mnt, work, 0); 492 dput(work); 493 goto retry; 494 } 495 496 err = ovl_create_real(dir, work, 497 &(struct cattr){.mode = S_IFDIR | 0}, 498 NULL, true); 499 if (err) 500 goto out_dput; 501 502 /* 503 * Try to remove POSIX ACL xattrs from workdir. We are good if: 504 * 505 * a) success (there was a POSIX ACL xattr and was removed) 506 * b) -ENODATA (there was no POSIX ACL xattr) 507 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 508 * 509 * There are various other error values that could effectively 510 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 511 * if the xattr name is too long), but the set of filesystems 512 * allowed as upper are limited to "normal" ones, where checking 513 * for the above two errors is sufficient. 514 */ 515 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 516 if (err && err != -ENODATA && err != -EOPNOTSUPP) 517 goto out_dput; 518 519 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 520 if (err && err != -ENODATA && err != -EOPNOTSUPP) 521 goto out_dput; 522 523 /* Clear any inherited mode bits */ 524 inode_lock(work->d_inode); 525 err = notify_change(work, &attr, NULL); 526 inode_unlock(work->d_inode); 527 if (err) 528 goto out_dput; 529 } else { 530 err = PTR_ERR(work); 531 goto out_err; 532 } 533out_unlock: 534 mnt_drop_write(mnt); 535 if (locked) 536 inode_unlock(dir); 537 538 return work; 539 540out_dput: 541 dput(work); 542out_err: 543 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 544 ufs->config.workdir, name, -err); 545 sb->s_flags |= MS_RDONLY; 546 work = NULL; 547 goto out_unlock; 548} 549 550static void ovl_unescape(char *s) 551{ 552 char *d = s; 553 554 for (;; s++, d++) { 555 if (*s == '\\') 556 s++; 557 *d = *s; 558 if (!*s) 559 break; 560 } 561} 562 563static int 
ovl_mount_dir_noesc(const char *name, struct path *path) 564{ 565 int err = -EINVAL; 566 567 if (!*name) { 568 pr_err("overlayfs: empty lowerdir\n"); 569 goto out; 570 } 571 err = kern_path(name, LOOKUP_FOLLOW, path); 572 if (err) { 573 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 574 goto out; 575 } 576 err = -EINVAL; 577 if (ovl_dentry_weird(path->dentry)) { 578 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 579 goto out_put; 580 } 581 if (!d_is_dir(path->dentry)) { 582 pr_err("overlayfs: '%s' not a directory\n", name); 583 goto out_put; 584 } 585 return 0; 586 587out_put: 588 path_put(path); 589out: 590 return err; 591} 592 593static int ovl_mount_dir(const char *name, struct path *path) 594{ 595 int err = -ENOMEM; 596 char *tmp = kstrdup(name, GFP_KERNEL); 597 598 if (tmp) { 599 ovl_unescape(tmp); 600 err = ovl_mount_dir_noesc(tmp, path); 601 602 if (!err) 603 if (ovl_dentry_remote(path->dentry)) { 604 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 605 tmp); 606 path_put(path); 607 err = -EINVAL; 608 } 609 kfree(tmp); 610 } 611 return err; 612} 613 614static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 615 const char *name) 616{ 617 struct kstatfs statfs; 618 int err = vfs_statfs(path, &statfs); 619 620 if (err) 621 pr_err("overlayfs: statfs failed on '%s'\n", name); 622 else 623 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 624 625 return err; 626} 627 628static int ovl_lower_dir(const char *name, struct path *path, 629 struct ovl_fs *ofs, int *stack_depth, bool *remote) 630{ 631 int err; 632 633 err = ovl_mount_dir_noesc(name, path); 634 if (err) 635 goto out; 636 637 err = ovl_check_namelen(path, ofs, name); 638 if (err) 639 goto out_put; 640 641 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 642 643 if (ovl_dentry_remote(path->dentry)) 644 *remote = true; 645 646 /* 647 * The inodes index feature needs to encode and decode file 648 * handles, so it requires that 
all layers support them. 649 */ 650 if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { 651 ofs->config.index = false; 652 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); 653 } 654 655 return 0; 656 657out_put: 658 path_put(path); 659out: 660 return err; 661} 662 663/* Workdir should not be subdir of upperdir and vice versa */ 664static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 665{ 666 bool ok = false; 667 668 if (workdir != upperdir) { 669 ok = (lock_rename(workdir, upperdir) == NULL); 670 unlock_rename(workdir, upperdir); 671 } 672 return ok; 673} 674 675static unsigned int ovl_split_lowerdirs(char *str) 676{ 677 unsigned int ctr = 1; 678 char *s, *d; 679 680 for (s = d = str;; s++, d++) { 681 if (*s == '\\') { 682 s++; 683 } else if (*s == ':') { 684 *d = '\0'; 685 ctr++; 686 continue; 687 } 688 *d = *s; 689 if (!*s) 690 break; 691 } 692 return ctr; 693} 694 695static int __maybe_unused 696ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 697 struct dentry *dentry, struct inode *inode, 698 const char *name, void *buffer, size_t size) 699{ 700 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 701} 702 703static int __maybe_unused 704ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 705 struct dentry *dentry, struct inode *inode, 706 const char *name, const void *value, 707 size_t size, int flags) 708{ 709 struct dentry *workdir = ovl_workdir(dentry); 710 struct inode *realinode = ovl_inode_real(inode); 711 struct posix_acl *acl = NULL; 712 int err; 713 714 /* Check that everything is OK before copy-up */ 715 if (value) { 716 acl = posix_acl_from_xattr(&init_user_ns, value, size); 717 if (IS_ERR(acl)) 718 return PTR_ERR(acl); 719 } 720 err = -EOPNOTSUPP; 721 if (!IS_POSIXACL(d_inode(workdir))) 722 goto out_acl_release; 723 if (!realinode->i_op->set_acl) 724 goto out_acl_release; 725 if (handler->flags == ACL_TYPE_DEFAULT && 
!S_ISDIR(inode->i_mode)) { 726 err = acl ? -EACCES : 0; 727 goto out_acl_release; 728 } 729 err = -EPERM; 730 if (!inode_owner_or_capable(inode)) 731 goto out_acl_release; 732 733 posix_acl_release(acl); 734 735 /* 736 * Check if sgid bit needs to be cleared (actual setacl operation will 737 * be done with mounter's capabilities and so that won't do it for us). 738 */ 739 if (unlikely(inode->i_mode & S_ISGID) && 740 handler->flags == ACL_TYPE_ACCESS && 741 !in_group_p(inode->i_gid) && 742 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 743 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 744 745 err = ovl_setattr(dentry, &iattr); 746 if (err) 747 return err; 748 } 749 750 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 751 if (!err) 752 ovl_copyattr(ovl_inode_real(inode), inode); 753 754 return err; 755 756out_acl_release: 757 posix_acl_release(acl); 758 return err; 759} 760 761static int ovl_own_xattr_get(const struct xattr_handler *handler, 762 struct dentry *dentry, struct inode *inode, 763 const char *name, void *buffer, size_t size) 764{ 765 return -EOPNOTSUPP; 766} 767 768static int ovl_own_xattr_set(const struct xattr_handler *handler, 769 struct dentry *dentry, struct inode *inode, 770 const char *name, const void *value, 771 size_t size, int flags) 772{ 773 return -EOPNOTSUPP; 774} 775 776static int ovl_other_xattr_get(const struct xattr_handler *handler, 777 struct dentry *dentry, struct inode *inode, 778 const char *name, void *buffer, size_t size) 779{ 780 return ovl_xattr_get(dentry, inode, name, buffer, size); 781} 782 783static int ovl_other_xattr_set(const struct xattr_handler *handler, 784 struct dentry *dentry, struct inode *inode, 785 const char *name, const void *value, 786 size_t size, int flags) 787{ 788 return ovl_xattr_set(dentry, inode, name, value, size, flags); 789} 790 791static const struct xattr_handler __maybe_unused 792ovl_posix_acl_access_xattr_handler = { 793 .name = XATTR_NAME_POSIX_ACL_ACCESS, 794 
.flags = ACL_TYPE_ACCESS, 795 .get = ovl_posix_acl_xattr_get, 796 .set = ovl_posix_acl_xattr_set, 797}; 798 799static const struct xattr_handler __maybe_unused 800ovl_posix_acl_default_xattr_handler = { 801 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 802 .flags = ACL_TYPE_DEFAULT, 803 .get = ovl_posix_acl_xattr_get, 804 .set = ovl_posix_acl_xattr_set, 805}; 806 807static const struct xattr_handler ovl_own_xattr_handler = { 808 .prefix = OVL_XATTR_PREFIX, 809 .get = ovl_own_xattr_get, 810 .set = ovl_own_xattr_set, 811}; 812 813static const struct xattr_handler ovl_other_xattr_handler = { 814 .prefix = "", /* catch all */ 815 .get = ovl_other_xattr_get, 816 .set = ovl_other_xattr_set, 817}; 818 819static const struct xattr_handler *ovl_xattr_handlers[] = { 820#ifdef CONFIG_FS_POSIX_ACL 821 &ovl_posix_acl_access_xattr_handler, 822 &ovl_posix_acl_default_xattr_handler, 823#endif 824 &ovl_own_xattr_handler, 825 &ovl_other_xattr_handler, 826 NULL 827}; 828 829static int ovl_fill_super(struct super_block *sb, void *data, int silent) 830{ 831 struct path upperpath = { }; 832 struct path workpath = { }; 833 struct dentry *root_dentry; 834 struct ovl_entry *oe; 835 struct ovl_fs *ufs; 836 struct path *stack = NULL; 837 char *lowertmp; 838 char *lower; 839 unsigned int numlower; 840 unsigned int stacklen = 0; 841 unsigned int i; 842 bool remote = false; 843 struct cred *cred; 844 int err; 845 846 err = -ENOMEM; 847 ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 848 if (!ufs) 849 goto out; 850 851 ufs->config.redirect_dir = ovl_redirect_dir_def; 852 ufs->config.index = ovl_index_def; 853 err = ovl_parse_opt((char *) data, &ufs->config); 854 if (err) 855 goto out_free_config; 856 857 err = -EINVAL; 858 if (!ufs->config.lowerdir) { 859 if (!silent) 860 pr_err("overlayfs: missing 'lowerdir'\n"); 861 goto out_free_config; 862 } 863 864 sb->s_stack_depth = 0; 865 sb->s_maxbytes = MAX_LFS_FILESIZE; 866 if (ufs->config.upperdir) { 867 if (!ufs->config.workdir) { 868 pr_err("overlayfs: 
missing 'workdir'\n"); 869 goto out_free_config; 870 } 871 872 err = ovl_mount_dir(ufs->config.upperdir, &upperpath); 873 if (err) 874 goto out_free_config; 875 876 /* Upper fs should not be r/o */ 877 if (sb_rdonly(upperpath.mnt->mnt_sb)) { 878 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 879 err = -EINVAL; 880 goto out_put_upperpath; 881 } 882 883 err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir); 884 if (err) 885 goto out_put_upperpath; 886 887 err = -EBUSY; 888 if (ovl_inuse_trylock(upperpath.dentry)) { 889 ufs->upperdir_locked = true; 890 } else if (ufs->config.index) { 891 pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n"); 892 goto out_put_upperpath; 893 } else { 894 pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); 895 } 896 897 err = ovl_mount_dir(ufs->config.workdir, &workpath); 898 if (err) 899 goto out_unlock_upperdentry; 900 901 err = -EINVAL; 902 if (upperpath.mnt != workpath.mnt) { 903 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 904 goto out_put_workpath; 905 } 906 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { 907 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 908 goto out_put_workpath; 909 } 910 911 err = -EBUSY; 912 if (ovl_inuse_trylock(workpath.dentry)) { 913 ufs->workdir_locked = true; 914 } else if (ufs->config.index) { 915 pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n"); 916 goto out_put_workpath; 917 } else { 918 pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); 919 } 920 921 ufs->workbasedir = workpath.dentry; 922 sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; 923 } 924 err = -ENOMEM; 925 lowertmp = 
kstrdup(ufs->config.lowerdir, GFP_KERNEL); 926 if (!lowertmp) 927 goto out_unlock_workdentry; 928 929 err = -EINVAL; 930 stacklen = ovl_split_lowerdirs(lowertmp); 931 if (stacklen > OVL_MAX_STACK) { 932 pr_err("overlayfs: too many lower directories, limit is %d\n", 933 OVL_MAX_STACK); 934 goto out_free_lowertmp; 935 } else if (!ufs->config.upperdir && stacklen == 1) { 936 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 937 goto out_free_lowertmp; 938 } 939 940 err = -ENOMEM; 941 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 942 if (!stack) 943 goto out_free_lowertmp; 944 945 err = -EINVAL; 946 lower = lowertmp; 947 for (numlower = 0; numlower < stacklen; numlower++) { 948 err = ovl_lower_dir(lower, &stack[numlower], ufs, 949 &sb->s_stack_depth, &remote); 950 if (err) 951 goto out_put_lowerpath; 952 953 lower = strchr(lower, '\0') + 1; 954 } 955 956 err = -EINVAL; 957 sb->s_stack_depth++; 958 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 959 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 960 goto out_put_lowerpath; 961 } 962 963 if (ufs->config.upperdir) { 964 ufs->upper_mnt = clone_private_mount(&upperpath); 965 err = PTR_ERR(ufs->upper_mnt); 966 if (IS_ERR(ufs->upper_mnt)) { 967 pr_err("overlayfs: failed to clone upperpath\n"); 968 goto out_put_lowerpath; 969 } 970 971 /* Don't inherit atime flags */ 972 ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 973 974 sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; 975 976 ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry, 977 OVL_WORKDIR_NAME, false); 978 /* 979 * Upper should support d_type, else whiteouts are visible. 980 * Given workdir and upper are on same fs, we can do 981 * iterate_dir() on workdir. This check requires successful 982 * creation of workdir in previous step. 
983 */ 984 if (ufs->workdir) { 985 struct dentry *temp; 986 987 err = ovl_check_d_type_supported(&workpath); 988 if (err < 0) 989 goto out_put_workdir; 990 991 /* 992 * We allowed this configuration and don't want to 993 * break users over kernel upgrade. So warn instead 994 * of erroring out. 995 */ 996 if (!err) 997 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 998 999 /* Check if upper/work fs supports O_TMPFILE */ 1000 temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); 1001 ufs->tmpfile = !IS_ERR(temp); 1002 if (ufs->tmpfile) 1003 dput(temp); 1004 else 1005 pr_warn("overlayfs: upper fs does not support tmpfile.\n"); 1006 1007 /* 1008 * Check if upper/work fs supports trusted.overlay.* 1009 * xattr 1010 */ 1011 err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, 1012 "0", 1, 0); 1013 if (err) { 1014 ufs->noxattr = true; 1015 pr_warn("overlayfs: upper fs does not support xattr.\n"); 1016 } else { 1017 vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); 1018 } 1019 1020 /* Check if upper/work fs supports file handles */ 1021 if (ufs->config.index && 1022 !ovl_can_decode_fh(ufs->workdir->d_sb)) { 1023 ufs->config.index = false; 1024 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); 1025 } 1026 } 1027 } 1028 1029 err = -ENOMEM; 1030 ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); 1031 if (ufs->lower_mnt == NULL) 1032 goto out_put_workdir; 1033 for (i = 0; i < numlower; i++) { 1034 struct vfsmount *mnt = clone_private_mount(&stack[i]); 1035 1036 err = PTR_ERR(mnt); 1037 if (IS_ERR(mnt)) { 1038 pr_err("overlayfs: failed to clone lowerpath\n"); 1039 goto out_put_lower_mnt; 1040 } 1041 /* 1042 * Make lower_mnt R/O. That way fchmod/fchown on lower file 1043 * will fail instead of modifying lower fs. 
1044 */ 1045 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1046 1047 ufs->lower_mnt[ufs->numlower] = mnt; 1048 ufs->numlower++; 1049 1050 /* Check if all lower layers are on same sb */ 1051 if (i == 0) 1052 ufs->same_sb = mnt->mnt_sb; 1053 else if (ufs->same_sb != mnt->mnt_sb) 1054 ufs->same_sb = NULL; 1055 } 1056 1057 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1058 if (!ufs->upper_mnt) 1059 sb->s_flags |= MS_RDONLY; 1060 else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) 1061 ufs->same_sb = NULL; 1062 1063 if (!(ovl_force_readonly(ufs)) && ufs->config.index) { 1064 /* Verify lower root is upper root origin */ 1065 err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], 1066 stack[0].dentry, false, true); 1067 if (err) { 1068 pr_err("overlayfs: failed to verify upper root origin\n"); 1069 goto out_put_lower_mnt; 1070 } 1071 1072 ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, 1073 OVL_INDEXDIR_NAME, true); 1074 if (ufs->indexdir) { 1075 /* Verify upper root is index dir origin */ 1076 err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, 1077 upperpath.dentry, true, true); 1078 if (err) 1079 pr_err("overlayfs: failed to verify index dir origin\n"); 1080 1081 /* Cleanup bad/stale/orphan index entries */ 1082 if (!err) 1083 err = ovl_indexdir_cleanup(ufs->indexdir, 1084 ufs->upper_mnt, 1085 stack, numlower); 1086 } 1087 if (err || !ufs->indexdir) 1088 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1089 if (err) 1090 goto out_put_indexdir; 1091 } 1092 1093 /* Show index=off/on in /proc/mounts for any of the reasons above */ 1094 if (!ufs->indexdir) 1095 ufs->config.index = false; 1096 1097 if (remote) 1098 sb->s_d_op = &ovl_reval_dentry_operations; 1099 else 1100 sb->s_d_op = &ovl_dentry_operations; 1101 1102 err = -ENOMEM; 1103 ufs->creator_cred = cred = prepare_creds(); 1104 if (!cred) 1105 goto out_put_indexdir; 1106 1107 /* Never override disk quota limits or use 
reserved space */ 1108 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1109 1110 err = -ENOMEM; 1111 oe = ovl_alloc_entry(numlower); 1112 if (!oe) 1113 goto out_put_cred; 1114 1115 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1116 sb->s_op = &ovl_super_operations; 1117 sb->s_xattr = ovl_xattr_handlers; 1118 sb->s_fs_info = ufs; 1119 sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; 1120 1121 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1122 if (!root_dentry) 1123 goto out_free_oe; 1124 1125 mntput(upperpath.mnt); 1126 for (i = 0; i < numlower; i++) 1127 mntput(stack[i].mnt); 1128 mntput(workpath.mnt); 1129 kfree(lowertmp); 1130 1131 if (upperpath.dentry) { 1132 oe->has_upper = true; 1133 if (ovl_is_impuredir(upperpath.dentry)) 1134 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1135 } 1136 for (i = 0; i < numlower; i++) { 1137 oe->lowerstack[i].dentry = stack[i].dentry; 1138 oe->lowerstack[i].mnt = ufs->lower_mnt[i]; 1139 } 1140 kfree(stack); 1141 1142 root_dentry->d_fsdata = oe; 1143 1144 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1145 ovl_dentry_lower(root_dentry)); 1146 1147 sb->s_root = root_dentry; 1148 1149 return 0; 1150 1151out_free_oe: 1152 kfree(oe); 1153out_put_cred: 1154 put_cred(ufs->creator_cred); 1155out_put_indexdir: 1156 dput(ufs->indexdir); 1157out_put_lower_mnt: 1158 for (i = 0; i < ufs->numlower; i++) 1159 mntput(ufs->lower_mnt[i]); 1160 kfree(ufs->lower_mnt); 1161out_put_workdir: 1162 dput(ufs->workdir); 1163 mntput(ufs->upper_mnt); 1164out_put_lowerpath: 1165 for (i = 0; i < numlower; i++) 1166 path_put(&stack[i]); 1167 kfree(stack); 1168out_free_lowertmp: 1169 kfree(lowertmp); 1170out_unlock_workdentry: 1171 if (ufs->workdir_locked) 1172 ovl_inuse_unlock(workpath.dentry); 1173out_put_workpath: 1174 path_put(&workpath); 1175out_unlock_upperdentry: 1176 if (ufs->upperdir_locked) 1177 ovl_inuse_unlock(upperpath.dentry); 1178out_put_upperpath: 1179 path_put(&upperpath); 1180out_free_config: 1181 kfree(ufs->config.lowerdir); 1182 
kfree(ufs->config.upperdir); 1183 kfree(ufs->config.workdir); 1184 kfree(ufs); 1185out: 1186 return err; 1187} 1188 1189static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1190 const char *dev_name, void *raw_data) 1191{ 1192 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1193} 1194 1195static struct file_system_type ovl_fs_type = { 1196 .owner = THIS_MODULE, 1197 .name = "overlay", 1198 .mount = ovl_mount, 1199 .kill_sb = kill_anon_super, 1200}; 1201MODULE_ALIAS_FS("overlay"); 1202 1203static void ovl_inode_init_once(void *foo) 1204{ 1205 struct ovl_inode *oi = foo; 1206 1207 inode_init_once(&oi->vfs_inode); 1208} 1209 1210static int __init ovl_init(void) 1211{ 1212 int err; 1213 1214 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1215 sizeof(struct ovl_inode), 0, 1216 (SLAB_RECLAIM_ACCOUNT| 1217 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1218 ovl_inode_init_once); 1219 if (ovl_inode_cachep == NULL) 1220 return -ENOMEM; 1221 1222 err = register_filesystem(&ovl_fs_type); 1223 if (err) 1224 kmem_cache_destroy(ovl_inode_cachep); 1225 1226 return err; 1227} 1228 1229static void __exit ovl_exit(void) 1230{ 1231 unregister_filesystem(&ovl_fs_type); 1232 1233 /* 1234 * Make sure all delayed rcu free inodes are flushed before we 1235 * destroy cache. 1236 */ 1237 rcu_barrier(); 1238 kmem_cache_destroy(ovl_inode_cachep); 1239 1240} 1241 1242module_init(ovl_init); 1243module_exit(ovl_exit);