Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs

Pull overlayfs updates from Miklos Szeredi:
"This fixes d_ino correctness in readdir, which brings overlayfs on par
with normal filesystems regarding inode number semantics, as long as
all layers are on the same filesystem.

There are also some bug fixes; one in particular (random ioctls
shouldn't be able to modify lower layers) touches some vfs code, but is
of course a no-op for non-overlay filesystems"

* 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
ovl: fix false positive ESTALE on lookup
ovl: don't allow writing ioctl on lower layer
ovl: fix relatime for directories
vfs: add flags to d_real()
ovl: cleanup d_real for negative
ovl: constant d_ino for non-merge dirs
ovl: constant d_ino across copy up
ovl: fix readdir error value
ovl: check snprintf return
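
The "don't allow writing ioctl on lower layer" change above is the
user-visible part of the vfs work mentioned in the pull message. As a
hypothetical userspace sketch (the overlay path below is made up, and the
lower filesystem is assumed to support FS_IOC_GETFLAGS, e.g. ext4): a
write-type ioctl issued against a file that exists only on a read-only
lower layer is now expected to fail with EPERM instead of modifying the
lower layer behind overlayfs's back.

    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(void)
    {
        /* Hypothetical path: a file that exists only on the lower layer. */
        int fd = open("/mnt/overlay/lower-only-file", O_RDONLY);
        int attr;

        if (fd < 0) {
            perror("open");
            return 1;
        }
        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
            attr |= FS_NODUMP_FL;
            /* Previously this could modify the lower layer behind
             * overlayfs's back; now it should fail with EPERM. */
            if (ioctl(fd, FS_IOC_SETFLAGS, &attr) != 0)
                printf("FS_IOC_SETFLAGS: %s\n", strerror(errno));
        }
        close(fd);
        return 0;
    }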

+496 -79
+1 -1
Documentation/filesystems/Locking
··· 22 22 struct vfsmount *(*d_automount)(struct path *path); 23 23 int (*d_manage)(const struct path *, bool); 24 24 struct dentry *(*d_real)(struct dentry *, const struct inode *, 25 - unsigned int); 25 + unsigned int, unsigned int); 26 26 27 27 locking rules: 28 28 rename_lock ->d_lock may block rcu-walk
+1 -1
Documentation/filesystems/vfs.txt
··· 988 988 struct vfsmount *(*d_automount)(struct path *); 989 989 int (*d_manage)(const struct path *, bool); 990 990 struct dentry *(*d_real)(struct dentry *, const struct inode *, 991 - unsigned int); 991 + unsigned int, unsigned int); 992 992 }; 993 993 994 994 d_revalidate: called when the VFS needs to revalidate a dentry. This
+17 -4
fs/inode.c
··· 1570 1570 static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode, 1571 1571 bool rcu) 1572 1572 { 1573 - if (!rcu) { 1574 - struct inode *realinode = d_real_inode(dentry); 1573 + struct dentry *upperdentry; 1575 1574 1576 - if (unlikely(inode != realinode) && 1577 - (!timespec_equal(&inode->i_mtime, &realinode->i_mtime) || 1575 + /* 1576 + * Nothing to do if in rcu or if non-overlayfs 1577 + */ 1578 + if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL))) 1579 + return; 1580 + 1581 + upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); 1582 + 1583 + /* 1584 + * If file is on lower then we can't update atime, so no worries about 1585 + * stale mtime/ctime. 1586 + */ 1587 + if (upperdentry) { 1588 + struct inode *realinode = d_inode(upperdentry); 1589 + 1590 + if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) || 1578 1591 !timespec_equal(&inode->i_ctime, &realinode->i_ctime))) { 1579 1592 inode->i_mtime = realinode->i_mtime; 1580 1593 inode->i_ctime = realinode->i_ctime;
+2
fs/internal.h
··· 71 71 72 72 extern int __mnt_want_write(struct vfsmount *); 73 73 extern int __mnt_want_write_file(struct file *); 74 + extern int mnt_want_write_file_path(struct file *); 74 75 extern void __mnt_drop_write(struct vfsmount *); 75 76 extern void __mnt_drop_write_file(struct file *); 77 + extern void mnt_drop_write_file_path(struct file *); 76 78 77 79 /* 78 80 * fs_struct.c
+61 -3
fs/namespace.c
··· 431 431 } 432 432 433 433 /** 434 - * mnt_want_write_file - get write access to a file's mount 434 + * mnt_want_write_file_path - get write access to a file's mount 435 435 * @file: the file who's mount on which to take a write 436 436 * 437 437 * This is like mnt_want_write, but it takes a file and can 438 438 * do some optimisations if the file is open for write already 439 + * 440 + * Called by the vfs for cases when we have an open file at hand, but will do an 441 + * inode operation on it (important distinction for files opened on overlayfs, 442 + * since the file operations will come from the real underlying file, while 443 + * inode operations come from the overlay). 439 444 */ 440 - int mnt_want_write_file(struct file *file) 445 + int mnt_want_write_file_path(struct file *file) 441 446 { 442 447 int ret; 443 448 ··· 450 445 ret = __mnt_want_write_file(file); 451 446 if (ret) 452 447 sb_end_write(file->f_path.mnt->mnt_sb); 448 + return ret; 449 + } 450 + 451 + static inline int may_write_real(struct file *file) 452 + { 453 + struct dentry *dentry = file->f_path.dentry; 454 + struct dentry *upperdentry; 455 + 456 + /* Writable file? */ 457 + if (file->f_mode & FMODE_WRITER) 458 + return 0; 459 + 460 + /* Not overlayfs? */ 461 + if (likely(!(dentry->d_flags & DCACHE_OP_REAL))) 462 + return 0; 463 + 464 + /* File refers to upper, writable layer? */ 465 + upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); 466 + if (upperdentry && file_inode(file) == d_inode(upperdentry)) 467 + return 0; 468 + 469 + /* Lower layer: can't write to real file, sorry... */ 470 + return -EPERM; 471 + } 472 + 473 + /** 474 + * mnt_want_write_file - get write access to a file's mount 475 + * @file: the file who's mount on which to take a write 476 + * 477 + * This is like mnt_want_write, but it takes a file and can 478 + * do some optimisations if the file is open for write already 479 + * 480 + * Mostly called by filesystems from their ioctl operation before performing 481 + * modification. On overlayfs this needs to check if the file is on a read-only 482 + * lower layer and deny access in that case. 483 + */ 484 + int mnt_want_write_file(struct file *file) 485 + { 486 + int ret; 487 + 488 + ret = may_write_real(file); 489 + if (!ret) { 490 + sb_start_write(file_inode(file)->i_sb); 491 + ret = __mnt_want_write_file(file); 492 + if (ret) 493 + sb_end_write(file_inode(file)->i_sb); 494 + } 453 495 return ret; 454 496 } 455 497 EXPORT_SYMBOL_GPL(mnt_want_write_file); ··· 536 484 __mnt_drop_write(file->f_path.mnt); 537 485 } 538 486 539 - void mnt_drop_write_file(struct file *file) 487 + void mnt_drop_write_file_path(struct file *file) 540 488 { 541 489 mnt_drop_write(file->f_path.mnt); 490 + } 491 + 492 + void mnt_drop_write_file(struct file *file) 493 + { 494 + __mnt_drop_write(file->f_path.mnt); 495 + sb_end_write(file_inode(file)->i_sb); 542 496 } 543 497 EXPORT_SYMBOL(mnt_drop_write_file); 544 498
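
For context, this is the caller pattern the mnt_want_write_file() split is
aimed at: ioctl-style modifications keep calling mnt_want_write_file() /
mnt_drop_write_file(), which now go through may_write_real() and deny write
access when the file is really on a read-only lower layer, while paths that
act on the overlay inode itself (such as fchown(2) in fs/open.c below)
switch to the *_path variants. A minimal sketch, with "foofs" and its
handler entirely hypothetical and not part of this series:

    /* Sketch only: "foofs" and this handler are hypothetical. */
    static int foofs_ioc_setflags(struct file *file, unsigned int new_flags)
    {
        struct inode *inode = file_inode(file);
        int err;

        /* Now fails with -EPERM when 'file' is backed by a lower layer. */
        err = mnt_want_write_file(file);
        if (err)
            return err;

        inode_lock(inode);
        /* ... validate and apply 'new_flags' to inode->i_flags ... */
        inode_unlock(inode);

        mnt_drop_write_file(file);
        return 0;
    }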
+4 -4
fs/open.c
··· 96 96 * write access on the upper inode, not on the overlay inode. For 97 97 * non-overlay filesystems d_real() is an identity function. 98 98 */ 99 - upperdentry = d_real(path->dentry, NULL, O_WRONLY); 99 + upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0); 100 100 error = PTR_ERR(upperdentry); 101 101 if (IS_ERR(upperdentry)) 102 102 goto mnt_drop_write_and_out; ··· 670 670 if (!f.file) 671 671 goto out; 672 672 673 - error = mnt_want_write_file(f.file); 673 + error = mnt_want_write_file_path(f.file); 674 674 if (error) 675 675 goto out_fput; 676 676 audit_file(f.file); 677 677 error = chown_common(&f.file->f_path, user, group); 678 - mnt_drop_write_file(f.file); 678 + mnt_drop_write_file_path(f.file); 679 679 out_fput: 680 680 fdput(f); 681 681 out: ··· 857 857 int vfs_open(const struct path *path, struct file *file, 858 858 const struct cred *cred) 859 859 { 860 - struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags); 860 + struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0); 861 861 862 862 if (IS_ERR(dentry)) 863 863 return PTR_ERR(dentry);
+6 -5
fs/overlayfs/dir.c
··· 155 155 static void ovl_instantiate(struct dentry *dentry, struct inode *inode, 156 156 struct dentry *newdentry, bool hardlink) 157 157 { 158 - ovl_dentry_version_inc(dentry->d_parent); 158 + ovl_dentry_version_inc(dentry->d_parent, false); 159 159 ovl_dentry_set_upper_alias(dentry); 160 160 if (!hardlink) { 161 161 ovl_inode_update(inode, newdentry); ··· 692 692 if (flags) 693 693 ovl_cleanup(wdir, upper); 694 694 695 - ovl_dentry_version_inc(dentry->d_parent); 695 + ovl_dentry_version_inc(dentry->d_parent, true); 696 696 out_d_drop: 697 697 d_drop(dentry); 698 698 dput(whiteout); ··· 742 742 err = vfs_rmdir(dir, upper); 743 743 else 744 744 err = vfs_unlink(dir, upper, NULL); 745 - ovl_dentry_version_inc(dentry->d_parent); 745 + ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry)); 746 746 747 747 /* 748 748 * Keeping this dentry hashed would mean having to release ··· 1089 1089 drop_nlink(d_inode(new)); 1090 1090 } 1091 1091 1092 - ovl_dentry_version_inc(old->d_parent); 1093 - ovl_dentry_version_inc(new->d_parent); 1092 + ovl_dentry_version_inc(old->d_parent, 1093 + !overwrite && ovl_type_origin(new)); 1094 + ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old)); 1094 1095 1095 1096 out_dput: 1096 1097 dput(newdentry);
+10 -4
fs/overlayfs/inode.c
··· 498 498 len = snprintf(buf, sizeof(buf), format, 499 499 (int) (inode->i_nlink - realinode->i_nlink)); 500 500 501 + if (WARN_ON(len >= sizeof(buf))) 502 + return -EIO; 503 + 501 504 return ovl_do_setxattr(ovl_dentry_upper(dentry), 502 505 OVL_XATTR_NLINK, buf, len, 0); 503 506 } ··· 579 576 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 580 577 struct dentry *upperdentry) 581 578 { 582 - struct inode *lowerinode = lowerdentry ? d_inode(lowerdentry) : NULL; 583 - 584 - /* Lower (origin) inode must match, even if NULL */ 585 - if (ovl_inode_lower(inode) != lowerinode) 579 + /* 580 + * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 581 + * This happens when finding a copied up overlay inode for a renamed 582 + * or hardlinked overlay dentry and lower dentry cannot be followed 583 + * by origin because lower fs does not support file handles. 584 + */ 585 + if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) 586 586 return false; 587 587 588 588 /*
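
The new WARN_ON relies on snprintf()'s standard return-value semantics: it
reports the length the output would have had, so a value >= sizeof(buf)
means the string was truncated. A generic illustration of the pattern (not
overlayfs code; the "%+i" format is just for the example):

    #include <stdio.h>

    /* Returns 0 on success, -1 if the output did not fit into 'buf'. */
    static int format_nlink_diff(char *buf, size_t size, int diff)
    {
        int len = snprintf(buf, size, "%+i", diff);

        return (len < 0 || (size_t)len >= size) ? -1 : 0;
    }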
+5 -3
fs/overlayfs/overlayfs.h
··· 204 204 struct inode *ovl_inode_upper(struct inode *inode); 205 205 struct inode *ovl_inode_lower(struct inode *inode); 206 206 struct inode *ovl_inode_real(struct inode *inode); 207 - struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); 208 - void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); 207 + struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); 208 + void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); 209 209 bool ovl_dentry_is_opaque(struct dentry *dentry); 210 210 bool ovl_dentry_is_whiteout(struct dentry *dentry); 211 211 void ovl_dentry_set_opaque(struct dentry *dentry); ··· 217 217 void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, 218 218 struct dentry *lowerdentry); 219 219 void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); 220 - void ovl_dentry_version_inc(struct dentry *dentry); 220 + void ovl_dentry_version_inc(struct dentry *dentry, bool impurity); 221 221 u64 ovl_dentry_version_get(struct dentry *dentry); 222 222 bool ovl_is_whiteout(struct dentry *dentry); 223 223 struct file *ovl_path_open(struct path *path, int flags); ··· 229 229 int xerr); 230 230 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); 231 231 void ovl_set_flag(unsigned long flag, struct inode *inode); 232 + void ovl_clear_flag(unsigned long flag, struct inode *inode); 232 233 bool ovl_test_flag(unsigned long flag, struct inode *inode); 233 234 bool ovl_inuse_trylock(struct dentry *dentry); 234 235 void ovl_inuse_unlock(struct dentry *dentry); ··· 257 256 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); 258 257 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); 259 258 void ovl_cache_free(struct list_head *list); 259 + void ovl_dir_cache_free(struct inode *inode); 260 260 int ovl_check_d_type_supported(struct path *realpath); 261 261 void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, 262 262 struct dentry *dentry, int level);
+350 -33
fs/overlayfs/readdir.c
··· 15 15 #include <linux/rbtree.h> 16 16 #include <linux/security.h> 17 17 #include <linux/cred.h> 18 + #include <linux/ratelimit.h> 18 19 #include "overlayfs.h" 19 20 20 21 struct ovl_cache_entry { 21 22 unsigned int len; 22 23 unsigned int type; 24 + u64 real_ino; 23 25 u64 ino; 24 26 struct list_head l_node; 25 27 struct rb_node node; ··· 34 32 long refcount; 35 33 u64 version; 36 34 struct list_head entries; 35 + struct rb_root root; 37 36 }; 38 37 39 38 struct ovl_readdir_data { 40 39 struct dir_context ctx; 41 40 struct dentry *dentry; 42 41 bool is_lowest; 43 - struct rb_root root; 42 + struct rb_root *root; 44 43 struct list_head *list; 45 44 struct list_head middle; 46 45 struct ovl_cache_entry *first_maybe_whiteout; 47 46 int count; 48 47 int err; 48 + bool is_upper; 49 49 bool d_type_supported; 50 50 }; 51 51 ··· 62 58 63 59 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) 64 60 { 65 - return container_of(n, struct ovl_cache_entry, node); 61 + return rb_entry(n, struct ovl_cache_entry, node); 62 + } 63 + 64 + static bool ovl_cache_entry_find_link(const char *name, int len, 65 + struct rb_node ***link, 66 + struct rb_node **parent) 67 + { 68 + bool found = false; 69 + struct rb_node **newp = *link; 70 + 71 + while (!found && *newp) { 72 + int cmp; 73 + struct ovl_cache_entry *tmp; 74 + 75 + *parent = *newp; 76 + tmp = ovl_cache_entry_from_node(*newp); 77 + cmp = strncmp(name, tmp->name, len); 78 + if (cmp > 0) 79 + newp = &tmp->node.rb_right; 80 + else if (cmp < 0 || len < tmp->len) 81 + newp = &tmp->node.rb_left; 82 + else 83 + found = true; 84 + } 85 + *link = newp; 86 + 87 + return found; 66 88 } 67 89 68 90 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, ··· 112 82 return NULL; 113 83 } 114 84 85 + static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd, 86 + struct ovl_cache_entry *p) 87 + { 88 + /* Don't care if not doing ovl_iter() */ 89 + if (!rdd->dentry) 90 + return false; 91 + 92 + /* Always recalc d_ino for parent */ 93 + if (strcmp(p->name, "..") == 0) 94 + return true; 95 + 96 + /* If this is lower, then native d_ino will do */ 97 + if (!rdd->is_upper) 98 + return false; 99 + 100 + /* 101 + * Recalc d_ino for '.' and for all entries if dir is impure (contains 102 + * copied up entries) 103 + */ 104 + if ((p->name[0] == '.' 
&& p->len == 1) || 105 + ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry))) 106 + return true; 107 + 108 + return false; 109 + } 110 + 115 111 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, 116 112 const char *name, int len, 117 113 u64 ino, unsigned int d_type) ··· 153 97 p->name[len] = '\0'; 154 98 p->len = len; 155 99 p->type = d_type; 100 + p->real_ino = ino; 156 101 p->ino = ino; 102 + /* Defer setting d_ino for upper entry to ovl_iterate() */ 103 + if (ovl_calc_d_ino(rdd, p)) 104 + p->ino = 0; 157 105 p->is_whiteout = false; 158 106 159 107 if (d_type == DT_CHR) { ··· 171 111 const char *name, int len, u64 ino, 172 112 unsigned int d_type) 173 113 { 174 - struct rb_node **newp = &rdd->root.rb_node; 114 + struct rb_node **newp = &rdd->root->rb_node; 175 115 struct rb_node *parent = NULL; 176 116 struct ovl_cache_entry *p; 177 117 178 - while (*newp) { 179 - int cmp; 180 - struct ovl_cache_entry *tmp; 181 - 182 - parent = *newp; 183 - tmp = ovl_cache_entry_from_node(*newp); 184 - cmp = strncmp(name, tmp->name, len); 185 - if (cmp > 0) 186 - newp = &tmp->node.rb_right; 187 - else if (cmp < 0 || len < tmp->len) 188 - newp = &tmp->node.rb_left; 189 - else 190 - return 0; 191 - } 118 + if (ovl_cache_entry_find_link(name, len, &newp, &parent)) 119 + return 0; 192 120 193 121 p = ovl_cache_entry_new(rdd, name, len, ino, d_type); 194 - if (p == NULL) 122 + if (p == NULL) { 123 + rdd->err = -ENOMEM; 195 124 return -ENOMEM; 125 + } 196 126 197 127 list_add_tail(&p->l_node, rdd->list); 198 128 rb_link_node(&p->node, parent, newp); 199 - rb_insert_color(&p->node, &rdd->root); 129 + rb_insert_color(&p->node, rdd->root); 200 130 201 131 return 0; 202 132 } ··· 197 147 { 198 148 struct ovl_cache_entry *p; 199 149 200 - p = ovl_cache_entry_find(&rdd->root, name, namelen); 150 + p = ovl_cache_entry_find(rdd->root, name, namelen); 201 151 if (p) { 202 152 list_move_tail(&p->l_node, &rdd->middle); 203 153 } else { ··· 222 172 INIT_LIST_HEAD(list); 223 173 } 224 174 175 + void ovl_dir_cache_free(struct inode *inode) 176 + { 177 + struct ovl_dir_cache *cache = ovl_dir_cache(inode); 178 + 179 + if (cache) { 180 + ovl_cache_free(&cache->entries); 181 + kfree(cache); 182 + } 183 + } 184 + 225 185 static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) 226 186 { 227 187 struct ovl_dir_cache *cache = od->cache; ··· 239 179 WARN_ON(cache->refcount <= 0); 240 180 cache->refcount--; 241 181 if (!cache->refcount) { 242 - if (ovl_dir_cache(dentry) == cache) 243 - ovl_set_dir_cache(dentry, NULL); 182 + if (ovl_dir_cache(d_inode(dentry)) == cache) 183 + ovl_set_dir_cache(d_inode(dentry), NULL); 244 184 245 185 ovl_cache_free(&cache->entries); 246 186 kfree(cache); ··· 333 273 od->is_real = false; 334 274 } 335 275 336 - static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) 276 + static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list, 277 + struct rb_root *root) 337 278 { 338 279 int err; 339 280 struct path realpath; ··· 342 281 .ctx.actor = ovl_fill_merge, 343 282 .dentry = dentry, 344 283 .list = list, 345 - .root = RB_ROOT, 284 + .root = root, 346 285 .is_lowest = false, 347 286 }; 348 287 int idx, next; 349 288 350 289 for (idx = 0; idx != -1; idx = next) { 351 290 next = ovl_path_next(idx, dentry, &realpath); 291 + rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry; 352 292 353 293 if (next != -1) { 354 294 err = ovl_dir_read(&realpath, &rdd); ··· 388 326 int res; 389 327 struct ovl_dir_cache *cache; 390 328 
391 - cache = ovl_dir_cache(dentry); 329 + cache = ovl_dir_cache(d_inode(dentry)); 392 330 if (cache && ovl_dentry_version_get(dentry) == cache->version) { 331 + WARN_ON(!cache->refcount); 393 332 cache->refcount++; 394 333 return cache; 395 334 } 396 - ovl_set_dir_cache(dentry, NULL); 335 + ovl_set_dir_cache(d_inode(dentry), NULL); 397 336 398 337 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); 399 338 if (!cache) ··· 402 339 403 340 cache->refcount = 1; 404 341 INIT_LIST_HEAD(&cache->entries); 342 + cache->root = RB_ROOT; 405 343 406 - res = ovl_dir_read_merged(dentry, &cache->entries); 344 + res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root); 407 345 if (res) { 408 346 ovl_cache_free(&cache->entries); 409 347 kfree(cache); ··· 412 348 } 413 349 414 350 cache->version = ovl_dentry_version_get(dentry); 415 - ovl_set_dir_cache(dentry, cache); 351 + ovl_set_dir_cache(d_inode(dentry), cache); 416 352 417 353 return cache; 418 354 } 355 + 356 + /* 357 + * Set d_ino for upper entries. Non-upper entries should always report 358 + * the uppermost real inode ino and should not call this function. 359 + * 360 + * When not all layer are on same fs, report real ino also for upper. 361 + * 362 + * When all layers are on the same fs, and upper has a reference to 363 + * copy up origin, call vfs_getattr() on the overlay entry to make 364 + * sure that d_ino will be consistent with st_ino from stat(2). 365 + */ 366 + static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) 367 + 368 + { 369 + struct dentry *dir = path->dentry; 370 + struct dentry *this = NULL; 371 + enum ovl_path_type type; 372 + u64 ino = p->real_ino; 373 + int err = 0; 374 + 375 + if (!ovl_same_sb(dir->d_sb)) 376 + goto out; 377 + 378 + if (p->name[0] == '.') { 379 + if (p->len == 1) { 380 + this = dget(dir); 381 + goto get; 382 + } 383 + if (p->len == 2 && p->name[1] == '.') { 384 + /* we shall not be moved */ 385 + this = dget(dir->d_parent); 386 + goto get; 387 + } 388 + } 389 + this = lookup_one_len(p->name, dir, p->len); 390 + if (IS_ERR_OR_NULL(this) || !this->d_inode) { 391 + if (IS_ERR(this)) { 392 + err = PTR_ERR(this); 393 + this = NULL; 394 + goto fail; 395 + } 396 + goto out; 397 + } 398 + 399 + get: 400 + type = ovl_path_type(this); 401 + if (OVL_TYPE_ORIGIN(type)) { 402 + struct kstat stat; 403 + struct path statpath = *path; 404 + 405 + statpath.dentry = this; 406 + err = vfs_getattr(&statpath, &stat, STATX_INO, 0); 407 + if (err) 408 + goto fail; 409 + 410 + WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); 411 + ino = stat.ino; 412 + } 413 + 414 + out: 415 + p->ino = ino; 416 + dput(this); 417 + return err; 418 + 419 + fail: 420 + pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n", 421 + p->name, err); 422 + goto out; 423 + } 424 + 425 + static int ovl_fill_plain(struct dir_context *ctx, const char *name, 426 + int namelen, loff_t offset, u64 ino, 427 + unsigned int d_type) 428 + { 429 + struct ovl_cache_entry *p; 430 + struct ovl_readdir_data *rdd = 431 + container_of(ctx, struct ovl_readdir_data, ctx); 432 + 433 + rdd->count++; 434 + p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); 435 + if (p == NULL) { 436 + rdd->err = -ENOMEM; 437 + return -ENOMEM; 438 + } 439 + list_add_tail(&p->l_node, rdd->list); 440 + 441 + return 0; 442 + } 443 + 444 + static int ovl_dir_read_impure(struct path *path, struct list_head *list, 445 + struct rb_root *root) 446 + { 447 + int err; 448 + struct path realpath; 449 + struct ovl_cache_entry *p, *n; 450 + struct 
ovl_readdir_data rdd = { 451 + .ctx.actor = ovl_fill_plain, 452 + .list = list, 453 + .root = root, 454 + }; 455 + 456 + INIT_LIST_HEAD(list); 457 + *root = RB_ROOT; 458 + ovl_path_upper(path->dentry, &realpath); 459 + 460 + err = ovl_dir_read(&realpath, &rdd); 461 + if (err) 462 + return err; 463 + 464 + list_for_each_entry_safe(p, n, list, l_node) { 465 + if (strcmp(p->name, ".") != 0 && 466 + strcmp(p->name, "..") != 0) { 467 + err = ovl_cache_update_ino(path, p); 468 + if (err) 469 + return err; 470 + } 471 + if (p->ino == p->real_ino) { 472 + list_del(&p->l_node); 473 + kfree(p); 474 + } else { 475 + struct rb_node **newp = &root->rb_node; 476 + struct rb_node *parent = NULL; 477 + 478 + if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len, 479 + &newp, &parent))) 480 + return -EIO; 481 + 482 + rb_link_node(&p->node, parent, newp); 483 + rb_insert_color(&p->node, root); 484 + } 485 + } 486 + return 0; 487 + } 488 + 489 + static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path) 490 + { 491 + int res; 492 + struct dentry *dentry = path->dentry; 493 + struct ovl_dir_cache *cache; 494 + 495 + cache = ovl_dir_cache(d_inode(dentry)); 496 + if (cache && ovl_dentry_version_get(dentry) == cache->version) 497 + return cache; 498 + 499 + /* Impure cache is not refcounted, free it here */ 500 + ovl_dir_cache_free(d_inode(dentry)); 501 + ovl_set_dir_cache(d_inode(dentry), NULL); 502 + 503 + cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); 504 + if (!cache) 505 + return ERR_PTR(-ENOMEM); 506 + 507 + res = ovl_dir_read_impure(path, &cache->entries, &cache->root); 508 + if (res) { 509 + ovl_cache_free(&cache->entries); 510 + kfree(cache); 511 + return ERR_PTR(res); 512 + } 513 + if (list_empty(&cache->entries)) { 514 + /* Good oportunity to get rid of an unnecessary "impure" flag */ 515 + ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE); 516 + ovl_clear_flag(OVL_IMPURE, d_inode(dentry)); 517 + kfree(cache); 518 + return NULL; 519 + } 520 + 521 + cache->version = ovl_dentry_version_get(dentry); 522 + ovl_set_dir_cache(d_inode(dentry), cache); 523 + 524 + return cache; 525 + } 526 + 527 + struct ovl_readdir_translate { 528 + struct dir_context *orig_ctx; 529 + struct ovl_dir_cache *cache; 530 + struct dir_context ctx; 531 + u64 parent_ino; 532 + }; 533 + 534 + static int ovl_fill_real(struct dir_context *ctx, const char *name, 535 + int namelen, loff_t offset, u64 ino, 536 + unsigned int d_type) 537 + { 538 + struct ovl_readdir_translate *rdt = 539 + container_of(ctx, struct ovl_readdir_translate, ctx); 540 + struct dir_context *orig_ctx = rdt->orig_ctx; 541 + 542 + if (rdt->parent_ino && strcmp(name, "..") == 0) 543 + ino = rdt->parent_ino; 544 + else if (rdt->cache) { 545 + struct ovl_cache_entry *p; 546 + 547 + p = ovl_cache_entry_find(&rdt->cache->root, name, namelen); 548 + if (p) 549 + ino = p->ino; 550 + } 551 + 552 + return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); 553 + } 554 + 555 + static int ovl_iterate_real(struct file *file, struct dir_context *ctx) 556 + { 557 + int err; 558 + struct ovl_dir_file *od = file->private_data; 559 + struct dentry *dir = file->f_path.dentry; 560 + struct ovl_readdir_translate rdt = { 561 + .ctx.actor = ovl_fill_real, 562 + .orig_ctx = ctx, 563 + }; 564 + 565 + if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) { 566 + struct kstat stat; 567 + struct path statpath = file->f_path; 568 + 569 + statpath.dentry = dir->d_parent; 570 + err = vfs_getattr(&statpath, &stat, STATX_INO, 0); 571 + if (err) 572 + 
return err; 573 + 574 + WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); 575 + rdt.parent_ino = stat.ino; 576 + } 577 + 578 + if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) { 579 + rdt.cache = ovl_cache_get_impure(&file->f_path); 580 + if (IS_ERR(rdt.cache)) 581 + return PTR_ERR(rdt.cache); 582 + } 583 + 584 + return iterate_dir(od->realfile, &rdt.ctx); 585 + } 586 + 419 587 420 588 static int ovl_iterate(struct file *file, struct dir_context *ctx) 421 589 { 422 590 struct ovl_dir_file *od = file->private_data; 423 591 struct dentry *dentry = file->f_path.dentry; 424 592 struct ovl_cache_entry *p; 593 + int err; 425 594 426 595 if (!ctx->pos) 427 596 ovl_dir_reset(file); 428 597 429 - if (od->is_real) 598 + if (od->is_real) { 599 + /* 600 + * If parent is merge, then need to adjust d_ino for '..', if 601 + * dir is impure then need to adjust d_ino for copied up 602 + * entries. 603 + */ 604 + if (ovl_same_sb(dentry->d_sb) && 605 + (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) || 606 + OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))) { 607 + return ovl_iterate_real(file, ctx); 608 + } 430 609 return iterate_dir(od->realfile, ctx); 610 + } 431 611 432 612 if (!od->cache) { 433 613 struct ovl_dir_cache *cache; ··· 686 378 687 379 while (od->cursor != &od->cache->entries) { 688 380 p = list_entry(od->cursor, struct ovl_cache_entry, l_node); 689 - if (!p->is_whiteout) 381 + if (!p->is_whiteout) { 382 + if (!p->ino) { 383 + err = ovl_cache_update_ino(&file->f_path, p); 384 + if (err) 385 + return err; 386 + } 690 387 if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) 691 388 break; 389 + } 692 390 od->cursor = p->l_node.next; 693 391 ctx->pos++; 694 392 } ··· 836 522 { 837 523 int err; 838 524 struct ovl_cache_entry *p; 525 + struct rb_root root = RB_ROOT; 839 526 840 - err = ovl_dir_read_merged(dentry, list); 527 + err = ovl_dir_read_merged(dentry, list, &root); 841 528 if (err) 842 529 return err; 843 530 ··· 927 612 int err; 928 613 struct inode *dir = path->dentry->d_inode; 929 614 LIST_HEAD(list); 615 + struct rb_root root = RB_ROOT; 930 616 struct ovl_cache_entry *p; 931 617 struct ovl_readdir_data rdd = { 932 618 .ctx.actor = ovl_fill_merge, 933 619 .dentry = NULL, 934 620 .list = &list, 935 - .root = RB_ROOT, 621 + .root = &root, 936 622 .is_lowest = false, 937 623 }; 938 624 ··· 991 675 struct inode *dir = dentry->d_inode; 992 676 struct path path = { .mnt = mnt, .dentry = dentry }; 993 677 LIST_HEAD(list); 678 + struct rb_root root = RB_ROOT; 994 679 struct ovl_cache_entry *p; 995 680 struct ovl_readdir_data rdd = { 996 681 .ctx.actor = ovl_fill_merge, 997 682 .dentry = NULL, 998 683 .list = &list, 999 - .root = RB_ROOT, 684 + .root = &root, 1000 685 .is_lowest = false, 1001 686 }; 1002 687
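
The user-visible goal of the readdir changes is that d_ino reported by
getdents(2)/readdir(3) matches st_ino from stat(2) even for entries that
have been copied up, as long as all layers are on the same filesystem. A
hypothetical userspace check (the directory path is made up) that is
expected to print nothing on such a setup:

    #include <stdio.h>
    #include <stdint.h>
    #include <dirent.h>
    #include <fcntl.h>
    #include <sys/stat.h>

    int main(void)
    {
        /* Hypothetical overlayfs directory used for illustration. */
        const char *dirpath = "/mnt/overlay/dir";
        DIR *dp = opendir(dirpath);
        struct dirent *de;

        if (!dp) {
            perror("opendir");
            return 1;
        }
        while ((de = readdir(dp)) != NULL) {
            struct stat st;

            if (fstatat(dirfd(dp), de->d_name, &st,
                        AT_SYMLINK_NOFOLLOW) == 0 &&
                st.st_ino != de->d_ino)
                printf("mismatch: %s d_ino=%ju st_ino=%ju\n",
                       de->d_name, (uintmax_t)de->d_ino,
                       (uintmax_t)st.st_ino);
        }
        closedir(dp);
        return 0;
    }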
+6 -5
fs/overlayfs/super.c
··· 70 70 71 71 static struct dentry *ovl_d_real(struct dentry *dentry, 72 72 const struct inode *inode, 73 - unsigned int open_flags) 73 + unsigned int open_flags, unsigned int flags) 74 74 { 75 75 struct dentry *real; 76 76 int err; 77 + 78 + if (flags & D_REAL_UPPER) 79 + return ovl_dentry_upper(dentry); 77 80 78 81 if (!d_is_reg(dentry)) { 79 82 if (!inode || inode == d_inode(dentry)) 80 83 return dentry; 81 84 goto bug; 82 85 } 83 - 84 - if (d_is_negative(dentry)) 85 - return dentry; 86 86 87 87 if (open_flags) { 88 88 err = ovl_open_maybe_copy_up(dentry, open_flags); ··· 105 105 goto bug; 106 106 107 107 /* Handle recursion */ 108 - real = d_real(real, inode, open_flags); 108 + real = d_real(real, inode, open_flags, 0); 109 109 110 110 if (!inode || inode == d_inode(real)) 111 111 return real; ··· 198 198 199 199 dput(oi->__upperdentry); 200 200 kfree(oi->redirect); 201 + ovl_dir_cache_free(inode); 201 202 mutex_destroy(&oi->lock); 202 203 203 204 call_rcu(&inode->i_rcu, ovl_i_callback);
+18 -6
fs/overlayfs/util.c
··· 180 180 } 181 181 182 182 183 - struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) 183 + struct ovl_dir_cache *ovl_dir_cache(struct inode *inode) 184 184 { 185 - return OVL_I(d_inode(dentry))->cache; 185 + return OVL_I(inode)->cache; 186 186 } 187 187 188 - void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) 188 + void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache) 189 189 { 190 - OVL_I(d_inode(dentry))->cache = cache; 190 + OVL_I(inode)->cache = cache; 191 191 } 192 192 193 193 bool ovl_dentry_is_opaque(struct dentry *dentry) ··· 275 275 } 276 276 } 277 277 278 - void ovl_dentry_version_inc(struct dentry *dentry) 278 + void ovl_dentry_version_inc(struct dentry *dentry, bool impurity) 279 279 { 280 280 struct inode *inode = d_inode(dentry); 281 281 282 282 WARN_ON(!inode_is_locked(inode)); 283 - OVL_I(inode)->version++; 283 + /* 284 + * Version is used by readdir code to keep cache consistent. For merge 285 + * dirs all changes need to be noted. For non-merge dirs, cache only 286 + * contains impure (ones which have been copied up and have origins) 287 + * entries, so only need to note changes to impure entries. 288 + */ 289 + if (OVL_TYPE_MERGE(ovl_path_type(dentry)) || impurity) 290 + OVL_I(inode)->version++; 284 291 } 285 292 286 293 u64 ovl_dentry_version_get(struct dentry *dentry) ··· 387 380 void ovl_set_flag(unsigned long flag, struct inode *inode) 388 381 { 389 382 set_bit(flag, &OVL_I(inode)->flags); 383 + } 384 + 385 + void ovl_clear_flag(unsigned long flag, struct inode *inode) 386 + { 387 + clear_bit(flag, &OVL_I(inode)->flags); 390 388 } 391 389 392 390 bool ovl_test_flag(unsigned long flag, struct inode *inode)
+5 -4
fs/xattr.c
··· 23 23 #include <linux/posix_acl_xattr.h> 24 24 25 25 #include <linux/uaccess.h> 26 + #include "internal.h" 26 27 27 28 static const char * 28 29 strcmp_prefix(const char *a, const char *a_prefix) ··· 503 502 if (!f.file) 504 503 return error; 505 504 audit_file(f.file); 506 - error = mnt_want_write_file(f.file); 505 + error = mnt_want_write_file_path(f.file); 507 506 if (!error) { 508 507 error = setxattr(f.file->f_path.dentry, name, value, size, flags); 509 - mnt_drop_write_file(f.file); 508 + mnt_drop_write_file_path(f.file); 510 509 } 511 510 fdput(f); 512 511 return error; ··· 735 734 if (!f.file) 736 735 return error; 737 736 audit_file(f.file); 738 - error = mnt_want_write_file(f.file); 737 + error = mnt_want_write_file_path(f.file); 739 738 if (!error) { 740 739 error = removexattr(f.file->f_path.dentry, name); 741 - mnt_drop_write_file(f.file); 740 + mnt_drop_write_file_path(f.file); 742 741 } 743 742 fdput(f); 744 743 return error;
+9 -5
include/linux/dcache.h
··· 147 147 struct vfsmount *(*d_automount)(struct path *); 148 148 int (*d_manage)(const struct path *, bool); 149 149 struct dentry *(*d_real)(struct dentry *, const struct inode *, 150 - unsigned int); 150 + unsigned int, unsigned int); 151 151 } ____cacheline_aligned; 152 152 153 153 /* ··· 562 562 return upper; 563 563 } 564 564 565 + /* d_real() flags */ 566 + #define D_REAL_UPPER 0x2 /* return upper dentry or NULL if non-upper */ 567 + 565 568 /** 566 569 * d_real - Return the real dentry 567 570 * @dentry: the dentry to query 568 571 * @inode: inode to select the dentry from multiple layers (can be NULL) 569 - * @flags: open flags to control copy-up behavior 572 + * @open_flags: open flags to control copy-up behavior 573 + * @flags: flags to control what is returned by this function 570 574 * 571 575 * If dentry is on a union/overlay, then return the underlying, real dentry. 572 576 * Otherwise return the dentry itself. ··· 579 575 */ 580 576 static inline struct dentry *d_real(struct dentry *dentry, 581 577 const struct inode *inode, 582 - unsigned int flags) 578 + unsigned int open_flags, unsigned int flags) 583 579 { 584 580 if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) 585 - return dentry->d_op->d_real(dentry, inode, flags); 581 + return dentry->d_op->d_real(dentry, inode, open_flags, flags); 586 582 else 587 583 return dentry; 588 584 } ··· 597 593 static inline struct inode *d_real_inode(const struct dentry *dentry) 598 594 { 599 595 /* This usage of d_real() results in const dentry */ 600 - return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0)); 596 + return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0)); 601 597 } 602 598 603 599 struct name_snapshot {
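
The new fourth argument lets callers ask d_real() a different question than
"which dentry do I open": passing D_REAL_UPPER (with no open flags) returns
the upper dentry if the object has been copied up, or NULL if it only
exists on a lower layer, as the fs/inode.c and fs/namespace.c hunks above
do. A hypothetical helper built on that, mirroring those callers:

    /* Sketch only: hypothetical helper, not part of this series. */
    static bool has_upper_copy(struct dentry *dentry)
    {
        struct dentry *upper;

        /* Non-overlay dentries: d_real() is an identity function. */
        if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
            return true;

        /* No open flags; ask which (if any) upper dentry backs this one. */
        upper = d_real(dentry, NULL, 0, D_REAL_UPPER);
        return upper != NULL;
    }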
+1 -1
include/linux/fs.h
··· 1235 1235 1236 1236 static inline struct dentry *file_dentry(const struct file *file) 1237 1237 { 1238 - return d_real(file->f_path.dentry, file_inode(file), 0); 1238 + return d_real(file->f_path.dentry, file_inode(file), 0, 0); 1239 1239 } 1240 1240 1241 1241 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)