Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ovl-fixes-6.8-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs

Pull overlayfs fix from Amir Goldstein:
"Change the on-disk format for the new "xwhiteouts" feature introduced
in v6.7.

The change reduces unneeded overhead of an extra getxattr per readdir.
The only user of the "xwhiteouts" feature is the external composefs
tool, which has been updated to support the new on-disk format.

This change is also designated for 6.7.y"

* tag 'ovl-fixes-6.8-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs:
ovl: mark xwhiteouts directory with overlay.opaque='x'

+110 -51
+13 -3
Documentation/filesystems/overlayfs.rst
··· 145 145 that files have been removed. This is done using whiteouts and opaque 146 146 directories (non-directories are always opaque). 147 147 148 - A whiteout is created as a character device with 0/0 device number. 148 + A whiteout is created as a character device with 0/0 device number or 149 + as a zero-size regular file with the xattr "trusted.overlay.whiteout". 150 + 149 151 When a whiteout is found in the upper level of a merged directory, any 150 152 matching name in the lower level is ignored, and the whiteout itself 151 153 is also hidden. ··· 155 153 A directory is made opaque by setting the xattr "trusted.overlay.opaque" 156 154 to "y". Where the upper filesystem contains an opaque directory, any 157 155 directory in the lower filesystem with the same name is ignored. 156 + 157 + An opaque directory should not contain any whiteouts, because they do not 158 + serve any purpose. A merge directory containing regular files with the xattr 159 + "trusted.overlay.whiteout" should be additionally marked by setting the xattr 160 + "trusted.overlay.opaque" to "x" on the merge directory itself. 161 + This is needed to avoid the overhead of checking the "trusted.overlay.whiteout" 162 + on all entries during readdir in the common case. 158 163 159 164 readdir 160 165 ------- ··· 543 534 mount, so to support storing an effective whiteout file in an overlayfs mount an 544 535 alternative form of whiteout is supported. This form is a regular, zero-size 545 536 file with the "overlay.whiteout" xattr set, inside a directory with the 546 - "overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs, 547 - but can be used by userspace tools (like containers) that generate lower layers. 537 + "overlay.opaque" xattr set to "x" (see `whiteouts and opaque directories`_). 538 + These alternative whiteouts are never created by overlayfs, but can be used by 539 + userspace tools (like containers) that generate lower layers. 
548 540 These alternative whiteouts can be escaped using the standard xattr escape 549 541 mechanism in order to properly nest to any depth. 550 542
+27 -16
fs/overlayfs/namei.c
··· 18 18 19 19 struct ovl_lookup_data { 20 20 struct super_block *sb; 21 - struct vfsmount *mnt; 21 + const struct ovl_layer *layer; 22 22 struct qstr name; 23 23 bool is_dir; 24 24 bool opaque; 25 + bool xwhiteouts; 25 26 bool stop; 26 27 bool last; 27 28 char *redirect; ··· 202 201 return real; 203 202 } 204 203 205 - static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path) 206 - { 207 - return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE); 208 - } 209 - 210 204 static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d, 211 205 const char *name, 212 206 struct dentry *base, int len, 213 207 bool drop_negative) 214 208 { 215 - struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len); 209 + struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name, 210 + base, len); 216 211 217 212 if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { 218 213 if (drop_negative && ret->d_lockref.count == 1) { ··· 229 232 size_t prelen, const char *post, 230 233 struct dentry **ret, bool drop_negative) 231 234 { 235 + struct ovl_fs *ofs = OVL_FS(d->sb); 232 236 struct dentry *this; 233 237 struct path path; 234 238 int err; 235 239 bool last_element = !post[0]; 240 + bool is_upper = d->layer->idx == 0; 241 + char val; 236 242 237 243 this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative); 238 244 if (IS_ERR(this)) { ··· 253 253 } 254 254 255 255 path.dentry = this; 256 - path.mnt = d->mnt; 257 - if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) { 256 + path.mnt = d->layer->mnt; 257 + if (ovl_path_is_whiteout(ofs, &path)) { 258 258 d->stop = d->opaque = true; 259 259 goto put_and_out; 260 260 } ··· 272 272 d->stop = true; 273 273 goto put_and_out; 274 274 } 275 - err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, NULL); 275 + err = ovl_check_metacopy_xattr(ofs, &path, NULL); 276 276 if (err < 0) 277 277 goto out_err; 278 278 ··· 292 292 if (d->last) 293 293 goto out; 
294 294 295 - if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) { 295 + /* overlay.opaque=x means xwhiteouts directory */ 296 + val = ovl_get_opaquedir_val(ofs, &path); 297 + if (last_element && !is_upper && val == 'x') { 298 + d->xwhiteouts = true; 299 + ovl_layer_set_xwhiteouts(ofs, d->layer); 300 + } else if (val == 'y') { 296 301 d->stop = true; 297 302 if (last_element) 298 303 d->opaque = true; ··· 868 863 * Returns next layer in stack starting from top. 869 864 * Returns -1 if this is the last layer. 870 865 */ 871 - int ovl_path_next(int idx, struct dentry *dentry, struct path *path) 866 + int ovl_path_next(int idx, struct dentry *dentry, struct path *path, 867 + const struct ovl_layer **layer) 872 868 { 873 869 struct ovl_entry *oe = OVL_E(dentry); 874 870 struct ovl_path *lowerstack = ovl_lowerstack(oe); ··· 877 871 BUG_ON(idx < 0); 878 872 if (idx == 0) { 879 873 ovl_path_upper(dentry, path); 880 - if (path->dentry) 874 + if (path->dentry) { 875 + *layer = &OVL_FS(dentry->d_sb)->layers[0]; 881 876 return ovl_numlower(oe) ? 1 : -1; 877 + } 882 878 idx++; 883 879 } 884 880 BUG_ON(idx > ovl_numlower(oe)); 885 881 path->dentry = lowerstack[idx - 1].dentry; 886 - path->mnt = lowerstack[idx - 1].layer->mnt; 882 + *layer = lowerstack[idx - 1].layer; 883 + path->mnt = (*layer)->mnt; 887 884 888 885 return (idx < ovl_numlower(oe)) ? 
idx + 1 : -1; 889 886 } ··· 1064 1055 old_cred = ovl_override_creds(dentry->d_sb); 1065 1056 upperdir = ovl_dentry_upper(dentry->d_parent); 1066 1057 if (upperdir) { 1067 - d.mnt = ovl_upper_mnt(ofs); 1058 + d.layer = &ofs->layers[0]; 1068 1059 err = ovl_lookup_layer(upperdir, &d, &upperdentry, true); 1069 1060 if (err) 1070 1061 goto out; ··· 1120 1111 else if (d.is_dir || !ofs->numdatalayer) 1121 1112 d.last = lower.layer->idx == ovl_numlower(roe); 1122 1113 1123 - d.mnt = lower.layer->mnt; 1114 + d.layer = lower.layer; 1124 1115 err = ovl_lookup_layer(lower.dentry, &d, &this, false); 1125 1116 if (err) 1126 1117 goto out_put; ··· 1287 1278 1288 1279 if (upperopaque) 1289 1280 ovl_dentry_set_opaque(dentry); 1281 + if (d.xwhiteouts) 1282 + ovl_dentry_set_xwhiteouts(dentry); 1290 1283 1291 1284 if (upperdentry) 1292 1285 ovl_dentry_set_upper_alias(dentry);
+17 -6
fs/overlayfs/overlayfs.h
··· 50 50 OVL_XATTR_METACOPY, 51 51 OVL_XATTR_PROTATTR, 52 52 OVL_XATTR_XWHITEOUT, 53 - OVL_XATTR_XWHITEOUTS, 54 53 }; 55 54 56 55 enum ovl_inode_flag { ··· 69 70 OVL_E_UPPER_ALIAS, 70 71 OVL_E_OPAQUE, 71 72 OVL_E_CONNECTED, 73 + /* Lower stack may contain xwhiteout entries */ 74 + OVL_E_XWHITEOUTS, 72 75 }; 73 76 74 77 enum { ··· 478 477 bool ovl_dentry_is_opaque(struct dentry *dentry); 479 478 bool ovl_dentry_is_whiteout(struct dentry *dentry); 480 479 void ovl_dentry_set_opaque(struct dentry *dentry); 480 + bool ovl_dentry_has_xwhiteouts(struct dentry *dentry); 481 + void ovl_dentry_set_xwhiteouts(struct dentry *dentry); 482 + void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, 483 + const struct ovl_layer *layer); 481 484 bool ovl_dentry_has_upper_alias(struct dentry *dentry); 482 485 void ovl_dentry_set_upper_alias(struct dentry *dentry); 483 486 bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags); ··· 499 494 int ovl_copy_up_start(struct dentry *dentry, int flags); 500 495 void ovl_copy_up_end(struct dentry *dentry); 501 496 bool ovl_already_copied_up(struct dentry *dentry, int flags); 502 - bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, 503 - enum ovl_xattr ox); 497 + char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, 498 + enum ovl_xattr ox); 504 499 bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); 505 500 bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path); 506 - bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path); 507 501 bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, 508 502 const struct path *upperpath); 509 503 ··· 577 573 .mnt = ovl_upper_mnt(ofs), 578 574 }; 579 575 580 - return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE); 576 + return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y'; 577 + } 578 + 579 + static inline char 
ovl_get_opaquedir_val(struct ovl_fs *ofs, 580 + const struct path *path) 581 + { 582 + return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE); 581 583 } 582 584 583 585 static inline bool ovl_redirect_follow(struct ovl_fs *ofs) ··· 690 680 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); 691 681 struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, 692 682 struct dentry *origin, bool verify); 693 - int ovl_path_next(int idx, struct dentry *dentry, struct path *path); 683 + int ovl_path_next(int idx, struct dentry *dentry, struct path *path, 684 + const struct ovl_layer **layer); 694 685 int ovl_verify_lowerdata(struct dentry *dentry); 695 686 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 696 687 unsigned int flags);
+3 -1
fs/overlayfs/ovl_entry.h
··· 40 40 int idx; 41 41 /* One fsid per unique underlying sb (upper fsid == 0) */ 42 42 int fsid; 43 + /* xwhiteouts were found on this layer */ 44 + bool has_xwhiteouts; 43 45 }; 44 46 45 47 struct ovl_path { ··· 61 59 unsigned int numfs; 62 60 /* Number of data-only lower layers */ 63 61 unsigned int numdatalayer; 64 - const struct ovl_layer *layers; 62 + struct ovl_layer *layers; 65 63 struct ovl_sb *fs; 66 64 /* workbasedir is the path at workdir= mount option */ 67 65 struct dentry *workbasedir;
+4 -3
fs/overlayfs/readdir.c
··· 305 305 if (IS_ERR(realfile)) 306 306 return PTR_ERR(realfile); 307 307 308 - rdd->in_xwhiteouts_dir = rdd->dentry && 309 - ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath); 310 308 rdd->first_maybe_whiteout = NULL; 311 309 rdd->ctx.pos = 0; 312 310 do { ··· 357 359 .is_lowest = false, 358 360 }; 359 361 int idx, next; 362 + const struct ovl_layer *layer; 360 363 361 364 for (idx = 0; idx != -1; idx = next) { 362 - next = ovl_path_next(idx, dentry, &realpath); 365 + next = ovl_path_next(idx, dentry, &realpath, &layer); 363 366 rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry; 367 + rdd.in_xwhiteouts_dir = layer->has_xwhiteouts && 368 + ovl_dentry_has_xwhiteouts(dentry); 364 369 365 370 if (next != -1) { 366 371 err = ovl_dir_read(&realpath, &rdd);
+15
fs/overlayfs/super.c
··· 1249 1249 struct ovl_entry *oe) 1250 1250 { 1251 1251 struct dentry *root; 1252 + struct ovl_fs *ofs = OVL_FS(sb); 1252 1253 struct ovl_path *lowerpath = ovl_lowerstack(oe); 1253 1254 unsigned long ino = d_inode(lowerpath->dentry)->i_ino; 1254 1255 int fsid = lowerpath->layer->fsid; ··· 1269 1268 ovl_dentry_set_upper_alias(root); 1270 1269 if (ovl_is_impuredir(sb, upperdentry)) 1271 1270 ovl_set_flag(OVL_IMPURE, d_inode(root)); 1271 + } 1272 + 1273 + /* Look for xwhiteouts marker except in the lowermost layer */ 1274 + for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) { 1275 + struct path path = { 1276 + .mnt = lowerpath->layer->mnt, 1277 + .dentry = lowerpath->dentry, 1278 + }; 1279 + 1280 + /* overlay.opaque=x means xwhiteouts directory */ 1281 + if (ovl_get_opaquedir_val(ofs, &path) == 'x') { 1282 + ovl_layer_set_xwhiteouts(ofs, lowerpath->layer); 1283 + ovl_dentry_set_xwhiteouts(root); 1284 + } 1272 1285 } 1273 1286 1274 1287 /* Root is always merge -> can have whiteouts */
+31 -22
fs/overlayfs/util.c
··· 461 461 ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); 462 462 } 463 463 464 + bool ovl_dentry_has_xwhiteouts(struct dentry *dentry) 465 + { 466 + return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry); 467 + } 468 + 469 + void ovl_dentry_set_xwhiteouts(struct dentry *dentry) 470 + { 471 + ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry); 472 + } 473 + 474 + /* 475 + * ovl_layer_set_xwhiteouts() is called before adding the overlay dir 476 + * dentry to dcache, while readdir of that same directory happens after 477 + * the overlay dir dentry is in dcache, so if some cpu observes that 478 + * ovl_dentry_has_xwhiteouts(), it will also observe layer->has_xwhiteouts 479 + * for the layers where xwhiteouts marker was found in that merge dir. 480 + */ 481 + void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, 482 + const struct ovl_layer *layer) 483 + { 484 + if (layer->has_xwhiteouts) 485 + return; 486 + 487 + /* Write once to read-mostly layer properties */ 488 + ofs->layers[layer->idx].has_xwhiteouts = true; 489 + } 490 + 464 491 /* 465 492 * For hard links and decoded file handles, it's possible for ovl_dentry_upper() 466 493 * to return positive, while there's no actual upper alias for the inode. ··· 766 739 return res >= 0; 767 740 } 768 741 769 - bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path) 770 - { 771 - struct dentry *dentry = path->dentry; 772 - int res; 773 - 774 - /* xattr.whiteouts must be a directory */ 775 - if (!d_is_dir(dentry)) 776 - return false; 777 - 778 - res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0); 779 - return res >= 0; 780 - } 781 - 782 742 /* 783 743 * Load persistent uuid from xattr into s_uuid if found, or store a new 784 744 * random generated value in s_uuid and in xattr. 
··· 825 811 return false; 826 812 } 827 813 828 - bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, 829 - enum ovl_xattr ox) 814 + char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, 815 + enum ovl_xattr ox) 830 816 { 831 817 int res; 832 818 char val; 833 819 834 820 if (!d_is_dir(path->dentry)) 835 - return false; 821 + return 0; 836 822 837 823 res = ovl_path_getxattr(ofs, path, ox, &val, 1); 838 - if (res == 1 && val == 'y') 839 - return true; 840 - 841 - return false; 824 + return res == 1 ? val : 0; 842 825 } 843 826 844 827 #define OVL_XATTR_OPAQUE_POSTFIX "opaque" ··· 848 837 #define OVL_XATTR_METACOPY_POSTFIX "metacopy" 849 838 #define OVL_XATTR_PROTATTR_POSTFIX "protattr" 850 839 #define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout" 851 - #define OVL_XATTR_XWHITEOUTS_POSTFIX "whiteouts" 852 840 853 841 #define OVL_XATTR_TAB_ENTRY(x) \ 854 842 [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ ··· 864 854 OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), 865 855 OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), 866 856 OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT), 867 - OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS), 868 857 }; 869 858 870 859 int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,