Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: Add BCH_SUBVOLUME_UNLINKED

Snapshot deletion needs to become a multi-step process: first we unlink
the subvolume, then tear down the page cache, and only then delete the
subvolume. The new BCH_SUBVOLUME_UNLINKED flag is the subvolume
equivalent of an inode with i_nlink = 0.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>

authored by

Kent Overstreet and committed by
Kent Overstreet
2027875b f3b1e193

+223 -51
+4
fs/bcachefs/bcachefs.h
··· 353 353 #include "quota_types.h" 354 354 #include "rebalance_types.h" 355 355 #include "replicas_types.h" 356 + #include "subvolume_types.h" 356 357 #include "super_types.h" 357 358 358 359 /* Number of nodes btree coalesce will try to coalesce at once */ ··· 658 657 struct bch_snapshot_table __rcu *snapshot_table; 659 658 struct mutex snapshot_table_lock; 660 659 struct work_struct snapshot_delete_work; 660 + struct work_struct snapshot_wait_for_pagecache_and_delete_work; 661 + struct snapshot_id_list snapshots_unlinked; 662 + struct mutex snapshots_unlinked_lock; 661 663 662 664 /* BTREE CACHE */ 663 665 struct bio_set btree_bio;
+1
fs/bcachefs/bcachefs_format.h
··· 974 974 * can delete it (or whether it should just be rm -rf'd) 975 975 */ 976 976 LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) 977 + LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) 977 978 978 979 /* Snapshots */ 979 980
+7 -23
fs/bcachefs/fs-common.c
··· 239 239 struct bch_inode_unpacked *dir_u, 240 240 struct bch_inode_unpacked *inode_u, 241 241 const struct qstr *name, 242 - int deleting_snapshot) 242 + bool deleting_snapshot) 243 243 { 244 244 struct bch_fs *c = trans->c; 245 245 struct btree_iter dir_iter = { NULL }; ··· 267 267 if (ret) 268 268 goto err; 269 269 270 - if (deleting_snapshot <= 0 && S_ISDIR(inode_u->bi_mode)) { 270 + if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) { 271 271 ret = bch2_empty_dir_trans(trans, inum); 272 272 if (ret) 273 273 goto err; 274 274 } 275 275 276 - if (deleting_snapshot < 0 && 277 - inode_u->bi_subvol) { 278 - struct bch_subvolume s; 279 - 280 - ret = bch2_subvolume_get(trans, inode_u->bi_subvol, true, 281 - BTREE_ITER_CACHED| 282 - BTREE_ITER_WITH_UPDATES, 283 - &s); 284 - if (ret) 285 - goto err; 286 - 287 - if (BCH_SUBVOLUME_SNAP(&s)) 288 - deleting_snapshot = 1; 276 + if (deleting_snapshot && !inode_u->bi_subvol) { 277 + ret = -ENOENT; 278 + goto err; 289 279 } 290 280 291 - if (deleting_snapshot == 1) { 292 - if (!inode_u->bi_subvol) { 293 - ret = -ENOENT; 294 - goto err; 295 - } 296 - 297 - ret = bch2_subvolume_delete(trans, inode_u->bi_subvol, 298 - deleting_snapshot); 281 + if (deleting_snapshot || inode_u->bi_subvol) { 282 + ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol); 299 283 if (ret) 300 284 goto err; 301 285
+1 -1
fs/bcachefs/fs-common.h
··· 26 26 int bch2_unlink_trans(struct btree_trans *, subvol_inum, 27 27 struct bch_inode_unpacked *, 28 28 struct bch_inode_unpacked *, 29 - const struct qstr *, int); 29 + const struct qstr *, bool); 30 30 31 31 int bch2_rename_trans(struct btree_trans *, 32 32 subvol_inum, struct bch_inode_unpacked *,
+1 -1
fs/bcachefs/fs-ioctl.c
··· 441 441 442 442 dir = path.dentry->d_parent->d_inode; 443 443 444 - ret = __bch2_unlink(dir, path.dentry, 1); 444 + ret = __bch2_unlink(dir, path.dentry, true); 445 445 if (!ret) { 446 446 fsnotify_rmdir(dir, path.dentry); 447 447 d_delete(path.dentry);
+9 -2
fs/bcachefs/fs.c
··· 490 490 } 491 491 492 492 int __bch2_unlink(struct inode *vdir, struct dentry *dentry, 493 - int deleting_snapshot) 493 + bool deleting_snapshot) 494 494 { 495 495 struct bch_fs *c = vdir->i_sb->s_fs_info; 496 496 struct bch_inode_info *dir = to_bch_ei(vdir); ··· 527 527 528 528 static int bch2_unlink(struct inode *vdir, struct dentry *dentry) 529 529 { 530 - return __bch2_unlink(vdir, dentry, -1); 530 + return __bch2_unlink(vdir, dentry, false); 531 531 } 532 532 533 533 static int bch2_symlink(struct mnt_idmap *idmap, ··· 1292 1292 return ret; 1293 1293 } 1294 1294 1295 + static int bch2_drop_inode(struct inode *vinode) 1296 + { 1297 + 1298 + return generic_drop_inode(vinode); 1299 + } 1300 + 1295 1301 static void bch2_evict_inode(struct inode *vinode) 1296 1302 { 1297 1303 struct bch_fs *c = vinode->i_sb->s_fs_info; ··· 1502 1496 .alloc_inode = bch2_alloc_inode, 1503 1497 .destroy_inode = bch2_destroy_inode, 1504 1498 .write_inode = bch2_vfs_write_inode, 1499 + .drop_inode = bch2_drop_inode, 1505 1500 .evict_inode = bch2_evict_inode, 1506 1501 .sync_fs = bch2_sync_fs, 1507 1502 .statfs = bch2_statfs,
+1 -1
fs/bcachefs/fs.h
··· 183 183 int bch2_setattr_nonsize(struct mnt_idmap *, 184 184 struct bch_inode_info *, 185 185 struct iattr *); 186 - int __bch2_unlink(struct inode *, struct dentry *, int); 186 + int __bch2_unlink(struct inode *, struct dentry *, bool); 187 187 188 188 void bch2_vfs_exit(void); 189 189 int bch2_vfs_init(void);
+17 -1
fs/bcachefs/fsck.c
··· 256 256 257 257 /* Subvolume root? */ 258 258 if (inode_u.bi_subvol) { 259 - ret = bch2_subvolume_delete(trans, inode_u.bi_subvol, -1); 259 + ret = bch2_subvolume_delete(trans, inode_u.bi_subvol); 260 260 if (ret) 261 261 goto err; 262 262 } ··· 992 992 struct btree_trans trans; 993 993 struct btree_iter iter; 994 994 struct bkey_s_c k; 995 + struct bkey_s_c_subvolume subvol; 995 996 int ret; 996 997 997 998 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); 998 999 999 1000 for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN, 1000 1001 0, k, ret) { 1002 + if (k.k->type != KEY_TYPE_subvolume) 1003 + continue; 1004 + 1005 + subvol = bkey_s_c_to_subvolume(k); 1006 + 1007 + if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { 1008 + ret = __bch2_trans_do(&trans, NULL, NULL, 1009 + BTREE_INSERT_LAZY_RW, 1010 + bch2_subvolume_delete(&trans, iter.pos.offset)); 1011 + if (ret) { 1012 + bch_err(c, "error deleting subvolume %llu: %i", 1013 + iter.pos.offset, ret); 1014 + break; 1015 + } 1016 + } 1001 1017 } 1002 1018 bch2_trans_iter_exit(&trans, &iter); 1003 1019
+1 -5
fs/bcachefs/inode.c
··· 709 709 bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); 710 710 711 711 /* Subvolume root? */ 712 - if (inode_u.bi_subvol) { 713 - ret = bch2_subvolume_delete(&trans, inode_u.bi_subvol, -1); 714 - if (ret) 715 - goto err; 716 - } 712 + BUG_ON(inode_u.bi_subvol); 717 713 718 714 bkey_inode_generation_init(&delete.k_i); 719 715 delete.k.p = iter.pos;
+166 -16
fs/bcachefs/subvolume.c
··· 4 4 #include "btree_key_cache.h" 5 5 #include "btree_update.h" 6 6 #include "error.h" 7 + #include "fs.h" 7 8 #include "subvolume.h" 8 9 9 10 /* Snapshot tree: */ ··· 542 541 return ret; 543 542 } 544 543 545 - /* List of snapshot IDs that are being deleted: */ 546 - struct snapshot_id_list { 547 - u32 nr; 548 - u32 size; 549 - u32 *d; 550 - }; 551 - 552 544 static bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id) 553 545 { 554 546 unsigned i; ··· 813 819 return ret; 814 820 } 815 821 816 - /* XXX: mark snapshot id for deletion, walk btree and delete: */ 817 - int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid, 818 - int deleting_snapshot) 822 + /* 823 + * Delete subvolume, mark snapshot ID as deleted, queue up snapshot 824 + * deletion/cleanup: 825 + */ 826 + int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) 819 827 { 820 828 struct btree_iter iter; 821 829 struct bkey_s_c k; ··· 845 849 subvol = bkey_s_c_to_subvolume(k); 846 850 snapid = le32_to_cpu(subvol.v->snapshot); 847 851 848 - if (deleting_snapshot >= 0 && 849 - deleting_snapshot != BCH_SUBVOLUME_SNAP(subvol.v)) { 850 - ret = -ENOENT; 851 - goto err; 852 - } 853 - 854 852 delete = bch2_trans_kmalloc(trans, sizeof(*delete)); 855 853 ret = PTR_ERR_OR_ZERO(delete); 856 854 if (ret) ··· 865 875 866 876 h->fn = bch2_delete_dead_snapshots_hook; 867 877 bch2_trans_commit_hook(trans, h); 878 + err: 879 + bch2_trans_iter_exit(trans, &iter); 880 + return ret; 881 + } 882 + 883 + static void bch2_evict_subvolume_inodes(struct bch_fs *c, 884 + struct snapshot_id_list *s) 885 + { 886 + struct super_block *sb = c->vfs_sb; 887 + struct inode *inode; 888 + 889 + spin_lock(&sb->s_inode_list_lock); 890 + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 891 + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || 892 + (inode->i_state & I_FREEING)) 893 + continue; 894 + 895 + d_mark_dontcache(inode); 896 + d_prune_aliases(inode); 897 + } 898 + 
spin_unlock(&sb->s_inode_list_lock); 899 + again: 900 + cond_resched(); 901 + spin_lock(&sb->s_inode_list_lock); 902 + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 903 + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || 904 + (inode->i_state & I_FREEING)) 905 + continue; 906 + 907 + if (!(inode->i_state & I_DONTCACHE)) { 908 + d_mark_dontcache(inode); 909 + d_prune_aliases(inode); 910 + } 911 + 912 + spin_lock(&inode->i_lock); 913 + if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) && 914 + !(inode->i_state & I_FREEING)) { 915 + wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW); 916 + DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 917 + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 918 + spin_unlock(&inode->i_lock); 919 + spin_unlock(&sb->s_inode_list_lock); 920 + schedule(); 921 + finish_wait(wq, &wait.wq_entry); 922 + goto again; 923 + } 924 + 925 + spin_unlock(&inode->i_lock); 926 + } 927 + spin_unlock(&sb->s_inode_list_lock); 928 + } 929 + 930 + void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) 931 + { 932 + struct bch_fs *c = container_of(work, struct bch_fs, 933 + snapshot_wait_for_pagecache_and_delete_work); 934 + struct snapshot_id_list s; 935 + u32 *id; 936 + int ret = 0; 937 + 938 + while (!ret) { 939 + mutex_lock(&c->snapshots_unlinked_lock); 940 + s = c->snapshots_unlinked; 941 + memset(&c->snapshots_unlinked, 0, sizeof(c->snapshots_unlinked)); 942 + mutex_unlock(&c->snapshots_unlinked_lock); 943 + 944 + if (!s.nr) 945 + break; 946 + 947 + bch2_evict_subvolume_inodes(c, &s); 948 + 949 + for (id = s.d; id < s.d + s.nr; id++) { 950 + ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, 951 + bch2_subvolume_delete(&trans, *id)); 952 + if (ret) { 953 + bch_err(c, "error %i deleting subvolume %u", ret, *id); 954 + break; 955 + } 956 + } 957 + 958 + kfree(s.d); 959 + } 960 + 961 + percpu_ref_put(&c->writes); 962 + } 963 + 964 + struct subvolume_unlink_hook { 965 + struct 
btree_trans_commit_hook h; 966 + u32 subvol; 967 + }; 968 + 969 + int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, 970 + struct btree_trans_commit_hook *_h) 971 + { 972 + struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); 973 + struct bch_fs *c = trans->c; 974 + int ret = 0; 975 + 976 + mutex_lock(&c->snapshots_unlinked_lock); 977 + if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) 978 + ret = snapshot_id_add(&c->snapshots_unlinked, h->subvol); 979 + mutex_unlock(&c->snapshots_unlinked_lock); 980 + 981 + if (ret) 982 + return ret; 983 + 984 + if (unlikely(!percpu_ref_tryget(&c->writes))) 985 + return -EROFS; 986 + 987 + if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) 988 + percpu_ref_put(&c->writes); 989 + return 0; 990 + } 991 + 992 + int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) 993 + { 994 + struct btree_iter iter; 995 + struct bkey_s_c k; 996 + struct bkey_i_subvolume *n; 997 + struct subvolume_unlink_hook *h; 998 + int ret = 0; 999 + 1000 + bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, 1001 + POS(0, subvolid), 1002 + BTREE_ITER_CACHED| 1003 + BTREE_ITER_INTENT); 1004 + k = bch2_btree_iter_peek_slot(&iter); 1005 + ret = bkey_err(k); 1006 + if (ret) 1007 + goto err; 1008 + 1009 + if (k.k->type != KEY_TYPE_subvolume) { 1010 + bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid); 1011 + ret = -EIO; 1012 + goto err; 1013 + } 1014 + 1015 + n = bch2_trans_kmalloc(trans, sizeof(*n)); 1016 + ret = PTR_ERR_OR_ZERO(n); 1017 + if (ret) 1018 + goto err; 1019 + 1020 + bkey_reassemble(&n->k_i, k); 1021 + SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); 1022 + 1023 + ret = bch2_trans_update(trans, &iter, &n->k_i, 0); 1024 + if (ret) 1025 + goto err; 1026 + 1027 + h = bch2_trans_kmalloc(trans, sizeof(*h)); 1028 + ret = PTR_ERR_OR_ZERO(h); 1029 + if (ret) 1030 + goto err; 1031 + 1032 + h->h.fn = 
bch2_subvolume_wait_for_pagecache_and_delete_hook; 1033 + h->subvol = subvolid; 1034 + bch2_trans_commit_hook(trans, &h->h); 868 1035 err: 869 1036 bch2_trans_iter_exit(trans, &iter); 870 1037 return ret; ··· 1124 977 int bch2_fs_subvolumes_init(struct bch_fs *c) 1125 978 { 1126 979 INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); 980 + INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work, 981 + bch2_subvolume_wait_for_pagecache_and_delete); 982 + mutex_init(&c->snapshots_unlinked_lock); 1127 983 return 0; 1128 984 }
+4 -1
fs/bcachefs/subvolume.h
··· 2 2 #ifndef _BCACHEFS_SUBVOLUME_H 3 3 #define _BCACHEFS_SUBVOLUME_H 4 4 5 + #include "subvolume_types.h" 6 + 5 7 void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); 6 8 const char *bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c); 7 9 ··· 110 108 bool, int, struct bch_subvolume *); 111 109 int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); 112 110 113 - int bch2_subvolume_delete(struct btree_trans *, u32, int); 111 + int bch2_subvolume_delete(struct btree_trans *, u32); 112 + int bch2_subvolume_unlink(struct btree_trans *, u32); 114 113 int bch2_subvolume_create(struct btree_trans *, u64, u32, 115 114 u32 *, u32 *, bool); 116 115
+11
fs/bcachefs/subvolume_types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_SUBVOLUME_TYPES_H 3 + #define _BCACHEFS_SUBVOLUME_TYPES_H 4 + 5 + struct snapshot_id_list { 6 + u32 nr; 7 + u32 size; 8 + u32 *d; 9 + }; 10 + 11 + #endif /* _BCACHEFS_SUBVOLUME_TYPES_H */