Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'bcachefs-2024-09-09' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:

- fix ca->io_ref usage; analogous to previous patch doing that for main
discard path

- cond_resched() in __journal_keys_sort(), cutting down on "hung task"
warnings when journal is big

- rest of basic BCH_SB_MEMBER_INVALID support

- and the critical one: don't delete open files in online fsck; this
was causing the "dirent points to inode that doesn't point back"
inconsistencies some users were seeing

* tag 'bcachefs-2024-09-09' of git://evilpiepirate.org/bcachefs:
bcachefs: Don't delete open files in online fsck
bcachefs: fix btree_key_cache sysfs knob
bcachefs: More BCH_SB_MEMBER_INVALID support
bcachefs: Simplify bch2_bkey_drop_ptrs()
bcachefs: Add a cond_resched() to __journal_keys_sort()
bcachefs: Fix ca->io_ref usage

+79 -52
+12 -12
fs/bcachefs/alloc_background.c
··· 1968 1968 break; 1969 1969 } 1970 1970 1971 - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); 1972 1971 percpu_ref_put(&ca->io_ref); 1972 + bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); 1973 1973 } 1974 1974 1975 1975 static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) ··· 1979 1979 if (discard_in_flight_add(ca, bucket, false)) 1980 1980 return; 1981 1981 1982 - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) 1982 + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast)) 1983 1983 return; 1984 1984 1985 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast)) 1986 - goto put_ioref; 1985 + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) 1986 + goto put_ref; 1987 1987 1988 1988 if (queue_work(c->write_ref_wq, &ca->discard_fast_work)) 1989 1989 return; 1990 1990 1991 - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); 1992 - put_ioref: 1993 1991 percpu_ref_put(&ca->io_ref); 1992 + put_ref: 1993 + bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); 1994 1994 } 1995 1995 1996 1996 static int invalidate_one_bucket(struct btree_trans *trans, ··· 2132 2132 bch2_trans_iter_exit(trans, &iter); 2133 2133 err: 2134 2134 bch2_trans_put(trans); 2135 - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); 2136 2135 percpu_ref_put(&ca->io_ref); 2136 + bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); 2137 2137 } 2138 2138 2139 2139 void bch2_dev_do_invalidates(struct bch_dev *ca) 2140 2140 { 2141 2141 struct bch_fs *c = ca->fs; 2142 2142 2143 - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) 2143 + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate)) 2144 2144 return; 2145 2145 2146 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate)) 2147 - goto put_ioref; 2146 + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) 2147 + goto put_ref; 2148 2148 2149 2149 if (queue_work(c->write_ref_wq, &ca->invalidate_work)) 2150 2150 return; 2151 2151 2152 - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); 2153 - put_ioref: 2154 2152 
percpu_ref_put(&ca->io_ref); 2153 + put_ref: 2154 + bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); 2155 2155 } 2156 2156 2157 2157 void bch2_do_invalidates(struct bch_fs *c)
+2
fs/bcachefs/btree_journal_iter.c
··· 530 530 { 531 531 sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL); 532 532 533 + cond_resched(); 534 + 533 535 struct journal_key *dst = keys->data; 534 536 535 537 darray_for_each(*keys, src) {
+8 -7
fs/bcachefs/buckets.c
··· 100 100 101 101 struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); 102 102 if (!ca) { 103 - if (fsck_err(trans, ptr_to_invalid_device, 104 - "pointer to missing device %u\n" 105 - "while marking %s", 106 - p.ptr.dev, 107 - (printbuf_reset(&buf), 108 - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 103 + if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, 104 + trans, ptr_to_invalid_device, 105 + "pointer to missing device %u\n" 106 + "while marking %s", 107 + p.ptr.dev, 108 + (printbuf_reset(&buf), 109 + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 109 110 *do_update = true; 110 111 return 0; 111 112 } ··· 563 562 struct bch_fs *c = trans->c; 564 563 struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); 565 564 if (unlikely(!ca)) { 566 - if (insert) 565 + if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID) 567 566 ret = -EIO; 568 567 goto err; 569 568 }
+3 -1
fs/bcachefs/ec.h
··· 97 97 const struct bch_extent_ptr *data_ptr, 98 98 unsigned sectors) 99 99 { 100 - return data_ptr->dev == stripe_ptr->dev && 100 + return (data_ptr->dev == stripe_ptr->dev || 101 + data_ptr->dev == BCH_SB_MEMBER_INVALID || 102 + stripe_ptr->dev == BCH_SB_MEMBER_INVALID) && 101 103 data_ptr->gen == stripe_ptr->gen && 102 104 data_ptr->offset >= stripe_ptr->offset && 103 105 data_ptr->offset < stripe_ptr->offset + sectors;
+10 -16
fs/bcachefs/extents.c
··· 781 781 /* 782 782 * Returns pointer to the next entry after the one being dropped: 783 783 */ 784 - union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k, 785 - struct bch_extent_ptr *ptr) 784 + void bch2_bkey_drop_ptr_noerror(struct bkey_s k, struct bch_extent_ptr *ptr) 786 785 { 787 786 struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); 788 787 union bch_extent_entry *entry = to_entry(ptr), *next; 789 - union bch_extent_entry *ret = entry; 790 788 bool drop_crc = true; 789 + 790 + if (k.k->type == KEY_TYPE_stripe) { 791 + ptr->dev = BCH_SB_MEMBER_INVALID; 792 + return; 793 + } 791 794 792 795 EBUG_ON(ptr < &ptrs.start->ptr || 793 796 ptr >= &ptrs.end->ptr); ··· 814 811 break; 815 812 816 813 if ((extent_entry_is_crc(entry) && drop_crc) || 817 - extent_entry_is_stripe_ptr(entry)) { 818 - ret = (void *) ret - extent_entry_bytes(entry); 814 + extent_entry_is_stripe_ptr(entry)) 819 815 extent_entry_drop(k, entry); 820 - } 821 816 } 822 - 823 - return ret; 824 817 } 825 818 826 - union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, 827 - struct bch_extent_ptr *ptr) 819 + void bch2_bkey_drop_ptr(struct bkey_s k, struct bch_extent_ptr *ptr) 828 820 { 829 821 bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr; 830 - union bch_extent_entry *ret = 831 - bch2_bkey_drop_ptr_noerror(k, ptr); 822 + 823 + bch2_bkey_drop_ptr_noerror(k, ptr); 832 824 833 825 /* 834 826 * If we deleted all the dirty pointers and there's still cached ··· 835 837 !bch2_bkey_dirty_devs(k.s_c).nr) { 836 838 k.k->type = KEY_TYPE_error; 837 839 set_bkey_val_u64s(k.k, 0); 838 - ret = NULL; 839 840 } else if (!bch2_bkey_nr_ptrs(k.s_c)) { 840 841 k.k->type = KEY_TYPE_deleted; 841 842 set_bkey_val_u64s(k.k, 0); 842 - ret = NULL; 843 843 } 844 - 845 - return ret; 846 844 } 847 845 848 846 void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
+9 -14
fs/bcachefs/extents.h
··· 649 649 650 650 void bch2_extent_ptr_decoded_append(struct bkey_i *, 651 651 struct extent_ptr_decoded *); 652 - union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s, 653 - struct bch_extent_ptr *); 654 - union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, 655 - struct bch_extent_ptr *); 652 + void bch2_bkey_drop_ptr_noerror(struct bkey_s, struct bch_extent_ptr *); 653 + void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *); 656 654 657 655 #define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ 658 656 do { \ 659 - struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ 657 + __label__ _again; \ 658 + struct bkey_ptrs _ptrs; \ 659 + _again: \ 660 + _ptrs = bch2_bkey_ptrs(_k); \ 660 661 \ 661 - struct bch_extent_ptr *_ptr = &_ptrs.start->ptr; \ 662 - \ 663 - while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ 662 + bkey_for_each_ptr(_ptrs, _ptr) \ 664 663 if (_cond) { \ 665 - _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ 666 - _ptrs = bch2_bkey_ptrs(_k); \ 667 - continue; \ 664 + bch2_bkey_drop_ptr(_k, _ptr); \ 665 + goto _again; \ 668 666 } \ 669 - \ 670 - (_ptr)++; \ 671 - } \ 672 667 } while (0) 673 668 674 669 bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
+8
fs/bcachefs/fs.c
··· 177 177 return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL); 178 178 } 179 179 180 + struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) 181 + { 182 + return to_bch_ei(ilookup5_nowait(c->vfs_sb, 183 + bch2_inode_hash(inum), 184 + bch2_iget5_test, 185 + &inum)); 186 + } 187 + 180 188 static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode) 181 189 { 182 190 subvol_inum inum = inode_inum(inode);
+7
fs/bcachefs/fs.h
··· 56 56 }; 57 57 } 58 58 59 + struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum); 60 + 59 61 /* 60 62 * Set if we've gotten a btree error for this inode, and thus the vfs inode and 61 63 * btree inode may be inconsistent: ··· 195 193 #else 196 194 197 195 #define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); }) 196 + 197 + static inline struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) 198 + { 199 + return NULL; 200 + } 198 201 199 202 static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, 200 203 snapshot_id_list *s) {}
+18
fs/bcachefs/fsck.c
··· 8 8 #include "darray.h" 9 9 #include "dirent.h" 10 10 #include "error.h" 11 + #include "fs.h" 11 12 #include "fs-common.h" 12 13 #include "fsck.h" 13 14 #include "inode.h" ··· 963 962 return ret; 964 963 } 965 964 965 + static bool bch2_inode_open(struct bch_fs *c, struct bpos p) 966 + { 967 + subvol_inum inum = { 968 + .subvol = snapshot_t(c, p.snapshot)->subvol, 969 + .inum = p.offset, 970 + }; 971 + 972 + /* snapshot tree corruption, can't safely delete */ 973 + if (!inum.subvol) { 974 + bch_err_ratelimited(c, "%s(): snapshot %u has no subvol", __func__, p.snapshot); 975 + return true; 976 + } 977 + 978 + return __bch2_inode_hash_find(c, inum) != NULL; 979 + } 980 + 966 981 static int check_inode(struct btree_trans *trans, 967 982 struct btree_iter *iter, 968 983 struct bkey_s_c k, ··· 1057 1040 } 1058 1041 1059 1042 if (u.bi_flags & BCH_INODE_unlinked && 1043 + !bch2_inode_open(c, k.k->p) && 1060 1044 (!c->sb.clean || 1061 1045 fsck_err(trans, inode_unlinked_but_clean, 1062 1046 "filesystem marked clean, but inode %llu unlinked",
+1 -1
fs/bcachefs/replicas.c
··· 796 796 nr_online += test_bit(e->devs[i], devs.d); 797 797 798 798 struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]); 799 - nr_failed += ca && ca->mi.state == BCH_MEMBER_STATE_failed; 799 + nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed; 800 800 } 801 801 rcu_read_unlock(); 802 802
+1 -1
fs/bcachefs/sysfs.c
··· 461 461 462 462 sc.gfp_mask = GFP_KERNEL; 463 463 sc.nr_to_scan = strtoul_or_return(buf); 464 - c->btree_key_cache.shrink->scan_objects(c->btree_cache.shrink, &sc); 464 + c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc); 465 465 } 466 466 467 467 if (attr == &sysfs_trigger_gc)