Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'bcachefs-2024-04-15' of https://evilpiepirate.org/git/bcachefs

Pull yet more bcachefs fixes from Kent Overstreet:
"This gets recovery working again for the affected user I've been
working with, and I'm still waiting to hear back on other bug reports
but should fix it for everyone else who's been having issues with
recovery.

- Various recovery fixes:

- fixes for the btree_insert_entry being resized on path
allocation: the btree_path array recently became dynamically
resizable, and btree_insert_entry along with it; this was being
observed during journal replay, when write buffer btree updates
don't use the write buffer and instead use the normal btree
update path

- multiple fixes for deadlock in recovery when we need to do lots
of btree node merges; excessive merges were clogging up the
whole pipeline

- write buffer path now correctly does btree node merges when
needed

- fix failure to go RW when superblock indicates recovery passes
needed (i.e. to complete an unfinished upgrade)

- Various unsafety fixes - test case contributed by a user who had
two drives out of a six drive array write out a whole bunch of
garbage after power failure

- New (tiny) on disk format feature: since it appears the btree node
scan tool will be a more regular thing (crappy hardware, user
error) - this adds a 64 bit per-device bitmap of regions that have
ever had btree nodes.

- A path->should_be_locked fix, from a larger patch series tightening
up invariants and assertions around btree transaction and path
locking state.

This particular fix prevents us from keeping around btree_paths
that are no longer needed"

* tag 'bcachefs-2024-04-15' of https://evilpiepirate.org/git/bcachefs: (24 commits)
bcachefs: set_btree_iter_dontneed also clears should_be_locked
bcachefs: fix error path of __bch2_read_super()
bcachefs: Check for backpointer bucket_offset >= bucket size
bcachefs: bch_member.btree_allocated_bitmap
bcachefs: sysfs internal/trigger_journal_flush
bcachefs: Fix bch2_btree_node_fill() for !path
bcachefs: add safety checks in bch2_btree_node_fill()
bcachefs: Interior known are required to have known key types
bcachefs: add missing bounds check in __bch2_bkey_val_invalid()
bcachefs: Fix btree node merging on write buffer btrees
bcachefs: Disable merges from interior update path
bcachefs: Run merges at BCH_WATERMARK_btree
bcachefs: Fix missing write refs in fs fio paths
bcachefs: Fix deadlock in journal replay
bcachefs: Go rw if running any explicit recovery passes
bcachefs: Standardize helpers for printing enum strs with bounds checks
bcachefs: don't queue btree nodes for rewrites during scan
bcachefs: fix race in bch2_btree_node_evict()
bcachefs: fix unsafety in bch2_stripe_to_text()
bcachefs: fix unsafety in bch2_extent_ptr_to_text()
...

+431 -181
+5 -3
fs/bcachefs/backpointers.c
··· 49 49 if (!bch2_dev_exists2(c, bp.k->p.inode)) 50 50 return 0; 51 51 52 + struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode); 52 53 struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); 53 54 int ret = 0; 54 55 55 - bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), 56 + bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || 57 + !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), 56 58 c, err, 57 - backpointer_pos_wrong, 58 - "backpointer at wrong pos"); 59 + backpointer_bucket_offset_wrong, 60 + "backpointer bucket_offset wrong"); 59 61 fsck_err: 60 62 return ret; 61 63 }
+3 -6
fs/bcachefs/backpointers.h
··· 53 53 u64 bucket_offset) 54 54 { 55 55 struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); 56 - struct bpos ret; 57 - 58 - ret = POS(bucket.inode, 59 - (bucket_to_sector(ca, bucket.offset) << 60 - MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); 56 + struct bpos ret = POS(bucket.inode, 57 + (bucket_to_sector(ca, bucket.offset) << 58 + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); 61 59 62 60 EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret))); 63 - 64 61 return ret; 65 62 } 66 63
+2
fs/bcachefs/bcachefs.h
··· 709 709 x(stripe_delete) \ 710 710 x(reflink) \ 711 711 x(fallocate) \ 712 + x(fsync) \ 713 + x(dio_write) \ 712 714 x(discard) \ 713 715 x(discard_fast) \ 714 716 x(invalidate) \
+7 -4
fs/bcachefs/bcachefs_format.h
··· 578 578 __le64 nbuckets; /* device size */ 579 579 __le16 first_bucket; /* index of first bucket used */ 580 580 __le16 bucket_size; /* sectors */ 581 - __le32 pad; 581 + __u8 btree_bitmap_shift; 582 + __u8 pad[3]; 582 583 __le64 last_mount; /* time_t */ 583 584 584 585 __le64 flags; ··· 588 587 __le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; 589 588 __le64 errors_reset_time; 590 589 __le64 seq; 590 + __le64 btree_allocated_bitmap; 591 591 }; 592 592 593 593 #define BCH_MEMBER_V1_BYTES 56 ··· 878 876 x(rebalance_work, BCH_VERSION(1, 3)) \ 879 877 x(member_seq, BCH_VERSION(1, 4)) \ 880 878 x(subvolume_fs_parent, BCH_VERSION(1, 5)) \ 881 - x(btree_subvolume_children, BCH_VERSION(1, 6)) 879 + x(btree_subvolume_children, BCH_VERSION(1, 6)) \ 880 + x(mi_btree_bitmap, BCH_VERSION(1, 7)) 882 881 883 882 enum bcachefs_metadata_version { 884 883 bcachefs_metadata_version_min = 9, ··· 1317 1314 x(write_buffer_keys, 11) \ 1318 1315 x(datetime, 12) 1319 1316 1320 - enum { 1317 + enum bch_jset_entry_type { 1321 1318 #define x(f, nr) BCH_JSET_ENTRY_##f = nr, 1322 1319 BCH_JSET_ENTRY_TYPES() 1323 1320 #undef x ··· 1363 1360 x(inodes, 1) \ 1364 1361 x(key_version, 2) 1365 1362 1366 - enum { 1363 + enum bch_fs_usage_type { 1367 1364 #define x(f, nr) BCH_FS_USAGE_##f = nr, 1368 1365 BCH_FS_USAGE_TYPES() 1369 1366 #undef x
+6
fs/bcachefs/bkey.h
··· 314 314 return bkey_packed(k) ? format->key_u64s : BKEY_U64s; 315 315 } 316 316 317 + static inline bool bkeyp_u64s_valid(const struct bkey_format *f, 318 + const struct bkey_packed *k) 319 + { 320 + return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s); 321 + } 322 + 317 323 static inline unsigned bkeyp_key_bytes(const struct bkey_format *format, 318 324 const struct bkey_packed *k) 319 325 {
+6 -2
fs/bcachefs/bkey_methods.c
··· 171 171 if (type >= BKEY_TYPE_NR) 172 172 return 0; 173 173 174 - bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && 174 + bkey_fsck_err_on((type == BKEY_TYPE_btree || 175 + (flags & BKEY_INVALID_COMMIT)) && 175 176 !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, 176 177 bkey_invalid_type_for_btree, 177 178 "invalid key type for btree %s (%s)", 178 - bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]); 179 + bch2_btree_node_type_str(type), 180 + k.k->type < KEY_TYPE_MAX 181 + ? bch2_bkey_types[k.k->type] 182 + : "(unknown)"); 179 183 180 184 if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { 181 185 bkey_fsck_err_on(k.k->size == 0, c, err,
+46 -29
fs/bcachefs/btree_cache.c
··· 709 709 struct bch_fs *c = trans->c; 710 710 struct btree_cache *bc = &c->btree_cache; 711 711 struct btree *b; 712 - u32 seq; 713 712 714 - BUG_ON(level + 1 >= BTREE_MAX_DEPTH); 713 + if (unlikely(level >= BTREE_MAX_DEPTH)) { 714 + int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u", 715 + level, BTREE_MAX_DEPTH); 716 + return ERR_PTR(ret); 717 + } 718 + 719 + if (unlikely(!bkey_is_btree_ptr(&k->k))) { 720 + struct printbuf buf = PRINTBUF; 721 + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); 722 + 723 + int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf); 724 + printbuf_exit(&buf); 725 + return ERR_PTR(ret); 726 + } 727 + 728 + if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) { 729 + struct printbuf buf = PRINTBUF; 730 + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); 731 + 732 + int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf); 733 + printbuf_exit(&buf); 734 + return ERR_PTR(ret); 735 + } 736 + 715 737 /* 716 738 * Parent node must be locked, else we could read in a btree node that's 717 739 * been freed: ··· 774 752 } 775 753 776 754 set_btree_node_read_in_flight(b); 777 - 778 755 six_unlock_write(&b->c.lock); 779 - seq = six_lock_seq(&b->c.lock); 780 - six_unlock_intent(&b->c.lock); 781 - 782 - /* Unlock before doing IO: */ 783 - if (path && sync) 784 - bch2_trans_unlock_noassert(trans); 785 - 786 - bch2_btree_node_read(trans, b, sync); 787 - 788 - if (!sync) 789 - return NULL; 790 756 791 757 if (path) { 792 - int ret = bch2_trans_relock(trans) ?: 793 - bch2_btree_path_relock_intent(trans, path); 794 - if (ret) { 795 - BUG_ON(!trans->restarted); 796 - return ERR_PTR(ret); 797 - } 798 - } 758 + u32 seq = six_lock_seq(&b->c.lock); 799 759 800 - if (!six_relock_type(&b->c.lock, lock_type, seq)) { 801 - BUG_ON(!path); 760 + /* Unlock before doing IO: */ 761 + six_unlock_intent(&b->c.lock); 762 + 
bch2_trans_unlock_noassert(trans); 802 763 803 - trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path); 804 - return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill)); 764 + bch2_btree_node_read(trans, b, sync); 765 + 766 + if (!sync) 767 + return NULL; 768 + 769 + if (!six_relock_type(&b->c.lock, lock_type, seq)) 770 + b = NULL; 771 + } else { 772 + bch2_btree_node_read(trans, b, sync); 773 + if (lock_type == SIX_LOCK_read) 774 + six_lock_downgrade(&b->c.lock); 805 775 } 806 776 807 777 return b; ··· 1126 1112 { 1127 1113 struct bch_fs *c = trans->c; 1128 1114 struct btree_cache *bc = &c->btree_cache; 1129 - struct btree *b; 1130 1115 1131 1116 BUG_ON(path && !btree_node_locked(path, level + 1)); 1132 1117 BUG_ON(level >= BTREE_MAX_DEPTH); 1133 1118 1134 - b = btree_cache_find(bc, k); 1119 + struct btree *b = btree_cache_find(bc, k); 1135 1120 if (b) 1136 1121 return 0; 1137 1122 1138 1123 b = bch2_btree_node_fill(trans, path, k, btree_id, 1139 1124 level, SIX_LOCK_read, false); 1140 - return PTR_ERR_OR_ZERO(b); 1125 + if (!IS_ERR_OR_NULL(b)) 1126 + six_unlock_read(&b->c.lock); 1127 + return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b); 1141 1128 } 1142 1129 1143 1130 void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) ··· 1163 1148 1164 1149 btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); 1165 1150 btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); 1151 + if (unlikely(b->hash_val != btree_ptr_hash_val(k))) 1152 + goto out; 1166 1153 1167 1154 if (btree_node_dirty(b)) { 1168 1155 __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); ··· 1179 1162 btree_node_data_free(c, b); 1180 1163 bch2_btree_node_hash_remove(bc, b); 1181 1164 mutex_unlock(&bc->lock); 1182 - 1165 + out: 1183 1166 six_unlock_write(&b->c.lock); 1184 1167 six_unlock_intent(&b->c.lock); 1185 1168 }
+13
fs/bcachefs/btree_gc.c
··· 828 828 struct bch_fs *c = trans->c; 829 829 struct bkey deleted = KEY(0, 0, 0); 830 830 struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; 831 + struct printbuf buf = PRINTBUF; 831 832 int ret = 0; 832 833 833 834 deleted.p = k->k->p; ··· 849 848 if (ret) 850 849 goto err; 851 850 851 + if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k), 852 + c, btree_bitmap_not_marked, 853 + "btree ptr not marked in member info btree allocated bitmap\n %s", 854 + (bch2_bkey_val_to_text(&buf, c, *k), 855 + buf.buf))) { 856 + mutex_lock(&c->sb_lock); 857 + bch2_dev_btree_bitmap_mark(c, *k); 858 + bch2_write_super(c); 859 + mutex_unlock(&c->sb_lock); 860 + } 861 + 852 862 ret = commit_do(trans, NULL, NULL, 0, 853 863 bch2_key_trigger(trans, btree_id, level, old, 854 864 unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); 855 865 fsck_err: 856 866 err: 867 + printbuf_exit(&buf); 857 868 bch_err_fn(c, ret); 858 869 return ret; 859 870 }
+11 -8
fs/bcachefs/btree_io.c
··· 831 831 (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); 832 832 } 833 833 834 - static bool __bkey_valid(struct bch_fs *c, struct btree *b, 834 + static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, 835 835 struct bset *i, struct bkey_packed *k) 836 836 { 837 837 if (bkey_p_next(k) > vstruct_last(i)) ··· 840 840 if (k->format > KEY_FORMAT_CURRENT) 841 841 return false; 842 842 843 - if (k->u64s < bkeyp_key_u64s(&b->format, k)) 843 + if (!bkeyp_u64s_valid(&b->format, k)) 844 844 return false; 845 845 846 846 struct printbuf buf = PRINTBUF; ··· 884 884 "invalid bkey format %u", k->format)) 885 885 goto drop_this_key; 886 886 887 - if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k), 887 + if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), 888 888 -BCH_ERR_btree_node_read_err_fixable, 889 889 c, NULL, b, i, 890 890 btree_node_bkey_bad_u64s, 891 - "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k))) 891 + "bad k->u64s %u (min %u max %lu)", k->u64s, 892 + bkeyp_key_u64s(&b->format, k), 893 + U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) 892 894 goto drop_this_key; 893 895 894 896 if (!write) ··· 949 947 * do 950 948 */ 951 949 952 - if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { 950 + if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { 953 951 for (next_good_key = 1; 954 952 next_good_key < (u64 *) vstruct_last(i) - (u64 *) k; 955 953 next_good_key++) 956 - if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) 954 + if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) 957 955 goto got_good_key; 958 - 959 956 } 960 957 961 958 /* ··· 1340 1339 rb->start_time); 1341 1340 bio_put(&rb->bio); 1342 1341 1343 - if (saw_error && !btree_node_read_error(b)) { 1342 + if (saw_error && 1343 + !btree_node_read_error(b) && 1344 + c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { 1344 1345 printbuf_reset(&buf); 1345 1346 
bch2_bpos_to_text(&buf, b->key.k.p); 1346 1347 bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
+7 -2
fs/bcachefs/btree_iter.h
··· 498 498 { 499 499 struct btree_trans *trans = iter->trans; 500 500 501 - if (!trans->restarted) 502 - btree_iter_path(trans, iter)->preserve = false; 501 + if (!iter->path || trans->restarted) 502 + return; 503 + 504 + struct btree_path *path = btree_iter_path(trans, iter); 505 + path->preserve = false; 506 + if (path->ref == 1) 507 + path->should_be_locked = false; 503 508 } 504 509 505 510 void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
+17 -2
fs/bcachefs/btree_node_scan.c
··· 133 133 if (le64_to_cpu(bn->magic) != bset_magic(c)) 134 134 return; 135 135 136 + if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) { 137 + struct nonce nonce = btree_nonce(&bn->keys, 0); 138 + unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; 139 + 140 + bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes); 141 + } 142 + 136 143 if (btree_id_is_alloc(BTREE_NODE_ID(bn))) 144 + return; 145 + 146 + if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH) 137 147 return; 138 148 139 149 rcu_read_lock(); ··· 205 195 last_print = jiffies; 206 196 } 207 197 208 - try_read_btree_node(w->f, ca, bio, buf, 209 - bucket * ca->mi.bucket_size + bucket_offset); 198 + u64 sector = bucket * ca->mi.bucket_size + bucket_offset; 199 + 200 + if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap && 201 + !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c))) 202 + continue; 203 + 204 + try_read_btree_node(w->f, ca, bio, buf, sector); 210 205 } 211 206 err: 212 207 bio_put(bio);
+18 -16
fs/bcachefs/btree_trans_commit.c
··· 397 397 struct bkey_cached *ck = (void *) path->l[0].b; 398 398 unsigned new_u64s; 399 399 struct bkey_i *new_k; 400 + unsigned watermark = flags & BCH_WATERMARK_MASK; 400 401 401 402 EBUG_ON(path->level); 402 403 403 - if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && 404 - bch2_btree_key_cache_must_wait(c) && 405 - !(flags & BCH_TRANS_COMMIT_journal_reclaim)) 404 + if (watermark < BCH_WATERMARK_reclaim && 405 + !test_bit(BKEY_CACHED_DIRTY, &ck->flags) && 406 + bch2_btree_key_cache_must_wait(c)) 406 407 return -BCH_ERR_btree_insert_need_journal_reclaim; 407 408 408 409 /* ··· 500 499 } 501 500 502 501 static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, 503 - struct btree_insert_entry *btree_id_start) 502 + unsigned btree_id_start) 504 503 { 505 - struct btree_insert_entry *i; 506 504 bool trans_trigger_run; 507 505 int ret, overwrite; 508 506 ··· 514 514 do { 515 515 trans_trigger_run = false; 516 516 517 - for (i = btree_id_start; 518 - i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; 517 + for (unsigned i = btree_id_start; 518 + i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; 519 519 i++) { 520 - if (i->btree_id != btree_id) 520 + if (trans->updates[i].btree_id != btree_id) 521 521 continue; 522 522 523 - ret = run_one_trans_trigger(trans, i, overwrite); 523 + ret = run_one_trans_trigger(trans, trans->updates + i, overwrite); 524 524 if (ret < 0) 525 525 return ret; 526 526 if (ret) ··· 534 534 535 535 static int bch2_trans_commit_run_triggers(struct btree_trans *trans) 536 536 { 537 - struct btree_insert_entry *btree_id_start = trans->updates; 538 - unsigned btree_id = 0; 537 + unsigned btree_id = 0, btree_id_start = 0; 539 538 int ret = 0; 540 539 541 540 /* ··· 548 549 if (btree_id == BTREE_ID_alloc) 549 550 continue; 550 551 551 - while (btree_id_start < trans->updates + trans->nr_updates && 552 - btree_id_start->btree_id < btree_id) 552 + while (btree_id_start < trans->nr_updates && 553 + 
trans->updates[btree_id_start].btree_id < btree_id) 553 554 btree_id_start++; 554 555 555 556 ret = run_btree_triggers(trans, btree_id, btree_id_start); ··· 557 558 return ret; 558 559 } 559 560 560 - trans_for_each_update(trans, i) { 561 + for (unsigned idx = 0; idx < trans->nr_updates; idx++) { 562 + struct btree_insert_entry *i = trans->updates + idx; 563 + 561 564 if (i->btree_id > BTREE_ID_alloc) 562 565 break; 563 566 if (i->btree_id == BTREE_ID_alloc) { 564 - ret = run_btree_triggers(trans, BTREE_ID_alloc, i); 567 + ret = run_btree_triggers(trans, BTREE_ID_alloc, idx); 565 568 if (ret) 566 569 return ret; 567 570 break; ··· 827 826 struct bch_fs *c = trans->c; 828 827 int ret = 0, u64s_delta = 0; 829 828 830 - trans_for_each_update(trans, i) { 829 + for (unsigned idx = 0; idx < trans->nr_updates; idx++) { 830 + struct btree_insert_entry *i = trans->updates + idx; 831 831 if (i->cached) 832 832 continue; 833 833
+66 -15
fs/bcachefs/btree_update_interior.c
··· 21 21 #include "keylist.h" 22 22 #include "recovery_passes.h" 23 23 #include "replicas.h" 24 + #include "sb-members.h" 24 25 #include "super-io.h" 25 26 #include "trace.h" 26 27 ··· 606 605 bch2_keylist_push(keys); 607 606 } 608 607 608 + static bool btree_update_new_nodes_marked_sb(struct btree_update *as) 609 + { 610 + for_each_keylist_key(&as->new_keys, k) 611 + if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k))) 612 + return false; 613 + return true; 614 + } 615 + 616 + static void btree_update_new_nodes_mark_sb(struct btree_update *as) 617 + { 618 + struct bch_fs *c = as->c; 619 + 620 + mutex_lock(&c->sb_lock); 621 + for_each_keylist_key(&as->new_keys, k) 622 + bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k)); 623 + 624 + bch2_write_super(c); 625 + mutex_unlock(&c->sb_lock); 626 + } 627 + 609 628 /* 610 629 * The transactional part of an interior btree node update, where we journal the 611 630 * update we did to the interior node and update alloc info: ··· 682 661 ret = bch2_journal_error(&c->journal); 683 662 if (ret) 684 663 goto err; 664 + 665 + if (!btree_update_new_nodes_marked_sb(as)) 666 + btree_update_new_nodes_mark_sb(as); 685 667 686 668 /* 687 669 * Wait for any in flight writes to finish before we free the old nodes ··· 1304 1280 bch2_recalc_btree_reserve(c); 1305 1281 } 1306 1282 1307 - static void bch2_btree_set_root(struct btree_update *as, 1308 - struct btree_trans *trans, 1309 - struct btree_path *path, 1310 - struct btree *b) 1283 + static int bch2_btree_set_root(struct btree_update *as, 1284 + struct btree_trans *trans, 1285 + struct btree_path *path, 1286 + struct btree *b, 1287 + bool nofail) 1311 1288 { 1312 1289 struct bch_fs *c = as->c; 1313 - struct btree *old; 1314 1290 1315 1291 trace_and_count(c, btree_node_set_root, trans, b); 1316 1292 1317 - old = btree_node_root(c, b); 1293 + struct btree *old = btree_node_root(c, b); 1318 1294 1319 1295 /* 1320 1296 * Ensure no one is using the old root while we switch to the 1321 
1297 * new root: 1322 1298 */ 1323 - bch2_btree_node_lock_write_nofail(trans, path, &old->c); 1299 + if (nofail) { 1300 + bch2_btree_node_lock_write_nofail(trans, path, &old->c); 1301 + } else { 1302 + int ret = bch2_btree_node_lock_write(trans, path, &old->c); 1303 + if (ret) 1304 + return ret; 1305 + } 1324 1306 1325 1307 bch2_btree_set_root_inmem(c, b); 1326 1308 ··· 1340 1310 * depend on the new root would have to update the new root. 1341 1311 */ 1342 1312 bch2_btree_node_unlock_write(trans, path, old); 1313 + return 0; 1343 1314 } 1344 1315 1345 1316 /* Interior node updates: */ ··· 1683 1652 if (parent) { 1684 1653 /* Split a non root node */ 1685 1654 ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys); 1686 - if (ret) 1687 - goto err; 1688 1655 } else if (n3) { 1689 - bch2_btree_set_root(as, trans, trans->paths + path, n3); 1656 + ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false); 1690 1657 } else { 1691 1658 /* Root filled up but didn't need to be split */ 1692 - bch2_btree_set_root(as, trans, trans->paths + path, n1); 1659 + ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false); 1693 1660 } 1661 + 1662 + if (ret) 1663 + goto err; 1694 1664 1695 1665 if (n3) { 1696 1666 bch2_btree_update_get_open_buckets(as, n3); ··· 1895 1863 bch2_keylist_add(&as->parent_keys, &b->key); 1896 1864 btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys); 1897 1865 1898 - bch2_btree_set_root(as, trans, path, n); 1866 + int ret = bch2_btree_set_root(as, trans, path, n, true); 1867 + BUG_ON(ret); 1868 + 1899 1869 bch2_btree_update_get_open_buckets(as, n); 1900 1870 bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); 1901 1871 bch2_trans_node_add(trans, path, n); ··· 1949 1915 1950 1916 BUG_ON(!trans->paths[path].should_be_locked); 1951 1917 BUG_ON(!btree_node_locked(&trans->paths[path], level)); 1918 + 1919 + /* 1920 + * Work around a deadlock caused by the btree write buffer not doing 1921 + * merges and leaving 
tons of merges for us to do - we really don't need 1922 + * to be doing merges at all from the interior update path, and if the 1923 + * interior update path is generating too many new interior updates we 1924 + * deadlock: 1925 + */ 1926 + if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates) 1927 + return 0; 1928 + 1929 + flags &= ~BCH_WATERMARK_MASK; 1952 1930 1953 1931 b = trans->paths[path].l[level].b; 1954 1932 ··· 2107 2061 bch2_path_put(trans, new_path, true); 2108 2062 bch2_path_put(trans, sib_path, true); 2109 2063 bch2_trans_verify_locks(trans); 2064 + if (ret == -BCH_ERR_journal_reclaim_would_deadlock) 2065 + ret = 0; 2066 + if (!ret) 2067 + ret = bch2_trans_relock(trans); 2110 2068 return ret; 2111 2069 err_free_update: 2112 2070 bch2_btree_node_free_never_used(as, trans, n); ··· 2156 2106 if (parent) { 2157 2107 bch2_keylist_add(&as->parent_keys, &n->key); 2158 2108 ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys); 2159 - if (ret) 2160 - goto err; 2161 2109 } else { 2162 - bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n); 2110 + ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false); 2163 2111 } 2112 + 2113 + if (ret) 2114 + goto err; 2164 2115 2165 2116 bch2_btree_update_get_open_buckets(as, n); 2166 2117 bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
+12 -2
fs/bcachefs/btree_write_buffer.c
··· 316 316 bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) { 317 317 bch2_btree_node_unlock_write(trans, path, path->l[0].b); 318 318 write_locked = false; 319 + 320 + ret = lockrestart_do(trans, 321 + bch2_btree_iter_traverse(&iter) ?: 322 + bch2_foreground_maybe_merge(trans, iter.path, 0, 323 + BCH_WATERMARK_reclaim| 324 + BCH_TRANS_COMMIT_journal_reclaim| 325 + BCH_TRANS_COMMIT_no_check_rw| 326 + BCH_TRANS_COMMIT_no_enospc)); 327 + if (ret) 328 + goto err; 319 329 } 320 330 } 321 331 ··· 392 382 393 383 ret = commit_do(trans, NULL, NULL, 394 384 BCH_WATERMARK_reclaim| 385 + BCH_TRANS_COMMIT_journal_reclaim| 395 386 BCH_TRANS_COMMIT_no_check_rw| 396 387 BCH_TRANS_COMMIT_no_enospc| 397 - BCH_TRANS_COMMIT_no_journal_res| 398 - BCH_TRANS_COMMIT_journal_reclaim, 388 + BCH_TRANS_COMMIT_no_journal_res , 399 389 btree_write_buffered_insert(trans, i)); 400 390 if (ret) 401 391 goto err;
-8
fs/bcachefs/buckets.h
··· 395 395 : "(invalid data type)"; 396 396 } 397 397 398 - static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type) 399 - { 400 - if (type < BCH_DATA_NR) 401 - prt_str(out, __bch2_data_types[type]); 402 - else 403 - prt_printf(out, "(invalid data type %u)", type); 404 - } 405 - 406 398 /* disk reservations: */ 407 399 408 400 static inline void bch2_disk_reservation_put(struct bch_fs *c,
+14 -9
fs/bcachefs/checksum.c
··· 429 429 extent_nonce(version, crc_old), bio); 430 430 431 431 if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) { 432 - bch_err(c, "checksum error in %s() (memory corruption or bug?)\n" 433 - "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)", 434 - __func__, 435 - crc_old.csum.hi, 436 - crc_old.csum.lo, 437 - merged.hi, 438 - merged.lo, 439 - bch2_csum_types[crc_old.csum_type], 440 - bch2_csum_types[new_csum_type]); 432 + struct printbuf buf = PRINTBUF; 433 + prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n" 434 + "expected %0llx:%0llx got %0llx:%0llx (old type ", 435 + __func__, 436 + crc_old.csum.hi, 437 + crc_old.csum.lo, 438 + merged.hi, 439 + merged.lo); 440 + bch2_prt_csum_type(&buf, crc_old.csum_type); 441 + prt_str(&buf, " new type "); 442 + bch2_prt_csum_type(&buf, new_csum_type); 443 + prt_str(&buf, ")"); 444 + bch_err(c, "%s", buf.buf); 445 + printbuf_exit(&buf); 441 446 return -EIO; 442 447 } 443 448
+3 -2
fs/bcachefs/checksum.h
··· 61 61 struct bch_csum expected, 62 62 struct bch_csum got) 63 63 { 64 - prt_printf(out, "checksum error: got "); 64 + prt_str(out, "checksum error, type "); 65 + bch2_prt_csum_type(out, type); 66 + prt_str(out, ": got "); 65 67 bch2_csum_to_text(out, type, got); 66 68 prt_str(out, " should be "); 67 69 bch2_csum_to_text(out, type, expected); 68 - prt_printf(out, " type %s", bch2_csum_types[type]); 69 70 } 70 71 71 72 int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
-8
fs/bcachefs/compress.h
··· 47 47 return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; 48 48 } 49 49 50 - static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type) 51 - { 52 - if (type < BCH_COMPRESSION_TYPE_NR) 53 - prt_str(out, __bch2_compression_types[type]); 54 - else 55 - prt_printf(out, "(invalid compression type %u)", type); 56 - } 57 - 58 50 int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, 59 51 struct bch_extent_crc_unpacked *); 60 52 int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
+26 -24
fs/bcachefs/ec.c
··· 131 131 void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, 132 132 struct bkey_s_c k) 133 133 { 134 - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; 135 - unsigned i, nr_data = s->nr_blocks - s->nr_redundant; 134 + const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v; 135 + struct bch_stripe s = {}; 136 136 137 - prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", 138 - s->algorithm, 139 - le16_to_cpu(s->sectors), 140 - nr_data, 141 - s->nr_redundant, 142 - s->csum_type, 143 - 1U << s->csum_granularity_bits); 137 + memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k))); 144 138 145 - for (i = 0; i < s->nr_blocks; i++) { 146 - const struct bch_extent_ptr *ptr = s->ptrs + i; 147 - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); 148 - u32 offset; 149 - u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); 139 + unsigned nr_data = s.nr_blocks - s.nr_redundant; 150 140 151 - prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset); 152 - if (i < nr_data) 153 - prt_printf(out, "#%u", stripe_blockcount_get(s, i)); 154 - prt_printf(out, " gen %u", ptr->gen); 155 - if (ptr_stale(ca, ptr)) 156 - prt_printf(out, " stale"); 141 + prt_printf(out, "algo %u sectors %u blocks %u:%u csum ", 142 + s.algorithm, 143 + le16_to_cpu(s.sectors), 144 + nr_data, 145 + s.nr_redundant); 146 + bch2_prt_csum_type(out, s.csum_type); 147 + prt_printf(out, " gran %u", 1U << s.csum_granularity_bits); 148 + 149 + for (unsigned i = 0; i < s.nr_blocks; i++) { 150 + const struct bch_extent_ptr *ptr = sp->ptrs + i; 151 + 152 + if ((void *) ptr >= bkey_val_end(k)) 153 + break; 154 + 155 + bch2_extent_ptr_to_text(out, c, ptr); 156 + 157 + if (s.csum_type < BCH_CSUM_NR && 158 + i < nr_data && 159 + stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k)) 160 + prt_printf(out, "#%u", stripe_blockcount_get(sp, i)); 157 161 } 158 162 } 159 163 ··· 611 607 struct printbuf err = PRINTBUF; 612 608 struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev); 613 609 
614 - prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n", 615 - want.hi, want.lo, 616 - got.hi, got.lo, 617 - bch2_csum_types[v->csum_type]); 610 + prt_str(&err, "stripe "); 611 + bch2_csum_err_msg(&err, v->csum_type, want, got); 618 612 prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); 619 613 bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); 620 614 bch_err_ratelimited(ca, "%s", err.buf);
+2
fs/bcachefs/ec.h
··· 32 32 static inline unsigned stripe_csum_offset(const struct bch_stripe *s, 33 33 unsigned dev, unsigned csum_idx) 34 34 { 35 + EBUG_ON(s->csum_type >= BCH_CSUM_NR); 36 + 35 37 unsigned csum_bytes = bch_crc_bytes[s->csum_type]; 36 38 37 39 return sizeof(struct bch_stripe) +
+7 -4
fs/bcachefs/extents.c
··· 998 998 prt_str(out, " cached"); 999 999 if (ptr->unwritten) 1000 1000 prt_str(out, " unwritten"); 1001 - if (ca && ptr_stale(ca, ptr)) 1001 + if (b >= ca->mi.first_bucket && 1002 + b < ca->mi.nbuckets && 1003 + ptr_stale(ca, ptr)) 1002 1004 prt_printf(out, " stale"); 1003 1005 } 1004 1006 } ··· 1030 1028 struct bch_extent_crc_unpacked crc = 1031 1029 bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); 1032 1030 1033 - prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ", 1031 + prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ", 1034 1032 crc.compressed_size, 1035 1033 crc.uncompressed_size, 1036 - crc.offset, crc.nonce, 1037 - bch2_csum_types[crc.csum_type]); 1034 + crc.offset, crc.nonce); 1035 + bch2_prt_csum_type(out, crc.csum_type); 1036 + prt_str(out, " compress "); 1038 1037 bch2_prt_compression_type(out, crc.compression_type); 1039 1038 break; 1040 1039 }
+13 -6
fs/bcachefs/fs-io-direct.c
··· 387 387 ret = dio->op.error ?: ((long) dio->written << 9); 388 388 bio_put(&dio->op.wbio.bio); 389 389 390 + bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write); 391 + 390 392 /* inode->i_dio_count is our ref on inode and thus bch_fs */ 391 393 inode_dio_end(&inode->v); 392 394 ··· 592 590 prefetch(&inode->ei_inode); 593 591 prefetch((void *) &inode->ei_inode + 64); 594 592 593 + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write)) 594 + return -EROFS; 595 + 595 596 inode_lock(&inode->v); 596 597 597 598 ret = generic_write_checks(req, iter); 598 599 if (unlikely(ret <= 0)) 599 - goto err; 600 + goto err_put_write_ref; 600 601 601 602 ret = file_remove_privs(file); 602 603 if (unlikely(ret)) 603 - goto err; 604 + goto err_put_write_ref; 604 605 605 606 ret = file_update_time(file); 606 607 if (unlikely(ret)) 607 - goto err; 608 + goto err_put_write_ref; 608 609 609 610 if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) 610 - goto err; 611 + goto err_put_write_ref; 611 612 612 613 inode_dio_begin(&inode->v); 613 614 bch2_pagecache_block_get(inode); ··· 650 645 } 651 646 652 647 ret = bch2_dio_write_loop(dio); 653 - err: 648 + out: 654 649 if (locked) 655 650 inode_unlock(&inode->v); 656 651 return ret; ··· 658 653 bch2_pagecache_block_put(inode); 659 654 bio_put(bio); 660 655 inode_dio_end(&inode->v); 661 - goto err; 656 + err_put_write_ref: 657 + bch2_write_ref_put(c, BCH_WRITE_REF_dio_write); 658 + goto out; 662 659 } 663 660 664 661 void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
+8 -8
fs/bcachefs/fs-io.c
··· 174 174 static int bch2_flush_inode(struct bch_fs *c, 175 175 struct bch_inode_info *inode) 176 176 { 177 - struct bch_inode_unpacked u; 178 - int ret; 179 - 180 177 if (c->opts.journal_flush_disabled) 181 178 return 0; 182 179 183 - ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u); 184 - if (ret) 185 - return ret; 180 + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) 181 + return -EROFS; 186 182 187 - return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 188 - bch2_inode_flush_nocow_writes(c, inode); 183 + struct bch_inode_unpacked u; 184 + int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?: 185 + bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 186 + bch2_inode_flush_nocow_writes(c, inode); 187 + bch2_write_ref_put(c, BCH_WRITE_REF_fsync); 188 + return ret; 189 189 } 190 190 191 191 int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+9 -8
fs/bcachefs/journal_io.c
··· 247 247 248 248 if (entry) { 249 249 prt_str(out, " type="); 250 - prt_str(out, bch2_jset_entry_types[entry->type]); 250 + bch2_prt_jset_entry_type(out, entry->type); 251 251 } 252 252 253 253 if (!jset) { ··· 403 403 jset_entry_for_each_key(entry, k) { 404 404 if (!first) { 405 405 prt_newline(out); 406 - prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); 406 + bch2_prt_jset_entry_type(out, entry->type); 407 + prt_str(out, ": "); 407 408 } 408 409 prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); 409 410 bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); ··· 564 563 struct jset_entry_usage *u = 565 564 container_of(entry, struct jset_entry_usage, entry); 566 565 567 - prt_printf(out, "type=%s v=%llu", 568 - bch2_fs_usage_types[u->entry.btree_id], 569 - le64_to_cpu(u->v)); 566 + prt_str(out, "type="); 567 + bch2_prt_fs_usage_type(out, u->entry.btree_id); 568 + prt_printf(out, " v=%llu", le64_to_cpu(u->v)); 570 569 } 571 570 572 571 static int journal_entry_data_usage_validate(struct bch_fs *c, ··· 828 827 void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, 829 828 struct jset_entry *entry) 830 829 { 830 + bch2_prt_jset_entry_type(out, entry->type); 831 + 831 832 if (entry->type < BCH_JSET_ENTRY_NR) { 832 - prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); 833 + prt_str(out, ": "); 833 834 bch2_jset_entry_ops[entry->type].to_text(out, c, entry); 834 - } else { 835 - prt_printf(out, "(unknown type %u)", entry->type); 836 835 } 837 836 } 838 837
+25 -4
fs/bcachefs/opts.c
··· 43 43 NULL 44 44 }; 45 45 46 - const char * const bch2_csum_types[] = { 46 + static const char * const __bch2_csum_types[] = { 47 47 BCH_CSUM_TYPES() 48 48 NULL 49 49 }; ··· 53 53 NULL 54 54 }; 55 55 56 - const char * const __bch2_compression_types[] = { 56 + static const char * const __bch2_compression_types[] = { 57 57 BCH_COMPRESSION_TYPES() 58 58 NULL 59 59 }; ··· 83 83 NULL 84 84 }; 85 85 86 - const char * const bch2_jset_entry_types[] = { 86 + static const char * const __bch2_jset_entry_types[] = { 87 87 BCH_JSET_ENTRY_TYPES() 88 88 NULL 89 89 }; 90 90 91 - const char * const bch2_fs_usage_types[] = { 91 + static const char * const __bch2_fs_usage_types[] = { 92 92 BCH_FS_USAGE_TYPES() 93 93 NULL 94 94 }; 95 95 96 96 #undef x 97 + 98 + static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[], 99 + unsigned nr, const char *type, unsigned idx) 100 + { 101 + if (idx < nr) 102 + prt_str(out, opts[idx]); 103 + else 104 + prt_printf(out, "(unknown %s %u)", type, idx); 105 + } 106 + 107 + #define PRT_STR_OPT_BOUNDSCHECKED(name, type) \ 108 + void bch2_prt_##name(struct printbuf *out, type t) \ 109 + { \ 110 + prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\ 111 + } 112 + 113 + PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type); 114 + PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type); 115 + PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type); 116 + PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type); 117 + PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type); 97 118 98 119 static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res, 99 120 struct printbuf *err)
+6 -4
fs/bcachefs/opts.h
··· 16 16 extern const char * const bch2_sb_features[]; 17 17 extern const char * const bch2_sb_compat[]; 18 18 extern const char * const __bch2_btree_ids[]; 19 - extern const char * const bch2_csum_types[]; 20 19 extern const char * const bch2_csum_opts[]; 21 - extern const char * const __bch2_compression_types[]; 22 20 extern const char * const bch2_compression_opts[]; 23 21 extern const char * const bch2_str_hash_types[]; 24 22 extern const char * const bch2_str_hash_opts[]; 25 23 extern const char * const __bch2_data_types[]; 26 24 extern const char * const bch2_member_states[]; 27 - extern const char * const bch2_jset_entry_types[]; 28 - extern const char * const bch2_fs_usage_types[]; 29 25 extern const char * const bch2_d_types[]; 26 + 27 + void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type); 28 + void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type); 29 + void bch2_prt_data_type(struct printbuf *, enum bch_data_type); 30 + void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type); 31 + void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type); 30 32 31 33 static inline const char *bch2_d_type_str(unsigned d_type) 32 34 {
+1 -1
fs/bcachefs/recovery_passes.c
··· 44 44 45 45 set_bit(BCH_FS_may_go_rw, &c->flags); 46 46 47 - if (keys->nr || c->opts.fsck || !c->sb.clean) 47 + if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit) 48 48 return bch2_fs_read_write_early(c); 49 49 return 0; 50 50 }
+4 -1
fs/bcachefs/sb-downgrade.c
··· 51 51 BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \ 52 52 x(btree_subvolume_children, \ 53 53 BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \ 54 - BCH_FSCK_ERR_subvol_children_not_set) 54 + BCH_FSCK_ERR_subvol_children_not_set) \ 55 + x(mi_btree_bitmap, \ 56 + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 57 + BCH_FSCK_ERR_btree_bitmap_not_marked) 55 58 56 59 #define DOWNGRADE_TABLE() 57 60
+3 -2
fs/bcachefs/sb-errors_types.h
··· 130 130 x(bucket_gens_nonzero_for_invalid_buckets, 122) \ 131 131 x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ 132 132 x(need_discard_freespace_key_bad, 124) \ 133 - x(backpointer_pos_wrong, 125) \ 133 + x(backpointer_bucket_offset_wrong, 125) \ 134 134 x(backpointer_to_missing_device, 126) \ 135 135 x(backpointer_to_missing_alloc, 127) \ 136 136 x(backpointer_to_missing_ptr, 128) \ ··· 270 270 x(btree_ptr_v2_min_key_bad, 262) \ 271 271 x(btree_root_unreadable_and_scan_found_nothing, 263) \ 272 272 x(snapshot_node_missing, 264) \ 273 - x(dup_backpointer_to_bad_csum_extent, 265) 273 + x(dup_backpointer_to_bad_csum_extent, 265) \ 274 + x(btree_bitmap_not_marked, 266) 274 275 275 276 enum bch_sb_error_id { 276 277 #define x(t, n) BCH_FSCK_ERR_##t = n,
+53
fs/bcachefs/sb-members.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include "bcachefs.h" 4 + #include "btree_cache.h" 4 5 #include "disk_groups.h" 5 6 #include "opts.h" 6 7 #include "replicas.h" ··· 426 425 427 426 bch2_write_super(c); 428 427 mutex_unlock(&c->sb_lock); 428 + } 429 + 430 + /* 431 + * Per member "range has btree nodes" bitmap: 432 + * 433 + * This is so that if we ever have to run the btree node scan to repair we don't 434 + * have to scan full devices: 435 + */ 436 + 437 + bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) 438 + { 439 + bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) 440 + if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev), 441 + ptr->offset, btree_sectors(c))) 442 + return false; 443 + return true; 444 + } 445 + 446 + static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, 447 + u64 start, unsigned sectors) 448 + { 449 + struct bch_member *m = __bch2_members_v2_get_mut(mi, dev); 450 + u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); 451 + 452 + u64 end = start + sectors; 453 + 454 + int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); 455 + if (resize > 0) { 456 + u64 new_bitmap = 0; 457 + 458 + for (unsigned i = 0; i < 64; i++) 459 + if (bitmap & BIT_ULL(i)) 460 + new_bitmap |= BIT_ULL(i >> resize); 461 + bitmap = new_bitmap; 462 + m->btree_bitmap_shift += resize; 463 + } 464 + 465 + for (unsigned bit = sectors >> m->btree_bitmap_shift; 466 + bit << m->btree_bitmap_shift < end; 467 + bit++) 468 + bitmap |= BIT_ULL(bit); 469 + 470 + m->btree_allocated_bitmap = cpu_to_le64(bitmap); 471 + } 472 + 473 + void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) 474 + { 475 + lockdep_assert_held(&c->sb_lock); 476 + 477 + struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 478 + bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) 479 + __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c)); 429 480 }
+21
fs/bcachefs/sb-members.h
··· 3 3 #define _BCACHEFS_SB_MEMBERS_H 4 4 5 5 #include "darray.h" 6 + #include "bkey_types.h" 6 7 7 8 extern char * const bch2_member_error_strs[]; 8 9 ··· 221 220 : 1, 222 221 .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), 223 222 .valid = bch2_member_exists(mi), 223 + .btree_bitmap_shift = mi->btree_bitmap_shift, 224 + .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap), 224 225 }; 225 226 } 226 227 ··· 230 227 231 228 void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); 232 229 void bch2_dev_errors_reset(struct bch_dev *); 230 + 231 + static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors) 232 + { 233 + u64 end = start + sectors; 234 + 235 + if (end > 64 << ca->mi.btree_bitmap_shift) 236 + return false; 237 + 238 + for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift; 239 + bit << ca->mi.btree_bitmap_shift < end; 240 + bit++) 241 + if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit))) 242 + return false; 243 + return true; 244 + } 245 + 246 + bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c); 247 + void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c); 233 248 234 249 #endif /* _BCACHEFS_SB_MEMBERS_H */
+5 -2
fs/bcachefs/super-io.c
··· 700 700 return -ENOMEM; 701 701 702 702 sb->sb_name = kstrdup(path, GFP_KERNEL); 703 - if (!sb->sb_name) 704 - return -ENOMEM; 703 + if (!sb->sb_name) { 704 + ret = -ENOMEM; 705 + prt_printf(&err, "error allocating memory for sb_name"); 706 + goto err; 707 + } 705 708 706 709 #ifndef __KERNEL__ 707 710 if (opt_get(*opts, direct_io) == false)
+2
fs/bcachefs/super_types.h
··· 37 37 u8 durability; 38 38 u8 freespace_initialized; 39 39 u8 valid; 40 + u8 btree_bitmap_shift; 41 + u64 btree_allocated_bitmap; 40 42 }; 41 43 42 44 #endif /* _BCACHEFS_SUPER_TYPES_H */
+10 -1
fs/bcachefs/sysfs.c
··· 25 25 #include "ec.h" 26 26 #include "inode.h" 27 27 #include "journal.h" 28 + #include "journal_reclaim.h" 28 29 #include "keylist.h" 29 30 #include "move.h" 30 31 #include "movinggc.h" ··· 139 138 write_attribute(trigger_gc); 140 139 write_attribute(trigger_discards); 141 140 write_attribute(trigger_invalidates); 141 + write_attribute(trigger_journal_flush); 142 142 write_attribute(prune_cache); 143 143 write_attribute(btree_wakeup); 144 144 rw_attribute(btree_gc_periodic); ··· 502 500 503 501 /* Debugging: */ 504 502 505 - if (!test_bit(BCH_FS_rw, &c->flags)) 503 + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)) 506 504 return -EROFS; 507 505 508 506 if (attr == &sysfs_prune_cache) { ··· 535 533 if (attr == &sysfs_trigger_invalidates) 536 534 bch2_do_invalidates(c); 537 535 536 + if (attr == &sysfs_trigger_journal_flush) { 537 + bch2_journal_flush_all_pins(&c->journal); 538 + bch2_journal_meta(&c->journal); 539 + } 540 + 538 541 #ifdef CONFIG_BCACHEFS_TESTS 539 542 if (attr == &sysfs_perf_test) { 540 543 char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; ··· 560 553 size = ret; 561 554 } 562 555 #endif 556 + bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); 563 557 return size; 564 558 } 565 559 SYSFS_OPS(bch2_fs); ··· 659 651 &sysfs_trigger_gc, 660 652 &sysfs_trigger_discards, 661 653 &sysfs_trigger_invalidates, 654 + &sysfs_trigger_journal_flush, 662 655 &sysfs_prune_cache, 663 656 &sysfs_btree_wakeup, 664 657