Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'bcachefs-2024-11-13' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
"This fixes one minor regression from the btree cache fixes (in the
scan_for_btree_nodes repair path) - and the shutdown path fix is the
big one here, in terms of bugs closed:

- Assorted tiny syzbot fixes

- Shutdown path fix: bch2_btree_write_buffer_flush_going_ro()

The shutdown path wasn't flushing the btree write buffer, leading
to shutting down while we still had operations in flight. This
fixes a whole slew of syzbot bugs, and undoubtedly other strange
heisenbugs."

* tag 'bcachefs-2024-11-13' of git://evilpiepirate.org/bcachefs:
bcachefs: Fix assertion pop in bch2_ptr_swab()
bcachefs: Fix journal_entry_dev_usage_to_text() overrun
bcachefs: Allow for unknown key types in backpointers fsck
bcachefs: Fix assertion pop in topology repair
bcachefs: Fix hidden btree errors when reading roots
bcachefs: Fix validate_bset() repair path
bcachefs: Fix missing validation for bch_backpointer.level
bcachefs: Fix bch_member.btree_bitmap_shift validation
bcachefs: bch2_btree_write_buffer_flush_going_ro()

+72 -19
+12 -5
fs/bcachefs/backpointers.c
··· 52 52 enum bch_validate_flags flags) 53 53 { 54 54 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 55 + int ret = 0; 56 + 57 + bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH, 58 + c, backpointer_level_bad, 59 + "backpointer level bad: %u >= %u", 60 + bp.v->level, BTREE_MAX_DEPTH); 55 61 56 62 rcu_read_lock(); 57 63 struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); ··· 70 64 struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p); 71 65 struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset); 72 66 rcu_read_unlock(); 73 - int ret = 0; 74 67 75 68 bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || 76 69 !bpos_eq(bp.k->p, bp_pos), ··· 952 947 static int check_one_backpointer(struct btree_trans *trans, 953 948 struct bbpos start, 954 949 struct bbpos end, 955 - struct bkey_s_c_backpointer bp, 950 + struct bkey_s_c bp_k, 956 951 struct bkey_buf *last_flushed) 957 952 { 953 + if (bp_k.k->type != KEY_TYPE_backpointer) 954 + return 0; 955 + 956 + struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); 958 957 struct bch_fs *c = trans->c; 959 958 struct btree_iter iter; 960 959 struct bbpos pos = bp_to_bbpos(*bp.v); ··· 1013 1004 POS_MIN, BTREE_ITER_prefetch, k, 1014 1005 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 1015 1006 progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); 1016 - check_one_backpointer(trans, start, end, 1017 - bkey_s_c_to_backpointer(k), 1018 - &last_flushed); 1007 + check_one_backpointer(trans, start, end, k, &last_flushed); 1019 1008 })); 1020 1009 1021 1010 bch2_bkey_buf_exit(&last_flushed, c);
+1 -1
fs/bcachefs/btree_gc.c
··· 182 182 bch2_btree_node_drop_keys_outside_node(b); 183 183 184 184 mutex_lock(&c->btree_cache.lock); 185 - bch2_btree_node_hash_remove(&c->btree_cache, b); 185 + __bch2_btree_node_hash_remove(&c->btree_cache, b); 186 186 187 187 bkey_copy(&b->key, &new->k_i); 188 188 ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+1 -5
fs/bcachefs/btree_io.c
··· 733 733 c, ca, b, i, NULL, 734 734 bset_past_end_of_btree_node, 735 735 "bset past end of btree node (offset %u len %u but written %zu)", 736 - offset, sectors, ptr_written ?: btree_sectors(c))) { 736 + offset, sectors, ptr_written ?: btree_sectors(c))) 737 737 i->u64s = 0; 738 - ret = 0; 739 - goto out; 740 - } 741 738 742 739 btree_err_on(offset && !i->u64s, 743 740 -BCH_ERR_btree_node_read_err_fixable, ··· 826 829 BSET_BIG_ENDIAN(i), write, 827 830 &bn->format); 828 831 } 829 - out: 830 832 fsck_err: 831 833 printbuf_exit(&buf2); 832 834 printbuf_exit(&buf1);
+2 -1
fs/bcachefs/btree_update_interior.c
··· 2398 2398 if (new_hash) { 2399 2399 mutex_lock(&c->btree_cache.lock); 2400 2400 bch2_btree_node_hash_remove(&c->btree_cache, new_hash); 2401 - bch2_btree_node_hash_remove(&c->btree_cache, b); 2401 + 2402 + __bch2_btree_node_hash_remove(&c->btree_cache, b); 2402 2403 2403 2404 bkey_copy(&b->key, new_key); 2404 2405 ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+27 -3
fs/bcachefs/btree_write_buffer.c
··· 277 277 bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags); 278 278 int ret = 0; 279 279  280 + ret = bch2_journal_error(&c->journal); 281 + if (ret) 282 + return ret; 283 + 280 284 bch2_trans_unlock(trans); 281 285 bch2_trans_begin(trans); 282 286 ··· 495 491 return ret; 496 492 } 497 493 498 - static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq) 494 + static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, 495 + bool *did_work) 499 496 { 500 497 struct bch_fs *c = trans->c; 501 498 struct btree_write_buffer *wb = &c->btree_write_buffer; ··· 506 501 bch2_trans_unlock(trans); 507 502  508 503 fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq); 504 +  505 + *did_work |= wb->inc.keys.nr || wb->flushing.keys.nr; 509 506  510 507 /* 511 508 * On memory allocation failure, bch2_btree_write_buffer_flush_locked() ··· 528 521 struct journal_entry_pin *_pin, u64 seq) 529 522 { 530 523 struct bch_fs *c = container_of(j, struct bch_fs, journal); 524 + bool did_work = false; 531 525  532 - return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq)); 526 + return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work)); 533 527 } 534 528  535 529 int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) 536 530 { 537 531 struct bch_fs *c = trans->c; 532 + bool did_work = false; 538 533  539 534 trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_); 540 535  541 536 return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work); 537 + } 538 +  539 + /* 540 + * The write buffer requires flushing when going RO: keys in the journal for the 541 + * write buffer don't have a journal pin yet 542 + */ 543 + bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c) 544 + { 545 + if (bch2_journal_error(&c->journal)) 546 + return false; 547 + 548 + bool did_work = false; 549 + bch2_trans_run(c, btree_write_buffer_flush_seq(trans, 550 + journal_cur_seq(&c->journal), &did_work)); 551 + return did_work; 542 552 } 543 553  544 554 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
+1
fs/bcachefs/btree_write_buffer.h
··· 21 21 22 22 struct btree_trans; 23 23 int bch2_btree_write_buffer_flush_sync(struct btree_trans *); 24 + bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *); 24 25 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); 25 26 int bch2_btree_write_buffer_tryflush(struct btree_trans *); 26 27
+4 -1
fs/bcachefs/extents.c
··· 1364 1364 for (entry = ptrs.start; 1365 1365 entry < ptrs.end; 1366 1366 entry = extent_entry_next(entry)) { 1367 - switch (extent_entry_type(entry)) { 1367 + switch (__extent_entry_type(entry)) { 1368 1368 case BCH_EXTENT_ENTRY_ptr: 1369 1369 break; 1370 1370 case BCH_EXTENT_ENTRY_crc32: ··· 1384 1384 break; 1385 1385 case BCH_EXTENT_ENTRY_rebalance: 1386 1386 break; 1387 + default: 1388 + /* Bad entry type: will be caught by validate() */ 1389 + return; 1387 1390 } 1388 1391 } 1389 1392 }
+3
fs/bcachefs/journal_io.c
··· 708 708 container_of(entry, struct jset_entry_dev_usage, entry); 709 709 unsigned i, nr_types = jset_entry_dev_usage_nr_types(u); 710 710 711 + if (vstruct_bytes(entry) < sizeof(*u)) 712 + return; 713 + 711 714 prt_printf(out, "dev=%u", le32_to_cpu(u->dev)); 712 715 713 716 printbuf_indent_add(out, 2);
+6
fs/bcachefs/recovery_passes.c
··· 27 27 NULL 28 28 }; 29 29 30 + /* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */ 31 + static int bch2_recovery_pass_empty(struct bch_fs *c) 32 + { 33 + return 0; 34 + } 35 + 30 36 static int bch2_set_may_go_rw(struct bch_fs *c) 31 37 { 32 38 struct journal_keys *keys = &c->journal_keys;
+1
fs/bcachefs/recovery_passes_types.h
··· 13 13 * must never change: 14 14 */ 15 15 #define BCH_RECOVERY_PASSES() \ 16 + x(recovery_pass_empty, 41, PASS_SILENT) \ 16 17 x(scan_for_btree_nodes, 37, 0) \ 17 18 x(check_topology, 4, 0) \ 18 19 x(accounting_read, 39, PASS_ALWAYS) \
+5 -1
fs/bcachefs/sb-errors_format.h
··· 136 136 x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ 137 137 x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ 138 138 x(need_discard_freespace_key_bad, 124, 0) \ 139 + x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ 139 140 x(backpointer_bucket_offset_wrong, 125, 0) \ 141 + x(backpointer_level_bad, 294, 0) \ 140 142 x(backpointer_to_missing_device, 126, 0) \ 141 143 x(backpointer_to_missing_alloc, 127, 0) \ 142 144 x(backpointer_to_missing_ptr, 128, 0) \ ··· 179 177 x(ptr_stripe_redundant, 163, 0) \ 180 178 x(reservation_key_nr_replicas_invalid, 164, 0) \ 181 179 x(reflink_v_refcount_wrong, 165, 0) \ 180 + x(reflink_v_pos_bad, 292, 0) \ 182 181 x(reflink_p_to_missing_reflink_v, 166, 0) \ 182 + x(reflink_refcount_underflow, 293, 0) \ 183 183 x(stripe_pos_bad, 167, 0) \ 184 184 x(stripe_val_size_bad, 168, 0) \ 185 185 x(stripe_csum_granularity_bad, 290, 0) \ ··· 306 302 x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ 307 303 x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ 308 304 x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ 309 - x(MAX, 291, 0) 305 + x(MAX, 295, 0) 310 306 311 307 enum bch_sb_error_id { 312 308 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
+2 -2
fs/bcachefs/sb-members.c
··· 163 163 return -BCH_ERR_invalid_sb_members; 164 164 } 165 165 166 - if (m.btree_bitmap_shift >= 64) { 166 + if (m.btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX) { 167 167 prt_printf(err, "device %u: invalid btree_bitmap_shift %u", i, m.btree_bitmap_shift); 168 168 return -BCH_ERR_invalid_sb_members; 169 169 } ··· 450 450 m->btree_bitmap_shift += resize; 451 451 } 452 452 453 - BUG_ON(m->btree_bitmap_shift > 57); 453 + BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX); 454 454 BUG_ON(end > 64ULL << m->btree_bitmap_shift); 455 455 456 456 for (unsigned bit = start >> m->btree_bitmap_shift;
+6
fs/bcachefs/sb-members_format.h
··· 66 66 }; 67 67 68 68 /* 69 + * btree_allocated_bitmap can represent sector addresses of a u64: it itself has 70 + * 64 elements, so 64 - ilog2(64) 71 + */ 72 + #define BCH_MI_BTREE_BITMAP_SHIFT_MAX 58 73 + 74 + /* 69 75 * This limit comes from the bucket_gens array - it's a single allocation, and 70 76 * kernel allocation are limited to INT_MAX 71 77 */
+1
fs/bcachefs/super.c
··· 272 272 clean_passes++; 273 273 274 274 if (bch2_btree_interior_updates_flush(c) || 275 + bch2_btree_write_buffer_flush_going_ro(c) || 275 276 bch2_journal_flush_all_pins(&c->journal) || 276 277 bch2_btree_flush_all_writes(c) || 277 278 seq != atomic64_read(&c->journal.seq)) {