Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "bcachefs: Delete some obsolete journal_seq_blacklist code"

This reverts commit f95b61228efd04c9c158123da5827c96e9773b29.

It turns out that we're seeing filesystems in the wild end up with
blacklisted btree node bsets. This should not be happening, and until
we understand why and fix it, we need to keep this code around.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>

authored by

Kent Overstreet and committed by
Kent Overstreet
9b6e2f1e 03ea3962

+100 -8
+1
fs/bcachefs/bcachefs.h
··· 755 755 /* JOURNAL SEQ BLACKLIST */ 756 756 struct journal_seq_blacklist_table * 757 757 journal_seq_blacklist_table; 758 + struct work_struct journal_seq_blacklist_gc_work; 758 759 759 760 /* ALLOCATOR */ 760 761 spinlock_t freelist_lock;
+78
fs/bcachefs/journal_seq_blacklist.c
··· 241 241 .validate = bch2_sb_journal_seq_blacklist_validate, 242 242 .to_text = bch2_sb_journal_seq_blacklist_to_text 243 243 }; 244 + 245 + void bch2_blacklist_entries_gc(struct work_struct *work) 246 + { 247 + struct bch_fs *c = container_of(work, struct bch_fs, 248 + journal_seq_blacklist_gc_work); 249 + struct journal_seq_blacklist_table *t; 250 + struct bch_sb_field_journal_seq_blacklist *bl; 251 + struct journal_seq_blacklist_entry *src, *dst; 252 + struct btree_trans trans; 253 + unsigned i, nr, new_nr; 254 + int ret; 255 + 256 + bch2_trans_init(&trans, c, 0, 0); 257 + 258 + for (i = 0; i < BTREE_ID_NR; i++) { 259 + struct btree_iter iter; 260 + struct btree *b; 261 + 262 + bch2_trans_node_iter_init(&trans, &iter, i, POS_MIN, 263 + 0, 0, BTREE_ITER_PREFETCH); 264 + retry: 265 + bch2_trans_begin(&trans); 266 + 267 + b = bch2_btree_iter_peek_node(&iter); 268 + 269 + while (!(ret = PTR_ERR_OR_ZERO(b)) && 270 + b && 271 + !test_bit(BCH_FS_STOPPING, &c->flags)) 272 + b = bch2_btree_iter_next_node(&iter); 273 + 274 + if (ret == -EINTR) 275 + goto retry; 276 + 277 + bch2_trans_iter_exit(&trans, &iter); 278 + } 279 + 280 + bch2_trans_exit(&trans); 281 + if (ret) 282 + return; 283 + 284 + mutex_lock(&c->sb_lock); 285 + bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); 286 + if (!bl) 287 + goto out; 288 + 289 + nr = blacklist_nr_entries(bl); 290 + dst = bl->start; 291 + 292 + t = c->journal_seq_blacklist_table; 293 + BUG_ON(nr != t->nr); 294 + 295 + for (src = bl->start, i = eytzinger0_first(t->nr); 296 + src < bl->start + nr; 297 + src++, i = eytzinger0_next(i, nr)) { 298 + BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); 299 + BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); 300 + 301 + if (t->entries[i].dirty) 302 + *dst++ = *src; 303 + } 304 + 305 + new_nr = dst - bl->start; 306 + 307 + bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr); 308 + 309 + if (new_nr != nr) { 310 + bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 311 + 
new_nr ? sb_blacklist_u64s(new_nr) : 0); 312 + BUG_ON(new_nr && !bl); 313 + 314 + if (!new_nr) 315 + c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3)); 316 + 317 + bch2_write_super(c); 318 + } 319 + out: 320 + mutex_unlock(&c->sb_lock); 321 + }
+2
fs/bcachefs/journal_seq_blacklist.h
··· 17 17 18 18 extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist; 19 19 20 + void bch2_blacklist_entries_gc(struct work_struct *); 21 + 20 22 #endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
+14 -8
fs/bcachefs/recovery.c
··· 1065 1065 if (ret) 1066 1066 goto err; 1067 1067 1068 + /* 1069 + * After an unclean shutdown, skip the next few journal sequence 1070 + * numbers as they may have been referenced by btree writes that 1071 + * happened before their corresponding journal writes - those btree 1072 + * writes need to be ignored, by skipping and blacklisting the next few 1073 + * journal sequence numbers: 1074 + */ 1075 + if (!c->sb.clean) 1076 + journal_seq += 8; 1077 + 1078 if (blacklist_seq != journal_seq) { 1079 ret = bch2_journal_seq_blacklist_add(c, 1080 blacklist_seq, journal_seq); ··· 1220 1210 } 1221 1211 1222 1212 mutex_lock(&c->sb_lock); 1223 - /* 1224 - * With journal replay done, we can clear the journal seq blacklist 1225 - * table: 1226 - */ 1227 - BUG_ON(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)); 1228 - if (le16_to_cpu(c->sb.version_min) >= bcachefs_metadata_version_btree_ptr_sectors_written) 1229 - bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 0); 1230 - 1231 1213 if (c->opts.version_upgrade) { 1232 1214 c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); 1233 1215 c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); ··· 1260 1258 goto err; 1261 1259 bch_info(c, "scanning for old btree nodes done"); 1262 1260 } 1261 + 1262 + if (c->journal_seq_blacklist_table && 1263 + c->journal_seq_blacklist_table->nr > 128) 1264 + queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); 1263 1265 1264 1266 ret = 0; 1265 1267 out:
+5
fs/bcachefs/super.c
··· 535 535 536 536 set_bit(BCH_FS_STOPPING, &c->flags); 537 537 538 + cancel_work_sync(&c->journal_seq_blacklist_gc_work); 539 + 538 540 down_write(&c->state_lock); 539 541 bch2_fs_read_only(c); 540 542 up_write(&c->state_lock); ··· 699 697 mutex_init(&c->snapshot_table_lock); 700 698 701 699 spin_lock_init(&c->btree_write_error_lock); 700 + 701 + INIT_WORK(&c->journal_seq_blacklist_gc_work, 702 + bch2_blacklist_entries_gc); 702 703 703 704 INIT_LIST_HEAD(&c->journal_entries); 704 705 INIT_LIST_HEAD(&c->journal_iters);