Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v6.17-rc5 457 lines 15 kB view raw
// SPDX-License-Identifier: GPL-2.0

/*
 * Superblock section that contains a list of recovery passes to run when
 * downgrading past a given version
 */

#include "bcachefs.h"
#include "darray.h"
#include "recovery_passes.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"

/*
 * Sentinel recovery-pass bit: expanded at apply time (bch2_sb_set_upgrade())
 * into the full set of fsck passes via bch2_fsck_recovery_passes().
 */
#define RECOVERY_PASS_ALL_FSCK		BIT_ULL(63)

/*
 * Upgrade, downgrade tables - run certain recovery passes, fix certain errors
 *
 * x(version, recovery_passes, errors...)
 */
#define UPGRADE_TABLE()						\
	x(snapshot_2,						\
	  RECOVERY_PASS_ALL_FSCK,				\
	  BCH_FSCK_ERR_subvol_root_wrong_bi_subvol,		\
	  BCH_FSCK_ERR_subvol_not_master_and_not_snapshot)	\
	x(backpointers,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(inode_v3,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(unwritten_extents,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(bucket_gens,						\
	  BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)|		\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(lru_v2,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(fragmentation_lru,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(no_bps_in_alloc_keys,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(snapshot_trees,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(snapshot_skiplists,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_snapshots),		\
	  BCH_FSCK_ERR_snapshot_bad_depth,			\
	  BCH_FSCK_ERR_snapshot_bad_skiplist)			\
	x(deleted_inodes,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list)	\
	x(rebalance_work,					\
	  BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))	\
	x(subvolume_fs_parent,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_dirents),		\
	  BCH_FSCK_ERR_subvol_fs_path_parent_wrong)		\
	x(btree_subvolume_children,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_subvols),		\
	  BCH_FSCK_ERR_subvol_children_not_set)			\
	x(mi_btree_bitmap,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_btree_bitmap_not_marked)			\
	x(disk_accounting_v2,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_bkey_version_in_future,			\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(disk_accounting_v3,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_bkey_version_in_future,			\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad,	\
	  BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(disk_accounting_inum,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(rebalance_work_acct_fix,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(inode_has_child_snapshots,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_inode_has_child_snapshots_wrong)		\
	x(backpointer_bucket_gen,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_backpointer_to_missing_ptr,		\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(disk_accounting_big_endian,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(cached_backpointers,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(stripe_backpointers,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(inode_has_case_insensitive,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_inode_has_case_insensitive_not_set,	\
	  BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)

#define DOWNGRADE_TABLE()					\
	x(bucket_stripe_sectors,				\
	  0)							\
	x(disk_accounting_v2,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,			\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,			\
	  BCH_FSCK_ERR_fs_usage_data_wrong,			\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,			\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,		\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
	  BCH_FSCK_ERR_bkey_version_in_future)			\
	x(disk_accounting_v3,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,			\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,			\
	  BCH_FSCK_ERR_fs_usage_data_wrong,			\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,			\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,		\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
	  BCH_FSCK_ERR_accounting_replicas_not_marked,		\
	  BCH_FSCK_ERR_bkey_version_in_future)			\
	x(rebalance_work_acct_fix,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(backpointer_bucket_gen,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_backpointer_bucket_offset_wrong,		\
	  BCH_FSCK_ERR_backpointer_to_missing_ptr,		\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(disk_accounting_big_endian,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)

/*
 * In-memory form of one table row: the recovery passes to schedule and the
 * fsck errors to silence when crossing @version.
 */
struct upgrade_downgrade_entry {
	u64		recovery_passes;
	u16		version;
	u16		nr_errors;
	const u16	*errors;
};

/* Expand one static error-id array per UPGRADE_TABLE() row: */
#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ };
UPGRADE_TABLE()
#undef x

static const struct upgrade_downgrade_entry upgrade_table[] = {
#define x(ver, passes, ...) {					\
	.recovery_passes	= passes,			\
	.version		= bcachefs_metadata_version_##ver,\
	.nr_errors		= ARRAY_SIZE(upgrade_##ver##_errors),	\
	.errors			= upgrade_##ver##_errors,	\
},
UPGRADE_TABLE()
#undef x
};

/*
 * Returns nonzero iff the stripes btree has a real (non-fake) root, i.e. the
 * filesystem has erasure-coded stripes that extra passes must account for.
 */
static int have_stripes(struct bch_fs *c)
{
	if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
		return 0;

	return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
}

/*
 * Extra upgrade handling that can't be table-driven: crossing
 * bucket_stripe_sectors on a filesystem that actually has stripes schedules
 * check_allocations and silences the alloc-key sector errors it will repair.
 * Takes sb_lock itself; writes the superblock if anything changed.
 */
int bch2_sb_set_upgrade_extra(struct bch_fs *c)
{
	unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
	unsigned new_version = c->sb.version;
	bool write_sb = false;
	int ret = 0;

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	/*
	 * NOTE(review): '>' binds tighter than '=', so ret is assigned the
	 * boolean (have_stripes(c) > 0).  Behavior is unchanged because
	 * have_stripes() only returns 0 or 1 here, but it means the
	 * "ret < 0" check below can never fire — confirm intent upstream.
	 */
	if (old_version < bcachefs_metadata_version_bucket_stripe_sectors &&
	    new_version >= bcachefs_metadata_version_bucket_stripe_sectors &&
	    (ret = have_stripes(c) > 0)) {
		__set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_sectors_wrong, ext->errors_silent);
		write_sb = true;
	}

	if (write_sb)
		bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return ret < 0 ? ret : 0;
}

/*
 * Apply every upgrade_table row with old_version < row version <= new_version:
 * OR the row's recovery passes (expanding RECOVERY_PASS_ALL_FSCK) into the
 * superblock's required passes and mark its errors silent.
 * Caller must hold sb_lock.
 */
void bch2_sb_set_upgrade(struct bch_fs *c,
			 unsigned old_version,
			 unsigned new_version)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	for (const struct upgrade_downgrade_entry *i = upgrade_table;
	     i < upgrade_table + ARRAY_SIZE(upgrade_table);
	     i++)
		if (i->version > old_version && i->version <= new_version) {
			u64 passes = i->recovery_passes;

			/* Expand the ALL_FSCK sentinel, then strip it: */
			if (passes & RECOVERY_PASS_ALL_FSCK)
				passes |= bch2_fsck_recovery_passes();
			passes &= ~RECOVERY_PASS_ALL_FSCK;

			ext->recovery_passes_required[0] |=
				cpu_to_le64(bch2_recovery_passes_to_stable(passes));

			for (const u16 *e = i->errors; e < i->errors + i->nr_errors; e++)
				__set_bit_le64(*e, ext->errors_silent);
		}
}

/* Same x-macro expansion as the upgrade table, for DOWNGRADE_TABLE(): */
#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x

static const struct upgrade_downgrade_entry downgrade_table[] = {
#define x(ver, passes, ...) {					\
	.recovery_passes	= passes,			\
	.version		= bcachefs_metadata_version_##ver,\
	.nr_errors		= ARRAY_SIZE(downgrade_##ver##_errors),	\
	.errors			= downgrade_##ver##_errors,	\
},
DOWNGRADE_TABLE()
#undef x
};

/*
 * Augment the not-yet-committed downgrade entry at the top of @table with
 * runtime-dependent extras; may grow the darray (invalidating dst pointers,
 * hence the re-derivation from dst_offset after darray_make_room()).
 * Does not bump table->nr — the caller commits the entry's final size.
 */
static int downgrade_table_extra(struct bch_fs *c, darray_char *table)
{
	unsigned dst_offset = table->nr;
	struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table);
	unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
	int ret = 0;

	unsigned nr_errors = le16_to_cpu(dst->nr_errors);

	switch (le16_to_cpu(dst->version)) {
	case bcachefs_metadata_version_bucket_stripe_sectors:
		if (have_stripes(c)) {
			/*
			 * NOTE(review): room is reserved for 2 extra error
			 * ids but nr_errors grows by 1 and only one id is
			 * appended below — presumably intentional headroom;
			 * confirm against upstream history.
			 */
			bytes += sizeof(dst->errors[0]) * 2;

			ret = darray_make_room(table, bytes);
			if (ret)
				return ret;

			dst = (void *) &table->data[dst_offset];
			dst->nr_errors = cpu_to_le16(nr_errors + 1);

			/* open coded __set_bit_le64, as dst is packed and
			 * dst->recovery_passes is misaligned */
			unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations;
			dst->recovery_passes[b / 64] |= cpu_to_le64(BIT_ULL(b % 64));

			dst->errors[nr_errors++] = cpu_to_le16(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong);
		}
		break;
	}

	return ret;
}

/*
 * Entries are variable length (trailing errors[] array): the next entry
 * starts immediately after this one's error list.
 */
static inline const struct bch_sb_field_downgrade_entry *
downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
{
	return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
}

/*
 * Iterate over well-formed entries only: every bounds condition is rechecked
 * per step so a truncated/padding tail entry simply ends the loop.
 */
#define for_each_downgrade_entry(_d, _i)						\
	for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;		\
	     (void *) _i	< vstruct_end(&(_d)->field) &&				\
	     (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) &&			\
	     (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field);		\
	     _i = downgrade_entry_next_c(_i))

static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
				      enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);

	for (const struct bch_sb_field_downgrade_entry *i = e->entries;
	     (void *) i	< vstruct_end(&e->field);
	     i = downgrade_entry_next_c(i)) {
		/*
		 * Careful: sb_field_downgrade_entry is only 2 byte aligned, but
		 * section sizes are 8 byte aligned - an empty entry spanning
		 * the end of the section is allowed (and ignored):
		 */
		if ((void *) &i->errors[0] > vstruct_end(&e->field))
			break;

		/* A truncated entry is only a hard error when writing: */
		if (flags & BCH_VALIDATE_write &&
		    (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) {
			prt_printf(err, "downgrade entry overruns end of superblock section");
			return -BCH_ERR_invalid_sb_downgrade;
		}

		if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
		    BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
			prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
				   BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
				   BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
			return -BCH_ERR_invalid_sb_downgrade;
		}
	}

	return 0;
}

/* Pretty-print the downgrade section for `show-super` style output. */
static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
				      struct bch_sb_field *f)
{
	struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);

	if (out->nr_tabstops <= 1)
		printbuf_tabstop_push(out, 16);

	for_each_downgrade_entry(e, i) {
		prt_str(out, "version:\t");
		bch2_version_to_text(out, le16_to_cpu(i->version));
		prt_newline(out);

		prt_str(out, "recovery passes:\t");
		prt_bitflags(out, bch2_recovery_passes,
			     bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
		prt_newline(out);

		prt_str(out, "errors:\t");
		bool first = true;
		for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
			if (!first)
				prt_char(out, ',');
			first = false;
			bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j]));
		}
		prt_newline(out);
	}
}

const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
	.validate	= bch2_sb_downgrade_validate,
	.to_text	= bch2_sb_downgrade_to_text,
};

/*
 * Rebuild the superblock downgrade section from downgrade_table, restricted
 * to entries matching the current major version and not below
 * version_incompat.  Entries are staged in a darray, then copied into a
 * (possibly resized) sb field.  Caller must hold sb_lock.
 */
int bch2_sb_downgrade_update(struct bch_fs *c)
{
	if (!test_bit(BCH_FS_btree_running, &c->flags))
		return 0;

	darray_char table = {};
	int ret = 0;

	for (const struct upgrade_downgrade_entry *src = downgrade_table;
	     src < downgrade_table + ARRAY_SIZE(downgrade_table);
	     src++) {
		if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
			continue;

		if (src->version < c->sb.version_incompat)
			continue;

		struct bch_sb_field_downgrade_entry *dst;
		unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;

		ret = darray_make_room(&table, bytes);
		if (ret)
			goto out;

		dst = (void *) &darray_top(table);
		dst->version = cpu_to_le16(src->version);
		dst->recovery_passes[0]	= cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
		dst->recovery_passes[1]	= 0;
		dst->nr_errors		= cpu_to_le16(src->nr_errors);
		for (unsigned i = 0; i < src->nr_errors; i++)
			dst->errors[i] = cpu_to_le16(src->errors[i]);

		/* may grow dst->nr_errors and set extra recovery passes: */
		ret = downgrade_table_extra(c, &table);
		if (ret)
			goto out;

		/* skip (don't commit) entries that ended up empty: */
		if (!dst->recovery_passes[0] &&
		    !dst->recovery_passes[1] &&
		    !dst->nr_errors)
			continue;

		/* commit the entry at its final, possibly-extended size: */
		table.nr += sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
	}

	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);

	unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));

	/* existing section already big enough — leave it in place: */
	if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
		goto out;

	d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
	if (!d) {
		ret = bch_err_throw(c, ENOSPC_sb_downgrade);
		goto out;
	}

	memcpy(d->entries, table.data, table.nr);
	/* zero the 8-byte-alignment padding after the last entry: */
	memset_u64s_tail(d->entries, 0, table.nr);
out:
	darray_exit(&table);
	return ret;
}

/*
 * When downgrading from old_minor to new_minor, apply every downgrade-section
 * entry in that (new_minor, old_minor] window: require its recovery passes
 * and silence its errors, both in-memory and in the sb ext field.
 */
void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
{
	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
	if (!d)
		return;

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	for_each_downgrade_entry(d, i) {
		unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
		if (new_minor < minor && minor <= old_minor) {
			/* both sides already little-endian — plain OR is correct: */
			ext->recovery_passes_required[0] |= i->recovery_passes[0];
			ext->recovery_passes_required[1] |= i->recovery_passes[1];

			for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
				unsigned e = le16_to_cpu(i->errors[j]);
				/* bounds-check: entry came from (untrusted) disk */
				if (e < BCH_FSCK_ERR_MAX)
					__set_bit(e, c->sb.errors_silent);
				if (e < sizeof(ext->errors_silent) * 8)
					__set_bit_le64(e, ext->errors_silent);
			}
		}
	}
}