Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: bch_sb_field_downgrade

Add a new superblock section that contains a list of
{ minor version, recovery passes, errors_to_fix }

that is: for each entry, a list of recovery passes that must be run when
downgrading past the given version, and a list of errors to silently fix.

The upcoming disk accounting rewrite is not going to be fully
compatible: we're going to have to regenerate accounting both when
upgrading to the new version and when downgrading from the new
version, since the new method of doing disk space accounting is a
completely different architecture based on deltas, and synchronizing
the two for every journal entry write to maintain compatibility is going to
be too expensive and impractical.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+255 -9
+1
fs/bcachefs/Makefile
··· 71 71 reflink.o \ 72 72 replicas.o \ 73 73 sb-clean.o \ 74 + sb-downgrade.o \ 74 75 sb-errors.o \ 75 76 sb-members.o \ 76 77 siphash.o \
+19 -1
fs/bcachefs/bcachefs_format.h
··· 1220 1220 x(counters, 10) \ 1221 1221 x(members_v2, 11) \ 1222 1222 x(errors, 12) \ 1223 - x(ext, 13) 1223 + x(ext, 13) \ 1224 + x(downgrade, 14) 1224 1225 1225 1226 enum bch_sb_field_type { 1226 1227 #define x(f, nr) BCH_SB_FIELD_##f = nr, ··· 1639 1638 __le64 errors_silent[8]; 1640 1639 }; 1641 1640 1641 + struct bch_sb_field_downgrade_entry { 1642 + __le16 version; 1643 + __le64 recovery_passes[2]; 1644 + __le16 nr_errors; 1645 + __le16 errors[] __counted_by(nr_errors); 1646 + } __packed __aligned(2); 1647 + 1648 + struct bch_sb_field_downgrade { 1649 + struct bch_sb_field field; 1650 + struct bch_sb_field_downgrade_entry entries[]; 1651 + }; 1652 + 1642 1653 /* Superblock: */ 1643 1654 1644 1655 /* ··· 1664 1651 1665 1652 #define RECOVERY_PASS_ALL_FSCK (1ULL << 63) 1666 1653 1654 + /* 1655 + * field 1: version name 1656 + * field 2: BCH_VERSION(major, minor) 1657 + * field 3: recovery passess required on upgrade 1658 + */ 1667 1659 #define BCH_METADATA_VERSIONS() \ 1668 1660 x(bkey_renumber, BCH_VERSION(0, 10), \ 1669 1661 RECOVERY_PASS_ALL_FSCK) \
+2
fs/bcachefs/errcode.h
··· 95 95 x(ENOSPC, ENOSPC_sb_members) \ 96 96 x(ENOSPC, ENOSPC_sb_members_v2) \ 97 97 x(ENOSPC, ENOSPC_sb_crypt) \ 98 + x(ENOSPC, ENOSPC_sb_downgrade) \ 98 99 x(ENOSPC, ENOSPC_btree_slot) \ 99 100 x(ENOSPC, ENOSPC_snapshot_tree) \ 100 101 x(ENOENT, ENOENT_bkey_type_mismatch) \ ··· 220 219 x(BCH_ERR_invalid_sb, invalid_sb_errors) \ 221 220 x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ 222 221 x(BCH_ERR_invalid_sb, invalid_sb_ext) \ 222 + x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ 223 223 x(BCH_ERR_invalid, invalid_bkey) \ 224 224 x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ 225 225 x(EIO, btree_node_read_err) \
+23 -1
fs/bcachefs/recovery.c
··· 27 27 #include "recovery.h" 28 28 #include "replicas.h" 29 29 #include "sb-clean.h" 30 + #include "sb-downgrade.h" 30 31 #include "snapshot.h" 31 32 #include "subvolume.h" 32 33 #include "super-io.h" ··· 745 744 printbuf_exit(&buf); 746 745 } 747 746 747 + if (bch2_check_version_downgrade(c)) { 748 + struct printbuf buf = PRINTBUF; 749 + 750 + prt_str(&buf, "Version downgrade required:\n"); 751 + 752 + __le64 passes = ext->recovery_passes_required[0]; 753 + bch2_sb_set_downgrade(c, 754 + BCH_VERSION_MINOR(bcachefs_metadata_version_current), 755 + BCH_VERSION_MINOR(c->sb.version)); 756 + passes = ext->recovery_passes_required[0] & ~passes; 757 + if (passes) { 758 + prt_str(&buf, " running recovery passes: "); 759 + prt_bitflags(&buf, bch2_recovery_passes, 760 + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); 761 + } 762 + 763 + bch_info(c, "%s", buf.buf); 764 + printbuf_exit(&buf); 765 + write_sb = true; 766 + } 767 + 748 768 if (check_version_upgrade(c)) 749 769 write_sb = true; 750 770 ··· 1044 1022 c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); 1045 1023 c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); 1046 1024 1047 - bch2_sb_maybe_downgrade(c); 1025 + bch2_check_version_downgrade(c); 1048 1026 1049 1027 if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { 1050 1028 bch2_sb_upgrade(c, bcachefs_metadata_version_current);
-2
fs/bcachefs/sb-clean.c
··· 332 332 333 333 mutex_lock(&c->sb_lock); 334 334 SET_BCH_SB_CLEAN(c->disk_sb.sb, false); 335 - 336 - bch2_sb_maybe_downgrade(c); 337 335 c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); 338 336 339 337 ret = bch2_write_super(c);
+188
fs/bcachefs/sb-downgrade.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Superblock section that contains a list of recovery passes to run when 5 + * downgrading past a given version 6 + */ 7 + 8 + #include "bcachefs.h" 9 + #include "darray.h" 10 + #include "recovery.h" 11 + #include "sb-downgrade.h" 12 + #include "sb-errors.h" 13 + #include "super-io.h" 14 + 15 + /* 16 + * Downgrade table: 17 + * When dowgrading past certain versions, we need to run certain recovery passes 18 + * and fix certain errors: 19 + * 20 + * x(version, recovery_passes, errors...) 21 + */ 22 + 23 + #define DOWNGRADE_TABLE() 24 + 25 + struct downgrade_entry { 26 + u64 recovery_passes; 27 + u16 version; 28 + u16 nr_errors; 29 + const u16 *errors; 30 + }; 31 + 32 + #define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ }; 33 + DOWNGRADE_TABLE() 34 + #undef x 35 + 36 + static const struct downgrade_entry downgrade_table[] = { 37 + #define x(ver, passes, ...) { \ 38 + .recovery_passes = passes, \ 39 + .version = bcachefs_metadata_version_##ver,\ 40 + .nr_errors = ARRAY_SIZE(ver_##errors), \ 41 + .errors = ver_##errors, \ 42 + }, 43 + DOWNGRADE_TABLE() 44 + #undef x 45 + }; 46 + 47 + static inline const struct bch_sb_field_downgrade_entry * 48 + downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e) 49 + { 50 + return (void *) &e->errors[le16_to_cpu(e->nr_errors)]; 51 + } 52 + 53 + #define for_each_downgrade_entry(_d, _i) \ 54 + for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries; \ 55 + (void *) _i < vstruct_end(&(_d)->field) && \ 56 + (void *) &_i->errors[0] < vstruct_end(&(_d)->field); \ 57 + _i = downgrade_entry_next_c(_i)) 58 + 59 + static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f, 60 + struct printbuf *err) 61 + { 62 + struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); 63 + 64 + for_each_downgrade_entry(e, i) { 65 + if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) != 66 + 
BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) { 67 + prt_printf(err, "downgrade entry with mismatched major version (%u != %u)", 68 + BCH_VERSION_MAJOR(le16_to_cpu(i->version)), 69 + BCH_VERSION_MAJOR(le16_to_cpu(sb->version))); 70 + return -BCH_ERR_invalid_sb_downgrade; 71 + } 72 + } 73 + 74 + return 0; 75 + } 76 + 77 + static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb, 78 + struct bch_sb_field *f) 79 + { 80 + struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); 81 + 82 + if (out->nr_tabstops <= 1) 83 + printbuf_tabstop_push(out, 16); 84 + 85 + for_each_downgrade_entry(e, i) { 86 + prt_str(out, "version:"); 87 + prt_tab(out); 88 + bch2_version_to_text(out, le16_to_cpu(i->version)); 89 + prt_newline(out); 90 + 91 + prt_str(out, "recovery passes:"); 92 + prt_tab(out); 93 + prt_bitflags(out, bch2_recovery_passes, 94 + bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0]))); 95 + prt_newline(out); 96 + 97 + prt_str(out, "errors:"); 98 + prt_tab(out); 99 + bool first = true; 100 + for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { 101 + if (!first) 102 + prt_char(out, ','); 103 + first = false; 104 + unsigned e = le16_to_cpu(i->errors[j]); 105 + prt_str(out, e < BCH_SB_ERR_MAX ? 
bch2_sb_error_strs[e] : "(unknown)"); 106 + } 107 + prt_newline(out); 108 + } 109 + } 110 + 111 + const struct bch_sb_field_ops bch_sb_field_ops_downgrade = { 112 + .validate = bch2_sb_downgrade_validate, 113 + .to_text = bch2_sb_downgrade_to_text, 114 + }; 115 + 116 + int bch2_sb_downgrade_update(struct bch_fs *c) 117 + { 118 + darray_char table = {}; 119 + int ret = 0; 120 + 121 + for (const struct downgrade_entry *src = downgrade_table; 122 + src < downgrade_table + ARRAY_SIZE(downgrade_table); 123 + src++) { 124 + if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) 125 + continue; 126 + 127 + struct bch_sb_field_downgrade_entry *dst; 128 + unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; 129 + 130 + ret = darray_make_room(&table, bytes); 131 + if (ret) 132 + goto out; 133 + 134 + dst = (void *) &darray_top(table); 135 + dst->version = cpu_to_le16(src->version); 136 + dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes); 137 + dst->recovery_passes[1] = 0; 138 + dst->nr_errors = cpu_to_le16(src->nr_errors); 139 + for (unsigned i = 0; i < src->nr_errors; i++) 140 + dst->errors[i] = cpu_to_le16(src->errors[i]); 141 + 142 + table.nr += bytes; 143 + } 144 + 145 + struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); 146 + 147 + unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64)); 148 + 149 + if (d && le32_to_cpu(d->field.u64s) > sb_u64s) 150 + goto out; 151 + 152 + d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s); 153 + if (!d) { 154 + ret = -BCH_ERR_ENOSPC_sb_downgrade; 155 + goto out; 156 + } 157 + 158 + memcpy(d->entries, table.data, table.nr); 159 + memset_u64s_tail(d->entries, 0, table.nr); 160 + out: 161 + darray_exit(&table); 162 + return ret; 163 + } 164 + 165 + void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor) 166 + { 167 + struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, 
downgrade); 168 + if (!d) 169 + return; 170 + 171 + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 172 + 173 + for_each_downgrade_entry(d, i) { 174 + unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version)); 175 + if (new_minor < minor && minor <= old_minor) { 176 + ext->recovery_passes_required[0] |= i->recovery_passes[0]; 177 + ext->recovery_passes_required[1] |= i->recovery_passes[1]; 178 + 179 + for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { 180 + unsigned e = le16_to_cpu(i->errors[j]); 181 + if (e < BCH_SB_ERR_MAX) 182 + __set_bit(e, c->sb.errors_silent); 183 + if (e < sizeof(ext->errors_silent) * 8) 184 + ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64)); 185 + } 186 + } 187 + } 188 + }
+10
fs/bcachefs/sb-downgrade.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_SB_DOWNGRADE_H 3 + #define _BCACHEFS_SB_DOWNGRADE_H 4 + 5 + extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade; 6 + 7 + int bch2_sb_downgrade_update(struct bch_fs *); 8 + void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned); 9 + 10 + #endif /* _BCACHEFS_SB_DOWNGRADE_H */
+1 -3
fs/bcachefs/sb-errors.c
··· 20 20 21 21 static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e) 22 22 { 23 - return e 24 - ? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0]) 25 - : 0; 23 + return bch2_sb_field_nr_entries(e); 26 24 } 27 25 28 26 static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)
+10 -1
fs/bcachefs/super-io.c
··· 13 13 #include "replicas.h" 14 14 #include "quota.h" 15 15 #include "sb-clean.h" 16 + #include "sb-downgrade.h" 16 17 #include "sb-errors.h" 17 18 #include "sb-members.h" 18 19 #include "super-io.h" ··· 940 939 bch2_sb_members_from_cpu(c); 941 940 bch2_sb_members_cpy_v2_v1(&c->disk_sb); 942 941 bch2_sb_errors_from_cpu(c); 942 + bch2_sb_downgrade_update(c); 943 943 944 944 for_each_online_member(ca, c, i) 945 945 bch2_sb_from_fs(c, ca); ··· 1064 1062 } 1065 1063 1066 1064 /* Downgrade if superblock is at a higher version than currently supported: */ 1067 - void bch2_sb_maybe_downgrade(struct bch_fs *c) 1065 + bool bch2_check_version_downgrade(struct bch_fs *c) 1068 1066 { 1067 + bool ret = bcachefs_metadata_version_current < c->sb.version; 1068 + 1069 1069 lockdep_assert_held(&c->sb_lock); 1070 1070 1071 1071 /* ··· 1081 1077 if (c->sb.version_min > bcachefs_metadata_version_current) 1082 1078 c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); 1083 1079 c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); 1080 + return ret; 1084 1081 } 1085 1082 1086 1083 void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) 1087 1084 { 1088 1085 lockdep_assert_held(&c->sb_lock); 1086 + 1087 + if (BCH_VERSION_MAJOR(new_version) > 1088 + BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) 1089 + bch2_sb_field_resize(&c->disk_sb, downgrade, 0); 1089 1090 1090 1091 c->disk_sb.sb->version = cpu_to_le16(new_version); 1091 1092 c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
+1 -1
fs/bcachefs/super-io.h
··· 93 93 __bch2_check_set_feature(c, feat); 94 94 } 95 95 96 - void bch2_sb_maybe_downgrade(struct bch_fs *); 96 + bool bch2_check_version_downgrade(struct bch_fs *); 97 97 void bch2_sb_upgrade(struct bch_fs *, unsigned); 98 98 99 99 void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,