Merge tag 'bcachefs-2024-10-05' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
"A lot of little fixes, bigger ones include:

- bcachefs's __wait_on_freeing_inode() was broken in rc1 due to vfs
changes, now fixed along with another lost wakeup

- fragmentation LRU fixes (this is the data structure copygc uses);
fsck now repairs it successfully, along with some nice simplification.

- Rework logged op error handling, so that if logged op replay errors
(due to another filesystem error) we delete the logged op instead
of going into an infinite loop

- Various small filesystem connectivity repair fixes"

* tag 'bcachefs-2024-10-05' of git://evilpiepirate.org/bcachefs:
bcachefs: Rework logged op error handling
bcachefs: Add warn param to subvol_get_snapshot, peek_inode
bcachefs: Kill snapshot arg to fsck_write_inode()
bcachefs: Check for unlinked, non-empty dirs in check_inode()
bcachefs: Check for unlinked inodes with dirents
bcachefs: Check for directories with no backpointers
bcachefs: Kill alloc_v4.fragmentation_lru
bcachefs: minor lru fsck fixes
bcachefs: Mark more errors AUTOFIX
bcachefs: Make sure we print error that causes fsck to bail out
bcachefs: bkey errors are only AUTOFIX during read
bcachefs: Create lost+found in correct snapshot
bcachefs: Fix reattach_inode()
bcachefs: Add missing wakeup to bch2_inode_hash_remove()
bcachefs: Fix trans_commit disk accounting revert
bcachefs: Fix bch2_inode_is_open() check
bcachefs: Fix return type of dirent_points_to_inode_nowarn()
bcachefs: Fix bad shift in bch2_read_flag_list()

+20 -10
fs/bcachefs/alloc_background.c
··· 332 332 a->io_time[1] = swab64(a->io_time[1]); 333 333 a->stripe = swab32(a->stripe); 334 334 a->nr_external_backpointers = swab32(a->nr_external_backpointers); 335 - a->fragmentation_lru = swab64(a->fragmentation_lru); 336 335 a->stripe_sectors = swab32(a->stripe_sectors); 337 336 338 337 bps = alloc_v4_backpointers(a); ··· 346 347 { 347 348 struct bch_alloc_v4 _a; 348 349 const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); 350 + struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL; 349 351 350 352 prt_newline(out); 351 353 printbuf_indent_add(out, 2); ··· 364 364 prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); 365 365 prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); 366 366 prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); 367 - prt_printf(out, "fragmentation %llu\n", a->fragmentation_lru); 367 + 368 + if (ca) 369 + prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); 368 370 prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a)); 369 371 printbuf_indent_sub(out, 2); 372 + 373 + bch2_dev_put(ca); 370 374 } 371 375 372 376 void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) ··· 886 882 goto err; 887 883 } 888 884 889 - new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, ca); 890 - if (old_a->fragmentation_lru != new_a->fragmentation_lru) { 885 + old_lru = alloc_lru_idx_fragmentation(*old_a, ca); 886 + new_lru = alloc_lru_idx_fragmentation(*new_a, ca); 887 + if (old_lru != new_lru) { 891 888 ret = bch2_lru_change(trans, 892 889 BCH_LRU_FRAGMENTATION_START, 893 890 bucket_to_u64(new.k->p), 894 - old_a->fragmentation_lru, new_a->fragmentation_lru); 891 + old_lru, new_lru); 895 892 if (ret) 896 893 goto err; 897 894 } ··· 1634 1629 if (ret) 1635 1630 return ret; 1636 1631 1632 + struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode); 1633 + if (!ca) 1634 + return 0; 1635 + 1637 1636 a = bch2_alloc_to_v4(alloc_k, 
&a_convert); 1638 1637 1639 - if (a->fragmentation_lru) { 1638 + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); 1639 + if (lru_idx) { 1640 1640 ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, 1641 - a->fragmentation_lru, 1642 - alloc_k, last_flushed); 1641 + lru_idx, alloc_k, last_flushed); 1643 1642 if (ret) 1644 - return ret; 1643 + goto err; 1645 1644 } 1646 1645 1647 1646 if (a->data_type != BCH_DATA_cached) 1648 - return 0; 1647 + goto err; 1649 1648 1650 1649 if (fsck_err_on(!a->io_time[READ], 1651 1650 trans, alloc_key_cached_but_read_time_zero, ··· 1678 1669 goto err; 1679 1670 err: 1680 1671 fsck_err: 1672 + bch2_dev_put(ca); 1681 1673 printbuf_exit(&buf); 1682 1674 return ret; 1683 1675 }
+1 -1
fs/bcachefs/alloc_background_format.h
··· 70 70 __u32 stripe; 71 71 __u32 nr_external_backpointers; 72 72 /* end of fields in original version of alloc_v4 */ 73 - __u64 fragmentation_lru; 73 + __u64 _fragmentation_lru; /* obsolete */ 74 74 __u32 stripe_sectors; 75 75 __u32 pad; 76 76 } __packed __aligned(8);
-3
fs/bcachefs/btree_gc.c
··· 828 828 return ret; 829 829 } 830 830 831 - gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); 832 - 833 831 if (fsck_err_on(new.data_type != gc.data_type, 834 832 trans, alloc_key_data_type_wrong, 835 833 "bucket %llu:%llu gen %u has wrong data_type" ··· 855 857 copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); 856 858 copy_bucket_field(alloc_key_stripe_wrong, stripe); 857 859 copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); 858 - copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); 859 860 #undef copy_bucket_field 860 861 861 862 if (!bch2_alloc_v4_cmp(*old, new))
+2 -1
fs/bcachefs/btree_trans_commit.c
··· 832 832 for (struct jset_entry *entry2 = trans->journal_entries; 833 833 entry2 != entry; 834 834 entry2 = vstruct_next(entry2)) 835 - if (jset_entry_is_key(entry2) && entry2->start->k.type == KEY_TYPE_accounting) { 835 + if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && 836 + entry2->start->k.type == KEY_TYPE_accounting) { 836 837 struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); 837 838 838 839 bch2_accounting_neg(a);
+18 -5
fs/bcachefs/error.c
··· 393 393 !(flags & FSCK_CAN_IGNORE))) 394 394 ret = -BCH_ERR_fsck_errors_not_fixed; 395 395 396 + bool exiting = 397 + test_bit(BCH_FS_fsck_running, &c->flags) && 398 + (ret != -BCH_ERR_fsck_fix && 399 + ret != -BCH_ERR_fsck_ignore); 400 + 401 + if (exiting) 402 + print = true; 403 + 396 404 if (print) { 397 405 if (bch2_fs_stdio_redirect(c)) 398 406 bch2_print(c, "%s\n", out->buf); ··· 408 400 bch2_print_string_as_lines(KERN_ERR, out->buf); 409 401 } 410 402 411 - if (test_bit(BCH_FS_fsck_running, &c->flags) && 412 - (ret != -BCH_ERR_fsck_fix && 413 - ret != -BCH_ERR_fsck_ignore)) 403 + if (exiting) 414 404 bch_err(c, "Unable to continue, halting"); 415 405 else if (suppressing) 416 406 bch_err(c, "Ratelimiting new instances of previous error"); ··· 436 430 437 431 int __bch2_bkey_fsck_err(struct bch_fs *c, 438 432 struct bkey_s_c k, 439 - enum bch_fsck_flags flags, 433 + enum bch_validate_flags validate_flags, 440 434 enum bch_sb_error_id err, 441 435 const char *fmt, ...) 442 436 { 437 + if (validate_flags & BCH_VALIDATE_silent) 438 + return -BCH_ERR_fsck_delete_bkey; 439 + 440 + unsigned fsck_flags = 0; 441 + if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) 442 + fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; 443 + 443 444 struct printbuf buf = PRINTBUF; 444 445 va_list args; 445 446 ··· 458 445 va_end(args); 459 446 prt_str(&buf, ": delete?"); 460 447 461 - int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); 448 + int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); 462 449 printbuf_exit(&buf); 463 450 return ret; 464 451 }
+3 -6
fs/bcachefs/error.h
··· 167 167 #define fsck_err_on(cond, c, _err_type, ...) \ 168 168 __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) 169 169 170 + enum bch_validate_flags; 170 171 __printf(5, 6) 171 172 int __bch2_bkey_fsck_err(struct bch_fs *, 172 173 struct bkey_s_c, 173 - enum bch_fsck_flags, 174 + enum bch_validate_flags, 174 175 enum bch_sb_error_id, 175 176 const char *, ...); 176 177 ··· 181 180 */ 182 181 #define bkey_fsck_err(c, _err_type, _err_msg, ...) \ 183 182 do { \ 184 - if ((flags & BCH_VALIDATE_silent)) { \ 185 - ret = -BCH_ERR_fsck_delete_bkey; \ 186 - goto fsck_err; \ 187 - } \ 188 - int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX,\ 183 + int _ret = __bch2_bkey_fsck_err(c, k, flags, \ 189 184 BCH_FSCK_ERR_##_err_type, \ 190 185 _err_msg, ##__VA_ARGS__); \ 191 186 if (_ret != -BCH_ERR_fsck_fix && \
+22 -13
fs/bcachefs/fs.c
··· 174 174 .automatic_shrinking = true, 175 175 }; 176 176 177 - static void __wait_on_freeing_inode(struct inode *inode) 178 - { 179 - wait_queue_head_t *wq; 180 - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 181 - wq = bit_waitqueue(&inode->i_state, __I_NEW); 182 - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 183 - spin_unlock(&inode->i_lock); 184 - schedule(); 185 - finish_wait(wq, &wait.wq_entry); 186 - } 187 - 188 177 struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) 189 178 { 190 179 return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); 180 + } 181 + 182 + static void __wait_on_freeing_inode(struct bch_fs *c, 183 + struct bch_inode_info *inode, 184 + subvol_inum inum) 185 + { 186 + wait_queue_head_t *wq; 187 + DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); 188 + wq = inode_bit_waitqueue(&wait, &inode->v, __I_NEW); 189 + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 190 + spin_unlock(&inode->v.i_lock); 191 + 192 + if (__bch2_inode_hash_find(c, inum) == inode) 193 + schedule_timeout(HZ * 10); 194 + finish_wait(wq, &wait.wq_entry); 191 195 } 192 196 193 197 static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans, ··· 208 204 } 209 205 if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) { 210 206 if (!trans) { 211 - __wait_on_freeing_inode(&inode->v); 207 + __wait_on_freeing_inode(c, inode, inum); 212 208 } else { 213 209 bch2_trans_unlock(trans); 214 - __wait_on_freeing_inode(&inode->v); 210 + __wait_on_freeing_inode(c, inode, inum); 215 211 int ret = bch2_trans_relock(trans); 216 212 if (ret) 217 213 return ERR_PTR(ret); ··· 236 232 &inode->hash, bch2_vfs_inodes_params); 237 233 BUG_ON(ret); 238 234 inode->v.i_hash.pprev = NULL; 235 + /* 236 + * This pairs with the bch2_inode_hash_find() -> 237 + * __wait_on_freeing_inode() path 238 + */ 239 + inode_wake_up_bit(&inode->v, __I_NEW); 239 240 } 240 241 } 241 242
+124 -70
fs/bcachefs/fsck.c
··· 28 28 inode->bi_dir_offset == d.k->p.offset; 29 29 } 30 30 31 - static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, 32 - struct bch_inode_unpacked *inode) 31 + static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, 32 + struct bch_inode_unpacked *inode) 33 33 { 34 34 if (d.v->d_type == DT_SUBVOL 35 35 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol ··· 137 137 return ret; 138 138 } 139 139 140 - static int lookup_inode(struct btree_trans *trans, u64 inode_nr, 141 - struct bch_inode_unpacked *inode, 142 - u32 *snapshot) 140 + static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot, 141 + struct bch_inode_unpacked *inode) 143 142 { 144 143 struct btree_iter iter; 145 144 struct bkey_s_c k; 146 145 int ret; 147 146 148 147 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 149 - SPOS(0, inode_nr, *snapshot), 0); 148 + SPOS(0, inode_nr, snapshot), 0); 150 149 ret = bkey_err(k); 151 150 if (ret) 152 151 goto err; ··· 153 154 ret = bkey_is_inode(k.k) 154 155 ? 
bch2_inode_unpack(k, inode) 155 156 : -BCH_ERR_ENOENT_inode; 156 - if (!ret) 157 - *snapshot = iter.pos.snapshot; 158 157 err: 159 158 bch2_trans_iter_exit(trans, &iter); 160 159 return ret; ··· 247 250 248 251 struct bch_inode_unpacked root_inode; 249 252 struct bch_hash_info root_hash_info; 250 - u32 root_inode_snapshot = snapshot; 251 - ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); 253 + ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode); 252 254 bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", 253 255 root_inum.inum, le32_to_cpu(st.master_subvol)); 254 256 if (ret) ··· 273 277 * The bch2_check_dirents pass has already run, dangling dirents 274 278 * shouldn't exist here: 275 279 */ 276 - ret = lookup_inode(trans, inum, lostfound, &snapshot); 280 + ret = lookup_inode(trans, inum, snapshot, lostfound); 277 281 bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", 278 282 inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); 279 283 return ret; 280 284 281 285 create_lostfound: 282 286 /* 287 + * we always create lost+found in the root snapshot; we don't want 288 + * different branches of the snapshot tree to have different lost+found 289 + */ 290 + snapshot = le32_to_cpu(st.root_snapshot); 291 + /* 283 292 * XXX: we could have a nicer log message here if we had a nice way to 284 293 * walk backpointers to print a path 285 294 */ 286 - bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); 295 + bch_notice(c, "creating lost+found in subvol %llu snapshot %u", 296 + root_inum.subvol, le32_to_cpu(st.root_snapshot)); 287 297 288 298 u64 now = bch2_current_time(c); 289 299 struct btree_iter lostfound_iter = { NULL }; ··· 298 296 bch2_inode_init_early(c, lostfound); 299 297 bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); 300 298 lostfound->bi_dir = root_inode.bi_inum; 299 + lostfound->bi_snapshot = 
le32_to_cpu(st.root_snapshot); 301 300 302 301 root_inode.bi_nlink++; 303 302 ··· 326 323 return ret; 327 324 } 328 325 329 - static int reattach_inode(struct btree_trans *trans, 330 - struct bch_inode_unpacked *inode, 331 - u32 inode_snapshot) 326 + static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) 332 327 { 333 328 struct bch_fs *c = trans->c; 334 329 struct bch_hash_info dir_hash; ··· 334 333 char name_buf[20]; 335 334 struct qstr name; 336 335 u64 dir_offset = 0; 337 - u32 dirent_snapshot = inode_snapshot; 336 + u32 dirent_snapshot = inode->bi_snapshot; 338 337 int ret; 339 338 340 339 if (inode->bi_subvol) { ··· 355 354 if (ret) 356 355 return ret; 357 356 358 - if (S_ISDIR(inode->bi_mode)) { 359 - lostfound.bi_nlink++; 357 + lostfound.bi_nlink += S_ISDIR(inode->bi_mode); 360 358 361 - ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); 362 - if (ret) 363 - return ret; 359 + /* ensure lost+found inode is also present in inode snapshot */ 360 + if (!inode->bi_subvol) { 361 + BUG_ON(!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, lostfound.bi_snapshot)); 362 + lostfound.bi_snapshot = inode->bi_snapshot; 364 363 } 364 + 365 + ret = __bch2_fsck_write_inode(trans, &lostfound); 366 + if (ret) 367 + return ret; 365 368 366 369 dir_hash = bch2_hash_info_init(c, &lostfound); 367 370 ··· 388 383 inode->bi_dir = lostfound.bi_inum; 389 384 inode->bi_dir_offset = dir_offset; 390 385 391 - return __bch2_fsck_write_inode(trans, inode, inode_snapshot); 386 + return __bch2_fsck_write_inode(trans, inode); 392 387 } 393 388 394 389 static int remove_backpointer(struct btree_trans *trans, ··· 427 422 if (ret) 428 423 return ret; 429 424 430 - ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); 425 + ret = reattach_inode(trans, &inode); 431 426 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 432 427 return ret; 433 428 } ··· 545 540 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, 
NULL); 546 541 new_inode.bi_size = i_size; 547 542 new_inode.bi_inum = inum; 543 + new_inode.bi_snapshot = snapshot; 548 544 549 - return __bch2_fsck_write_inode(trans, &new_inode, snapshot); 545 + return __bch2_fsck_write_inode(trans, &new_inode); 550 546 } 551 547 552 548 struct snapshots_seen { ··· 1030 1024 bool full) 1031 1025 { 1032 1026 struct bch_fs *c = trans->c; 1027 + struct printbuf buf = PRINTBUF; 1033 1028 struct bch_inode_unpacked u; 1034 1029 bool do_update = false; 1035 1030 int ret; ··· 1064 1057 trans, inode_snapshot_mismatch, 1065 1058 "inodes in different snapshots don't match")) { 1066 1059 bch_err(c, "repair not implemented yet"); 1067 - return -BCH_ERR_fsck_repair_unimplemented; 1060 + ret = -BCH_ERR_fsck_repair_unimplemented; 1061 + goto err_noprint; 1062 + } 1063 + 1064 + if (u.bi_dir || u.bi_dir_offset) { 1065 + ret = check_inode_dirent_inode(trans, &u, &do_update); 1066 + if (ret) 1067 + goto err; 1068 + } 1069 + 1070 + if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked), 1071 + trans, inode_unlinked_but_has_dirent, 1072 + "inode unlinked but has dirent\n%s", 1073 + (printbuf_reset(&buf), 1074 + bch2_inode_unpacked_to_text(&buf, &u), 1075 + buf.buf))) { 1076 + u.bi_flags &= ~BCH_INODE_unlinked; 1077 + do_update = true; 1078 + } 1079 + 1080 + if (S_ISDIR(u.bi_mode) && (u.bi_flags & BCH_INODE_unlinked)) { 1081 + /* Check for this early so that check_unreachable_inode() will reattach it */ 1082 + 1083 + ret = bch2_empty_dir_snapshot(trans, k.k->p.offset, 0, k.k->p.snapshot); 1084 + if (ret && ret != -BCH_ERR_ENOTEMPTY_dir_not_empty) 1085 + goto err; 1086 + 1087 + fsck_err_on(ret, trans, inode_dir_unlinked_but_not_empty, 1088 + "dir unlinked but not empty\n%s", 1089 + (printbuf_reset(&buf), 1090 + bch2_inode_unpacked_to_text(&buf, &u), 1091 + buf.buf)); 1092 + u.bi_flags &= ~BCH_INODE_unlinked; 1093 + do_update = true; 1094 + ret = 0; 1068 1095 } 1069 1096 1070 1097 if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && 
··· 1111 1070 1112 1071 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 1113 1072 1114 - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1073 + ret = __bch2_fsck_write_inode(trans, &u); 1115 1074 1116 1075 bch_err_msg(c, ret, "in fsck updating inode"); 1117 1076 if (ret) 1118 - return ret; 1077 + goto err_noprint; 1119 1078 1120 1079 if (!bpos_eq(new_min_pos, POS_MIN)) 1121 1080 bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); 1122 - return 0; 1081 + goto err_noprint; 1123 1082 } 1124 1083 1125 1084 if (u.bi_flags & BCH_INODE_unlinked) { ··· 1136 1095 */ 1137 1096 ret = check_inode_deleted_list(trans, k.k->p); 1138 1097 if (ret < 0) 1139 - return ret; 1098 + goto err_noprint; 1140 1099 1141 1100 fsck_err_on(!ret, 1142 1101 trans, unlinked_inode_not_on_deleted_list, ··· 1147 1106 if (ret) 1148 1107 goto err; 1149 1108 } else { 1150 - if (fsck_err_on(bch2_inode_is_open(c, k.k->p), 1109 + if (fsck_err_on(!bch2_inode_is_open(c, k.k->p), 1151 1110 trans, inode_unlinked_and_not_open, 1152 1111 "inode %llu%u unlinked and not open", 1153 1112 u.bi_inum, u.bi_snapshot)) { 1154 1113 ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); 1155 1114 bch_err_msg(c, ret, "in fsck deleting inode"); 1156 - return ret; 1115 + goto err_noprint; 1157 1116 } 1158 1117 } 1159 1118 } ··· 1218 1177 do_update = true; 1219 1178 } 1220 1179 1221 - if (u.bi_dir || u.bi_dir_offset) { 1222 - ret = check_inode_dirent_inode(trans, &u, &do_update); 1223 - if (ret) 1224 - goto err; 1225 - } 1226 - 1227 1180 if (fsck_err_on(u.bi_parent_subvol && 1228 1181 (u.bi_subvol == 0 || 1229 1182 u.bi_subvol == BCACHEFS_ROOT_SUBVOL), ··· 1259 1224 } 1260 1225 do_update: 1261 1226 if (do_update) { 1262 - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1227 + ret = __bch2_fsck_write_inode(trans, &u); 1263 1228 bch_err_msg(c, ret, "in fsck updating inode"); 1264 1229 if (ret) 1265 - return ret; 1230 + goto err_noprint; 1266 1231 } 1267 1232 err: 1268 
1233 fsck_err: 1269 1234 bch_err_fn(c, ret); 1235 + err_noprint: 1236 + printbuf_exit(&buf); 1270 1237 return ret; 1271 1238 } 1272 1239 ··· 1384 1347 w->last_pos.inode, i->snapshot, 1385 1348 i->inode.bi_sectors, i->count)) { 1386 1349 i->inode.bi_sectors = i->count; 1387 - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1350 + ret = bch2_fsck_write_inode(trans, &i->inode); 1388 1351 if (ret) 1389 1352 break; 1390 1353 } ··· 1826 1789 "directory %llu:%u with wrong i_nlink: got %u, should be %llu", 1827 1790 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { 1828 1791 i->inode.bi_nlink = i->count; 1829 - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1792 + ret = bch2_fsck_write_inode(trans, &i->inode); 1830 1793 if (ret) 1831 1794 break; 1832 1795 } ··· 1847 1810 static int check_dirent_inode_dirent(struct btree_trans *trans, 1848 1811 struct btree_iter *iter, 1849 1812 struct bkey_s_c_dirent d, 1850 - struct bch_inode_unpacked *target, 1851 - u32 target_snapshot) 1813 + struct bch_inode_unpacked *target) 1852 1814 { 1853 1815 struct bch_fs *c = trans->c; 1854 1816 struct printbuf buf = PRINTBUF; ··· 1856 1820 1857 1821 if (inode_points_to_dirent(target, d)) 1858 1822 return 0; 1823 + 1824 + if (!target->bi_dir && 1825 + !target->bi_dir_offset) { 1826 + fsck_err_on(S_ISDIR(target->bi_mode), 1827 + trans, inode_dir_missing_backpointer, 1828 + "directory with missing backpointer\n%s", 1829 + (printbuf_reset(&buf), 1830 + bch2_bkey_val_to_text(&buf, c, d.s_c), 1831 + prt_printf(&buf, "\n"), 1832 + bch2_inode_unpacked_to_text(&buf, target), 1833 + buf.buf)); 1834 + 1835 + fsck_err_on(target->bi_flags & BCH_INODE_unlinked, 1836 + trans, inode_unlinked_but_has_dirent, 1837 + "inode unlinked but has dirent\n%s", 1838 + (printbuf_reset(&buf), 1839 + bch2_bkey_val_to_text(&buf, c, d.s_c), 1840 + prt_printf(&buf, "\n"), 1841 + bch2_inode_unpacked_to_text(&buf, target), 1842 + buf.buf)); 1843 + 1844 + target->bi_flags &= 
~BCH_INODE_unlinked; 1845 + target->bi_dir = d.k->p.inode; 1846 + target->bi_dir_offset = d.k->p.offset; 1847 + return __bch2_fsck_write_inode(trans, target); 1848 + } 1859 1849 1860 1850 if (bch2_inode_should_have_bp(target) && 1861 1851 !fsck_err(trans, inode_wrong_backpointer, ··· 1892 1830 buf.buf))) 1893 1831 goto err; 1894 1832 1895 - if (!target->bi_dir && 1896 - !target->bi_dir_offset) { 1897 - target->bi_dir = d.k->p.inode; 1898 - target->bi_dir_offset = d.k->p.offset; 1899 - return __bch2_fsck_write_inode(trans, target, target_snapshot); 1900 - } 1901 - 1902 1833 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, 1903 - SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); 1834 + SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot)); 1904 1835 ret = bkey_err(bp_dirent); 1905 1836 if (ret && !bch2_err_matches(ret, ENOENT)) 1906 1837 goto err; ··· 1906 1851 "inode %llu:%u has wrong backpointer:\n" 1907 1852 "got %llu:%llu\n" 1908 1853 "should be %llu:%llu", 1909 - target->bi_inum, target_snapshot, 1854 + target->bi_inum, target->bi_snapshot, 1910 1855 target->bi_dir, 1911 1856 target->bi_dir_offset, 1912 1857 d.k->p.inode, 1913 1858 d.k->p.offset)) { 1914 1859 target->bi_dir = d.k->p.inode; 1915 1860 target->bi_dir_offset = d.k->p.offset; 1916 - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1861 + ret = __bch2_fsck_write_inode(trans, target); 1917 1862 goto out; 1918 1863 } 1919 1864 ··· 1928 1873 trans, inode_dir_multiple_links, 1929 1874 "%s %llu:%u with multiple links\n%s", 1930 1875 S_ISDIR(target->bi_mode) ? 
"directory" : "subvolume", 1931 - target->bi_inum, target_snapshot, buf.buf)) { 1876 + target->bi_inum, target->bi_snapshot, buf.buf)) { 1932 1877 ret = __remove_dirent(trans, d.k->p); 1933 1878 goto out; 1934 1879 } ··· 1941 1886 if (fsck_err_on(backpointer_exists && !target->bi_nlink, 1942 1887 trans, inode_multiple_links_but_nlink_0, 1943 1888 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", 1944 - target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 1889 + target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 1945 1890 target->bi_nlink++; 1946 1891 target->bi_flags &= ~BCH_INODE_unlinked; 1947 - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1892 + ret = __bch2_fsck_write_inode(trans, target); 1948 1893 if (ret) 1949 1894 goto err; 1950 1895 } ··· 1961 1906 static int check_dirent_target(struct btree_trans *trans, 1962 1907 struct btree_iter *iter, 1963 1908 struct bkey_s_c_dirent d, 1964 - struct bch_inode_unpacked *target, 1965 - u32 target_snapshot) 1909 + struct bch_inode_unpacked *target) 1966 1910 { 1967 1911 struct bch_fs *c = trans->c; 1968 1912 struct bkey_i_dirent *n; 1969 1913 struct printbuf buf = PRINTBUF; 1970 1914 int ret = 0; 1971 1915 1972 - ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); 1916 + ret = check_dirent_inode_dirent(trans, iter, d, target); 1973 1917 if (ret) 1974 1918 goto err; 1975 1919 ··· 2127 2073 u64 target_inum = le64_to_cpu(s.v->inode); 2128 2074 u32 target_snapshot = le32_to_cpu(s.v->snapshot); 2129 2075 2130 - ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); 2076 + ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root); 2131 2077 if (ret && !bch2_err_matches(ret, ENOENT)) 2132 2078 goto err; 2133 2079 ··· 2143 2089 target_inum, 2144 2090 subvol_root.bi_parent_subvol, parent_subvol)) { 2145 2091 subvol_root.bi_parent_subvol = parent_subvol; 2146 - ret = __bch2_fsck_write_inode(trans, 
&subvol_root, target_snapshot); 2092 + subvol_root.bi_snapshot = le32_to_cpu(s.v->snapshot); 2093 + ret = __bch2_fsck_write_inode(trans, &subvol_root); 2147 2094 if (ret) 2148 2095 goto err; 2149 2096 } 2150 2097 2151 - ret = check_dirent_target(trans, iter, d, &subvol_root, 2152 - target_snapshot); 2098 + ret = check_dirent_target(trans, iter, d, &subvol_root); 2153 2099 if (ret) 2154 2100 goto err; 2155 2101 out: ··· 2242 2188 } 2243 2189 2244 2190 darray_for_each(target->inodes, i) { 2245 - ret = check_dirent_target(trans, iter, d, 2246 - &i->inode, i->snapshot); 2191 + ret = check_dirent_target(trans, iter, d, &i->inode); 2247 2192 if (ret) 2248 2193 goto err; 2249 2194 } ··· 2383 2330 goto err; 2384 2331 } 2385 2332 2386 - ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); 2333 + ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode); 2387 2334 if (ret && !bch2_err_matches(ret, ENOENT)) 2388 2335 return ret; 2389 2336 ··· 2396 2343 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 2397 2344 0, NULL); 2398 2345 root_inode.bi_inum = inum; 2346 + root_inode.bi_snapshot = snapshot; 2399 2347 2400 - ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); 2348 + ret = __bch2_fsck_write_inode(trans, &root_inode); 2401 2349 bch_err_msg(c, ret, "writing root inode"); 2402 2350 } 2403 2351 err: ··· 2565 2511 (printbuf_reset(&buf), 2566 2512 bch2_bkey_val_to_text(&buf, c, inode_k), 2567 2513 buf.buf))) 2568 - ret = reattach_inode(trans, &inode, snapshot); 2514 + ret = reattach_inode(trans, &inode); 2569 2515 goto out; 2570 2516 } 2571 2517 ··· 2611 2557 if (ret) 2612 2558 break; 2613 2559 2614 - ret = reattach_inode(trans, &inode, snapshot); 2560 + ret = reattach_inode(trans, &inode); 2615 2561 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2616 2562 } 2617 2563 break; ··· 2841 2787 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], 2842 2788 bch2_inode_nlink_get(&u), link->count)) { 2843 2789 bch2_inode_nlink_set(&u, 
link->count); 2844 - ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); 2790 + ret = __bch2_fsck_write_inode(trans, &u); 2845 2791 } 2846 2792 fsck_err: 2847 2793 return ret;
+15 -29
fs/bcachefs/inode.c
··· 327 327 : bch2_inode_unpack_slowpath(k, unpacked); 328 328 } 329 329 330 - int bch2_inode_peek_nowarn(struct btree_trans *trans, 331 - struct btree_iter *iter, 332 - struct bch_inode_unpacked *inode, 333 - subvol_inum inum, unsigned flags) 330 + int __bch2_inode_peek(struct btree_trans *trans, 331 + struct btree_iter *iter, 332 + struct bch_inode_unpacked *inode, 333 + subvol_inum inum, unsigned flags, 334 + bool warn) 334 335 { 335 - struct bkey_s_c k; 336 336 u32 snapshot; 337 - int ret; 338 - 339 - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 337 + int ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn); 340 338 if (ret) 341 339 return ret; 342 340 343 - k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, 344 - SPOS(0, inum.inum, snapshot), 345 - flags|BTREE_ITER_cached); 341 + struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, 342 + SPOS(0, inum.inum, snapshot), 343 + flags|BTREE_ITER_cached); 346 344 ret = bkey_err(k); 347 345 if (ret) 348 346 return ret; ··· 355 357 356 358 return 0; 357 359 err: 360 + if (warn) 361 + bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); 358 362 bch2_trans_iter_exit(trans, iter); 359 - return ret; 360 - } 361 - 362 - int bch2_inode_peek(struct btree_trans *trans, 363 - struct btree_iter *iter, 364 - struct bch_inode_unpacked *inode, 365 - subvol_inum inum, unsigned flags) 366 - { 367 - int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); 368 - bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); 369 363 return ret; 370 364 } 371 365 ··· 377 387 return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); 378 388 } 379 389 380 - int __bch2_fsck_write_inode(struct btree_trans *trans, 381 - struct bch_inode_unpacked *inode, 382 - u32 snapshot) 390 + int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) 383 391 { 384 392 struct bkey_inode_buf *inode_p = 
385 393 bch2_trans_kmalloc(trans, sizeof(*inode_p)); ··· 386 398 return PTR_ERR(inode_p); 387 399 388 400 bch2_inode_pack(inode_p, inode); 389 - inode_p->inode.k.p.snapshot = snapshot; 401 + inode_p->inode.k.p.snapshot = inode->bi_snapshot; 390 402 391 403 return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, 392 404 &inode_p->inode.k_i, 393 405 BTREE_UPDATE_internal_snapshot_node); 394 406 } 395 407 396 - int bch2_fsck_write_inode(struct btree_trans *trans, 397 - struct bch_inode_unpacked *inode, 398 - u32 snapshot) 408 + int bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) 399 409 { 400 410 int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 401 - __bch2_fsck_write_inode(trans, inode, snapshot)); 411 + __bch2_fsck_write_inode(trans, inode)); 402 412 bch_err_fn(trans->c, ret); 403 413 return ret; 404 414 }
+22 -6
fs/bcachefs/inode.h
··· 97 97 98 98 void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); 99 99 100 - int bch2_inode_peek_nowarn(struct btree_trans *, struct btree_iter *, 101 - struct bch_inode_unpacked *, subvol_inum, unsigned); 102 - int bch2_inode_peek(struct btree_trans *, struct btree_iter *, 103 - struct bch_inode_unpacked *, subvol_inum, unsigned); 100 + int __bch2_inode_peek(struct btree_trans *, struct btree_iter *, 101 + struct bch_inode_unpacked *, subvol_inum, unsigned, bool); 102 + 103 + static inline int bch2_inode_peek_nowarn(struct btree_trans *trans, 104 + struct btree_iter *iter, 105 + struct bch_inode_unpacked *inode, 106 + subvol_inum inum, unsigned flags) 107 + { 108 + return __bch2_inode_peek(trans, iter, inode, inum, flags, false); 109 + } 110 + 111 + static inline int bch2_inode_peek(struct btree_trans *trans, 112 + struct btree_iter *iter, 113 + struct bch_inode_unpacked *inode, 114 + subvol_inum inum, unsigned flags) 115 + { 116 + return __bch2_inode_peek(trans, iter, inode, inum, flags, true); 117 + int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); 118 + return ret; 119 + } 104 120 105 121 int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *, 106 122 struct bch_inode_unpacked *, enum btree_iter_update_trigger_flags); ··· 128 112 return bch2_inode_write_flags(trans, iter, inode, 0); 129 113 } 130 114 131 - int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); 132 - int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); 115 + int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); 116 + int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); 133 117 134 118 void bch2_inode_init_early(struct bch_fs *, 135 119 struct bch_inode_unpacked *);
+43 -20
fs/bcachefs/io_misc.c
··· 224 224 225 225 static int truncate_set_isize(struct btree_trans *trans, 226 226 subvol_inum inum, 227 - u64 new_i_size) 227 + u64 new_i_size, 228 + bool warn) 228 229 { 229 230 struct btree_iter iter = { NULL }; 230 231 struct bch_inode_unpacked inode_u; 231 232 int ret; 232 233 233 - ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent) ?: 234 + ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn) ?: 234 235 (inode_u.bi_size = new_i_size, 0) ?: 235 236 bch2_inode_write(trans, &iter, &inode_u); 236 237 ··· 248 247 struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); 249 248 subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; 250 249 u64 new_i_size = le64_to_cpu(op->v.new_i_size); 250 + bool warn_errors = i_sectors_delta != NULL; 251 251 int ret; 252 252 253 253 ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 254 - truncate_set_isize(trans, inum, new_i_size)); 254 + truncate_set_isize(trans, inum, new_i_size, i_sectors_delta != NULL)); 255 255 if (ret) 256 256 goto err; 257 257 ··· 265 263 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 266 264 ret = 0; 267 265 err: 268 - bch2_logged_op_finish(trans, op_k); 269 - bch_err_fn(c, ret); 266 + if (warn_errors) 267 + bch_err_fn(c, ret); 270 268 return ret; 271 269 } 272 270 ··· 290 288 * resume only proceeding in one of the snapshots 291 289 */ 292 290 down_read(&c->snapshot_create_lock); 293 - int ret = bch2_trans_run(c, 294 - bch2_logged_op_start(trans, &op.k_i) ?: 295 - __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta)); 291 + struct btree_trans *trans = bch2_trans_get(c); 292 + int ret = bch2_logged_op_start(trans, &op.k_i); 293 + if (ret) 294 + goto out; 295 + ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); 296 + ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; 297 + out: 298 + bch2_trans_put(trans); 296 299 up_read(&c->snapshot_create_lock); 297 300 298 301 
return ret; ··· 315 308 prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset)); 316 309 } 317 310 318 - static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len) 311 + static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, 312 + u64 offset, s64 len, bool warn) 319 313 { 320 314 struct btree_iter iter; 321 315 struct bch_inode_unpacked inode_u; ··· 325 317 offset <<= 9; 326 318 len <<= 9; 327 319 328 - ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent); 320 + ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn); 329 321 if (ret) 330 322 return ret; 331 323 ··· 365 357 u64 len = abs(shift); 366 358 u64 pos = le64_to_cpu(op->v.pos); 367 359 bool insert = shift > 0; 360 + u32 snapshot; 361 + bool warn_errors = i_sectors_delta != NULL; 368 362 int ret = 0; 369 363 370 364 ret = bch2_inum_opts_get(trans, inum, &opts); 365 + if (ret) 366 + return ret; 367 + 368 + /* 369 + * check for missing subvolume before fpunch, as in resume we don't want 370 + * it to be a fatal error 371 + */ 372 + ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors); 371 373 if (ret) 372 374 return ret; 373 375 ··· 391 373 392 374 if (insert) { 393 375 ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 394 - adjust_i_size(trans, inum, src_offset, len) ?: 376 + adjust_i_size(trans, inum, src_offset, len, warn_errors) ?: 395 377 bch2_logged_op_update(trans, &op->k_i)); 396 378 if (ret) 397 379 goto err; ··· 414 396 struct bkey_i delete, *copy; 415 397 struct bkey_s_c k; 416 398 struct bpos src_pos = POS(inum.inum, src_offset); 417 - u32 snapshot; 418 399 419 400 bch2_trans_begin(trans); 420 401 421 - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 402 + ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, 403 + warn_errors); 422 404 if (ret) 423 405 goto btree_err; 424 406 ··· 481 463 482 464 if (!insert) { 483 465 ret = 
commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 484 - adjust_i_size(trans, inum, src_offset, shift) ?: 466 + adjust_i_size(trans, inum, src_offset, shift, warn_errors) ?: 485 467 bch2_logged_op_update(trans, &op->k_i)); 486 468 } else { 487 469 /* We need an inode update to update bi_journal_seq for fsync: */ 488 470 ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 489 - adjust_i_size(trans, inum, 0, 0) ?: 471 + adjust_i_size(trans, inum, 0, 0, warn_errors) ?: 490 472 bch2_logged_op_update(trans, &op->k_i)); 491 473 } 492 474 ··· 495 477 break; 496 478 } 497 479 err: 498 - bch_err_fn(c, ret); 499 - bch2_logged_op_finish(trans, op_k); 500 480 bch2_trans_iter_exit(trans, &iter); 481 + if (warn_errors) 482 + bch_err_fn(c, ret); 501 483 return ret; 502 484 } 503 485 ··· 526 508 * resume only proceeding in one of the snapshots 527 509 */ 528 510 down_read(&c->snapshot_create_lock); 529 - int ret = bch2_trans_run(c, 530 - bch2_logged_op_start(trans, &op.k_i) ?: 531 - __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta)); 511 + struct btree_trans *trans = bch2_trans_get(c); 512 + int ret = bch2_logged_op_start(trans, &op.k_i); 513 + if (ret) 514 + goto out; 515 + ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); 516 + ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; 517 + out: 518 + bch2_trans_put(trans); 532 519 up_read(&c->snapshot_create_lock); 533 520 534 521 return ret;
+9 -7
fs/bcachefs/logged_ops.c
··· 34 34 struct bkey_s_c k) 35 35 { 36 36 struct bch_fs *c = trans->c; 37 - const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); 38 - struct bkey_buf sk; 39 37 u32 restart_count = trans->restart_count; 40 38 struct printbuf buf = PRINTBUF; 41 39 int ret = 0; ··· 44 46 (bch2_bkey_val_to_text(&buf, c, k), 45 47 buf.buf)); 46 48 47 - if (!fn) 48 - return 0; 49 - 49 + struct bkey_buf sk; 50 50 bch2_bkey_buf_init(&sk); 51 51 bch2_bkey_buf_reassemble(&sk, c, k); 52 52 53 - fn->resume(trans, sk.k); 53 + const struct bch_logged_op_fn *fn = logged_op_fn(sk.k->k.type); 54 + if (fn) 55 + fn->resume(trans, sk.k); 56 + 57 + ret = bch2_logged_op_finish(trans, sk.k); 54 58 55 59 bch2_bkey_buf_exit(&sk, c); 56 60 fsck_err: ··· 93 93 __bch2_logged_op_start(trans, k)); 94 94 } 95 95 96 - void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) 96 + int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) 97 97 { 98 98 int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 99 99 bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0)); ··· 113 113 buf.buf, bch2_err_str(ret)); 114 114 printbuf_exit(&buf); 115 115 } 116 + 117 + return ret; 116 118 }
+1 -1
fs/bcachefs/logged_ops.h
··· 15 15 16 16 int bch2_resume_logged_ops(struct bch_fs *); 17 17 int bch2_logged_op_start(struct btree_trans *, struct bkey_i *); 18 - void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); 18 + int bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); 19 19 20 20 #endif /* _BCACHEFS_LOGGED_OPS_H */
+20 -14
fs/bcachefs/lru.c
··· 2 2 3 3 #include "bcachefs.h" 4 4 #include "alloc_background.h" 5 + #include "bkey_buf.h" 5 6 #include "btree_iter.h" 6 7 #include "btree_update.h" 7 8 #include "btree_write_buffer.h" ··· 119 118 static int bch2_check_lru_key(struct btree_trans *trans, 120 119 struct btree_iter *lru_iter, 121 120 struct bkey_s_c lru_k, 122 - struct bpos *last_flushed_pos) 121 + struct bkey_buf *last_flushed) 123 122 { 124 123 struct bch_fs *c = trans->c; 125 124 struct btree_iter iter; ··· 133 132 u64 idx; 134 133 int ret; 135 134 136 - if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), 135 + struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_pos); 136 + 137 + if (fsck_err_on(!ca, 137 138 trans, lru_entry_to_invalid_bucket, 138 139 "lru key points to nonexistent device:bucket %llu:%llu", 139 140 alloc_pos.inode, alloc_pos.offset)) 140 - return bch2_btree_delete_at(trans, lru_iter, 0); 141 + return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); 141 142 142 143 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0); 143 144 ret = bkey_err(k); ··· 153 150 idx = alloc_lru_idx_read(*a); 154 151 break; 155 152 case BCH_LRU_fragmentation: 156 - idx = a->fragmentation_lru; 153 + idx = alloc_lru_idx_fragmentation(*a, ca); 157 154 break; 158 155 } 159 156 160 157 if (lru_k.k->type != KEY_TYPE_set || 161 158 lru_pos_time(lru_k.k->p) != idx) { 162 - if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) { 163 - *last_flushed_pos = lru_k.k->p; 164 - ret = bch2_btree_write_buffer_flush_sync(trans) ?: 165 - -BCH_ERR_transaction_restart_write_buffer_flush; 166 - goto out; 167 - } 159 + ret = bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed); 160 + if (ret) 161 + goto err; 168 162 169 163 if (fsck_err(trans, lru_entry_bad, 170 164 "incorrect lru entry: lru %s time %llu\n" ··· 171 171 lru_pos_time(lru_k.k->p), 172 172 (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), 173 173 (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) 174 - ret = 
bch2_btree_delete_at(trans, lru_iter, 0); 174 + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); 175 175 } 176 - out: 177 176 err: 178 177 fsck_err: 179 178 bch2_trans_iter_exit(trans, &iter); 179 + bch2_dev_put(ca); 180 180 printbuf_exit(&buf2); 181 181 printbuf_exit(&buf1); 182 182 return ret; ··· 184 184 185 185 int bch2_check_lrus(struct bch_fs *c) 186 186 { 187 - struct bpos last_flushed_pos = POS_MIN; 187 + struct bkey_buf last_flushed; 188 + 189 + bch2_bkey_buf_init(&last_flushed); 190 + bkey_init(&last_flushed.k->k); 191 + 188 192 int ret = bch2_trans_run(c, 189 193 for_each_btree_key_commit(trans, iter, 190 194 BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, 191 195 NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, 192 - bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); 196 + bch2_check_lru_key(trans, &iter, k, &last_flushed))); 197 + 198 + bch2_bkey_buf_exit(&last_flushed, c); 193 199 bch_err_fn(c, ret); 194 200 return ret; 195 201
+1 -1
fs/bcachefs/move.c
··· 692 692 a = bch2_alloc_to_v4(k, &a_convert); 693 693 dirty_sectors = bch2_bucket_sectors_dirty(*a); 694 694 bucket_size = ca->mi.bucket_size; 695 - fragmentation = a->fragmentation_lru; 695 + fragmentation = alloc_lru_idx_fragmentation(*a, ca); 696 696 697 697 ret = bch2_btree_write_buffer_tryflush(trans); 698 698 bch_err_msg(c, ret, "flushing btree write buffer");
+9 -3
fs/bcachefs/movinggc.c
··· 73 73 static int bch2_bucket_is_movable(struct btree_trans *trans, 74 74 struct move_bucket *b, u64 time) 75 75 { 76 + struct bch_fs *c = trans->c; 76 77 struct btree_iter iter; 77 78 struct bkey_s_c k; 78 79 struct bch_alloc_v4 _a; ··· 91 90 if (ret) 92 91 return ret; 93 92 93 + struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode); 94 + if (!ca) 95 + goto out; 96 + 94 97 a = bch2_alloc_to_v4(k, &_a); 95 98 b->k.gen = a->gen; 96 99 b->sectors = bch2_bucket_sectors_dirty(*a); 100 + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); 97 101 98 - ret = data_type_movable(a->data_type) && 99 - a->fragmentation_lru && 100 - a->fragmentation_lru <= time; 102 + ret = lru_idx && lru_idx <= time; 101 103 104 + bch2_dev_put(ca); 105 + out: 102 106 bch2_trans_iter_exit(trans, &iter); 103 107 return ret; 104 108 }
+18 -15
fs/bcachefs/sb-errors_format.h
··· 115 115 x(alloc_key_data_type_inconsistency, 101, 0) \ 116 116 x(alloc_key_to_missing_dev_bucket, 102, 0) \ 117 117 x(alloc_key_cached_inconsistency, 103, 0) \ 118 - x(alloc_key_cached_but_read_time_zero, 104, 0) \ 119 - x(alloc_key_to_missing_lru_entry, 105, 0) \ 118 + x(alloc_key_cached_but_read_time_zero, 104, FSCK_AUTOFIX) \ 119 + x(alloc_key_to_missing_lru_entry, 105, FSCK_AUTOFIX) \ 120 120 x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \ 121 121 x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \ 122 122 x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \ ··· 129 129 x(freespace_key_wrong, 115, 0) \ 130 130 x(freespace_hole_missing, 116, 0) \ 131 131 x(bucket_gens_val_size_bad, 117, 0) \ 132 - x(bucket_gens_key_wrong, 118, 0) \ 133 - x(bucket_gens_hole_wrong, 119, 0) \ 134 - x(bucket_gens_to_invalid_dev, 120, 0) \ 135 - x(bucket_gens_to_invalid_buckets, 121, 0) \ 136 - x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \ 132 + x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \ 133 + x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \ 134 + x(bucket_gens_to_invalid_dev, 120, FSCK_AUTOFIX) \ 135 + x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \ 136 + x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ 137 137 x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ 138 138 x(need_discard_freespace_key_bad, 124, 0) \ 139 139 x(backpointer_bucket_offset_wrong, 125, 0) \ 140 140 x(backpointer_to_missing_device, 126, 0) \ 141 141 x(backpointer_to_missing_alloc, 127, 0) \ 142 142 x(backpointer_to_missing_ptr, 128, 0) \ 143 - x(lru_entry_at_time_0, 129, 0) \ 144 - x(lru_entry_to_invalid_bucket, 130, 0) \ 145 - x(lru_entry_bad, 131, 0) \ 143 + x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ 144 + x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ 145 + x(lru_entry_bad, 131, FSCK_AUTOFIX) \ 146 146 x(btree_ptr_val_too_big, 132, 0) \ 147 147 x(btree_ptr_v2_val_too_big, 133, 0) \ 148 148 x(btree_ptr_has_non_ptr, 134, 0) \ ··· 158 158 x(ptr_after_last_bucket, 
144, 0) \ 159 159 x(ptr_before_first_bucket, 145, 0) \ 160 160 x(ptr_spans_multiple_buckets, 146, 0) \ 161 - x(ptr_to_missing_backpointer, 147, 0) \ 162 - x(ptr_to_missing_alloc_key, 148, 0) \ 163 - x(ptr_to_missing_replicas_entry, 149, 0) \ 161 + x(ptr_to_missing_backpointer, 147, FSCK_AUTOFIX) \ 162 + x(ptr_to_missing_alloc_key, 148, FSCK_AUTOFIX) \ 163 + x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ 164 164 x(ptr_to_missing_stripe, 150, 0) \ 165 165 x(ptr_to_incorrect_stripe, 151, 0) \ 166 166 x(ptr_gen_newer_than_bucket_gen, 152, 0) \ ··· 194 194 x(snapshot_skiplist_not_normalized, 180, 0) \ 195 195 x(snapshot_skiplist_bad, 181, 0) \ 196 196 x(snapshot_should_not_have_subvol, 182, 0) \ 197 - x(snapshot_to_bad_snapshot_tree, 183, 0) \ 197 + x(snapshot_to_bad_snapshot_tree, 183, FSCK_AUTOFIX) \ 198 198 x(snapshot_bad_depth, 184, 0) \ 199 199 x(snapshot_bad_skiplist, 185, 0) \ 200 200 x(subvol_pos_bad, 186, 0) \ ··· 211 211 x(inode_unlinked_but_clean, 197, 0) \ 212 212 x(inode_unlinked_but_nlink_nonzero, 198, 0) \ 213 213 x(inode_unlinked_and_not_open, 281, 0) \ 214 + x(inode_unlinked_but_has_dirent, 285, 0) \ 214 215 x(inode_checksum_type_invalid, 199, 0) \ 215 216 x(inode_compression_type_invalid, 200, 0) \ 216 217 x(inode_subvol_root_but_not_dir, 201, 0) \ ··· 220 219 x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \ 221 220 x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ 222 221 x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ 222 + x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ 223 + x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \ 223 224 x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ 224 225 x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ 225 226 x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ ··· 298 295 x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ 299 296 x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ 300 297 x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ 301 - x(MAX, 284, 0) 298 + x(MAX, 287, 0) 302 299 303 300 enum 
bch_sb_error_id { 304 301 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
+12 -4
fs/bcachefs/subvolume.c
··· 102 102 inode.bi_inum, inode.bi_snapshot, 103 103 inode.bi_subvol, subvol.k->p.offset)) { 104 104 inode.bi_subvol = subvol.k->p.offset; 105 - ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot)); 105 + inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); 106 + ret = __bch2_fsck_write_inode(trans, &inode); 106 107 if (ret) 107 108 goto err; 108 109 } ··· 332 331 bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); 333 332 } 334 333 335 - int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, 336 - u32 *snapid) 334 + int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, 335 + u32 *snapid, bool warn) 337 336 { 338 337 struct btree_iter iter; 339 338 struct bkey_s_c_subvolume subvol; ··· 344 343 BTREE_ITER_cached|BTREE_ITER_with_updates, 345 344 subvolume); 346 345 ret = bkey_err(subvol); 347 - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, 346 + 347 + bch2_fs_inconsistent_on(warn && bch2_err_matches(ret, ENOENT), trans->c, 348 348 "missing subvolume %u", subvolid); 349 349 350 350 if (likely(!ret)) 351 351 *snapid = le32_to_cpu(subvol.v->snapshot); 352 352 bch2_trans_iter_exit(trans, &iter); 353 353 return ret; 354 + } 355 + 356 + int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, 357 + u32 *snapid) 358 + { 359 + return __bch2_subvolume_get_snapshot(trans, subvolid, snapid, true); 354 360 } 355 361 356 362 static int bch2_subvolume_reparent(struct btree_trans *trans,
+2
fs/bcachefs/subvolume.h
··· 26 26 int bch2_subvol_has_children(struct btree_trans *, u32); 27 27 int bch2_subvolume_get(struct btree_trans *, unsigned, 28 28 bool, int, struct bch_subvolume *); 29 + int __bch2_subvolume_get_snapshot(struct btree_trans *, u32, 30 + u32 *, bool); 29 31 int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); 30 32 31 33 int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
+1 -1
fs/bcachefs/util.c
··· 222 222 break; 223 223 } 224 224 225 - ret |= 1 << flag; 225 + ret |= BIT_ULL(flag); 226 226 } 227 227 228 228 kfree(d);