Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: Improve io option handling in data move path

The data move path now correctly picks IO options when inodes in
different snapshots have different options applied.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+108 -51
+82 -51
fs/bcachefs/move.c
··· 20 20 #include "keylist.h" 21 21 #include "move.h" 22 22 #include "replicas.h" 23 + #include "snapshot.h" 23 24 #include "super-io.h" 24 25 #include "trace.h" 25 26 ··· 414 413 return ret; 415 414 } 416 415 417 - static int lookup_inode(struct btree_trans *trans, struct bpos pos, 418 - struct bch_inode_unpacked *inode) 416 + struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, 417 + struct per_snapshot_io_opts *io_opts, 418 + struct bkey_s_c extent_k) 419 + { 420 + struct bch_fs *c = trans->c; 421 + u32 restart_count = trans->restart_count; 422 + int ret = 0; 423 + 424 + if (io_opts->cur_inum != extent_k.k->p.inode) { 425 + struct btree_iter iter; 426 + struct bkey_s_c k; 427 + 428 + io_opts->d.nr = 0; 429 + 430 + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), 431 + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { 432 + if (k.k->p.offset != extent_k.k->p.inode) 433 + break; 434 + 435 + if (!bkey_is_inode(k.k)) 436 + continue; 437 + 438 + struct bch_inode_unpacked inode; 439 + BUG_ON(bch2_inode_unpack(k, &inode)); 440 + 441 + struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; 442 + bch2_inode_opts_get(&e.io_opts, trans->c, &inode); 443 + 444 + ret = darray_push(&io_opts->d, e); 445 + if (ret) 446 + break; 447 + } 448 + bch2_trans_iter_exit(trans, &iter); 449 + io_opts->cur_inum = extent_k.k->p.inode; 450 + } 451 + 452 + ret = ret ?: trans_was_restarted(trans, restart_count); 453 + if (ret) 454 + return ERR_PTR(ret); 455 + 456 + if (extent_k.k->p.snapshot) { 457 + struct snapshot_io_opts_entry *i; 458 + darray_for_each(io_opts->d, i) 459 + if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) 460 + return &i->io_opts; 461 + } 462 + 463 + return &io_opts->fs_io_opts; 464 + } 465 + 466 + static int bch2_move_get_io_opts_one(struct btree_trans *trans, 467 + struct bch_io_opts *io_opts, 468 + struct bkey_s_c extent_k) 419 469 { 420 470 struct btree_iter iter; 421 471 struct bkey_s_c k; 422 472 int ret; 423 473 424 - bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos, 425 - BTREE_ITER_ALL_SNAPSHOTS); 426 - k = bch2_btree_iter_peek(&iter); 427 - ret = bkey_err(k); 428 - if (ret) 429 - goto err; 430 - 431 - if (!k.k || !bkey_eq(k.k->p, pos)) { 432 - ret = -BCH_ERR_ENOENT_inode; 433 - goto err; 474 + /* reflink btree? */ 475 + if (!extent_k.k->p.inode) { 476 + *io_opts = bch2_opts_to_inode_opts(trans->c->opts); 477 + return 0; 434 478 } 435 479 436 - ret = bkey_is_inode(k.k) ? 0 : -EIO; 437 - if (ret) 438 - goto err; 480 + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 481 + SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), 482 + BTREE_ITER_CACHED); 483 + ret = bkey_err(k); 484 + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 485 + return ret; 439 486 440 - ret = bch2_inode_unpack(k, inode); 441 - if (ret) 442 - goto err; 443 - err: 487 + if (!ret && bkey_is_inode(k.k)) { 488 + struct bch_inode_unpacked inode; 489 + bch2_inode_unpack(k, &inode); 490 + bch2_inode_opts_get(io_opts, trans->c, &inode); 491 + } else { 492 + *io_opts = bch2_opts_to_inode_opts(trans->c->opts); 493 + } 494 + 444 495 bch2_trans_iter_exit(trans, &iter); 445 - return ret; 496 + return 0; 446 497 } 447 498 448 499 static int move_ratelimit(struct btree_trans *trans, ··· 545 492 return 0; 546 493 } 547 494 548 - static int move_get_io_opts(struct btree_trans *trans, 549 - struct bch_io_opts *io_opts, 550 - struct bkey_s_c k, u64 *cur_inum) 551 - { 552 - struct bch_inode_unpacked inode; 553 - int ret; 554 - 555 - if (*cur_inum == k.k->p.inode) 556 - return 0; 557 - 558 - ret = lookup_inode(trans, 559 - SPOS(0, k.k->p.inode, k.k->p.snapshot), 560 - &inode); 561 - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 562 - return ret; 563 - 564 - if (!ret) 565 - bch2_inode_opts_get(io_opts, trans->c, &inode); 566 - else 567 - *io_opts = bch2_opts_to_inode_opts(trans->c->opts); 568 - *cur_inum = k.k->p.inode; 569 - return 0; 570 - } 571 - 572 495 static int __bch2_move_data(struct moving_context *ctxt, 573 496 struct bpos start, 574 497 struct bpos end, ··· 552 523 enum btree_id btree_id) 553 524 { 554 525 struct bch_fs *c = ctxt->c; 555 - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); 526 + struct per_snapshot_io_opts snapshot_io_opts; 527 + struct bch_io_opts *io_opts; 556 528 struct bkey_buf sk; 557 529 struct btree_trans *trans = bch2_trans_get(c); 558 530 struct btree_iter iter; 559 531 struct bkey_s_c k; 560 532 struct data_update_opts data_opts; 561 - u64 cur_inum = U64_MAX; 562 533 int ret = 0, ret2; 563 534 535 + per_snapshot_io_opts_init(&snapshot_io_opts, c); 564 536 bch2_bkey_buf_init(&sk); 565 537 566 538 if (ctxt->stats) { ··· 599 569 if (!bkey_extent_is_direct_data(k.k)) 600 570 goto next_nondata; 601 571 602 - ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); 572 + io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); 573 + ret = PTR_ERR_OR_ZERO(io_opts); 603 574 if (ret) 604 575 continue; 605 576 606 577 memset(&data_opts, 0, sizeof(data_opts)); 607 - if (!pred(c, arg, k, &io_opts, &data_opts)) 578 + if (!pred(c, arg, k, io_opts, &data_opts)) 608 579 goto next; 609 580 610 581 /* ··· 616 585 k = bkey_i_to_s_c(sk.k); 617 586 618 587 ret2 = bch2_move_extent(trans, &iter, ctxt, NULL, 619 - io_opts, btree_id, k, data_opts); 588 + *io_opts, btree_id, k, data_opts); 620 589 if (ret2) { 621 590 if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) 622 591 continue; ··· 643 612 bch2_trans_iter_exit(trans, &iter); 644 613 bch2_trans_put(trans); 645 614 bch2_bkey_buf_exit(&sk, c); 615 + per_snapshot_io_opts_exit(&snapshot_io_opts); 646 616 647 617 return ret; 648 618 } ··· 705 673 struct data_update_opts data_opts; 706 674 unsigned dirty_sectors, bucket_size; 707 675 u64 fragmentation; 708 - u64 cur_inum = U64_MAX; 709 676 struct bpos bp_pos = POS_MIN; 710 677 int ret = 0; 711 678 ··· 768 737 bch2_bkey_buf_reassemble(&sk, c, k); 769 738 k = bkey_i_to_s_c(sk.k); 770 739 771 - ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); 740 + ret = bch2_move_get_io_opts_one(trans, &io_opts, k); 772 741 if (ret) { 773 742 bch2_trans_iter_exit(trans, &iter); 774 743 continue;
+26
fs/bcachefs/move.h
··· 62 62 void bch2_moving_ctxt_do_pending_writes(struct moving_context *, 63 63 struct btree_trans *); 64 64 65 + /* Inodes in different snapshots may have different IO options: */ 66 + struct snapshot_io_opts_entry { 67 + u32 snapshot; 68 + struct bch_io_opts io_opts; 69 + }; 70 + 71 + struct per_snapshot_io_opts { 72 + u64 cur_inum; 73 + struct bch_io_opts fs_io_opts; 74 + DARRAY(struct snapshot_io_opts_entry) d; 75 + }; 76 + 77 + static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c) 78 + { 79 + memset(io_opts, 0, sizeof(*io_opts)); 80 + io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts); 81 + } 82 + 83 + static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) 84 + { 85 + darray_exit(&io_opts->d); 86 + } 87 + 88 + struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, 89 + struct per_snapshot_io_opts *, struct bkey_s_c); 90 + 65 91 int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); 66 92 67 93 int bch2_move_data(struct bch_fs *,