Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: Break up io.c

More reorganization, this splits up io.c into
- io_read.c
- io_misc.c - fallocate, fpunch, truncate
- io_write.c

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+1751 -1692
+3 -1
fs/bcachefs/Makefile
··· 46 46 fs-io-pagecache.o \ 47 47 fsck.o \ 48 48 inode.o \ 49 - io.o \ 49 + io_read.o \ 50 + io_misc.o \ 51 + io_write.o \ 50 52 journal.o \ 51 53 journal_io.o \ 52 54 journal_reclaim.o \
+1 -1
fs/bcachefs/alloc_foreground.c
··· 25 25 #include "disk_groups.h" 26 26 #include "ec.h" 27 27 #include "error.h" 28 - #include "io.h" 28 + #include "io_write.h" 29 29 #include "journal.h" 30 30 #include "movinggc.h" 31 31 #include "nocow_locking.h"
+1 -1
fs/bcachefs/btree_io.c
··· 14 14 #include "debug.h" 15 15 #include "error.h" 16 16 #include "extents.h" 17 - #include "io.h" 17 + #include "io_write.h" 18 18 #include "journal_reclaim.h" 19 19 #include "journal_seq_blacklist.h" 20 20 #include "recovery.h"
+1 -1
fs/bcachefs/btree_io.h
··· 7 7 #include "btree_locking.h" 8 8 #include "checksum.h" 9 9 #include "extents.h" 10 - #include "io_types.h" 10 + #include "io_write_types.h" 11 11 12 12 struct bch_fs; 13 13 struct btree_write;
-1
fs/bcachefs/compress.c
··· 3 3 #include "checksum.h" 4 4 #include "compress.h" 5 5 #include "extents.h" 6 - #include "io.h" 7 6 #include "super-io.h" 8 7 9 8 #include <linux/lz4.h>
+1 -1
fs/bcachefs/data_update.c
··· 9 9 #include "ec.h" 10 10 #include "error.h" 11 11 #include "extents.h" 12 - #include "io.h" 12 + #include "io_write.h" 13 13 #include "keylist.h" 14 14 #include "move.h" 15 15 #include "nocow_locking.h"
+1 -1
fs/bcachefs/data_update.h
··· 4 4 #define _BCACHEFS_DATA_UPDATE_H 5 5 6 6 #include "bkey_buf.h" 7 - #include "io_types.h" 7 + #include "io_write_types.h" 8 8 9 9 struct moving_context; 10 10
-1
fs/bcachefs/debug.c
··· 19 19 #include "extents.h" 20 20 #include "fsck.h" 21 21 #include "inode.h" 22 - #include "io.h" 23 22 #include "super.h" 24 23 25 24 #include <linux/console.h>
+2 -1
fs/bcachefs/ec.c
··· 11 11 #include "btree_update.h" 12 12 #include "btree_write_buffer.h" 13 13 #include "buckets.h" 14 + #include "checksum.h" 14 15 #include "disk_groups.h" 15 16 #include "ec.h" 16 17 #include "error.h" 17 - #include "io.h" 18 + #include "io_read.h" 18 19 #include "keylist.h" 19 20 #include "recovery.h" 20 21 #include "replicas.h"
+7
fs/bcachefs/errcode.c
··· 61 61 62 62 return -err; 63 63 } 64 + 65 + const char *bch2_blk_status_to_str(blk_status_t status) 66 + { 67 + if (status == BLK_STS_REMOVED) 68 + return "device removed"; 69 + return blk_status_to_str(status); 70 + }
+4
fs/bcachefs/errcode.h
··· 249 249 return err < 0 ? __bch2_err_class(err) : err; 250 250 } 251 251 252 + #define BLK_STS_REMOVED ((__force blk_status_t)128) 253 + 254 + const char *bch2_blk_status_to_str(blk_status_t); 255 + 252 256 #endif /* _BCACHFES_ERRCODE_H */
-1
fs/bcachefs/error.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include "bcachefs.h" 3 3 #include "error.h" 4 - #include "io.h" 5 4 #include "super.h" 6 5 7 6 #define FSCK_ERR_RATELIMIT_NR 10
+2 -1
fs/bcachefs/fs-io-buffered.c
··· 8 8 #include "fs-io-buffered.h" 9 9 #include "fs-io-direct.h" 10 10 #include "fs-io-pagecache.h" 11 - #include "io.h" 11 + #include "io_read.h" 12 + #include "io_write.h" 12 13 13 14 #include <linux/backing-dev.h> 14 15 #include <linux/pagemap.h>
+2 -1
fs/bcachefs/fs-io-direct.c
··· 7 7 #include "fs-io.h" 8 8 #include "fs-io-direct.h" 9 9 #include "fs-io-pagecache.h" 10 - #include "io.h" 10 + #include "io_read.h" 11 + #include "io_write.h" 11 12 12 13 #include <linux/kthread.h> 13 14 #include <linux/pagemap.h>
+2 -1
fs/bcachefs/fs-io.c
··· 3 3 4 4 #include "bcachefs.h" 5 5 #include "alloc_foreground.h" 6 + #include "bkey_buf.h" 6 7 #include "btree_update.h" 7 8 #include "buckets.h" 8 9 #include "clock.h" ··· 17 16 #include "fsck.h" 18 17 #include "inode.h" 19 18 #include "journal.h" 20 - #include "io.h" 19 + #include "io_misc.h" 21 20 #include "keylist.h" 22 21 #include "quota.h" 23 22 #include "reflink.h"
+1 -1
fs/bcachefs/fs-io.h
··· 6 6 7 7 #include "buckets.h" 8 8 #include "fs.h" 9 - #include "io_types.h" 9 + #include "io_write_types.h" 10 10 #include "quota.h" 11 11 12 12 #include <linux/uio.h>
+1 -1
fs/bcachefs/fs.c
··· 19 19 #include "fs-io-pagecache.h" 20 20 #include "fsck.h" 21 21 #include "inode.h" 22 - #include "io.h" 22 + #include "io_read.h" 23 23 #include "journal.h" 24 24 #include "keylist.h" 25 25 #include "quota.h"
+4 -1385
fs/bcachefs/io.c fs/bcachefs/io_write.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* 3 - * Some low level IO code, and hacks for various block layer limitations 4 - * 5 3 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> 6 4 * Copyright 2012 Google, Inc. 7 5 */ 8 6 9 7 #include "bcachefs.h" 10 - #include "alloc_background.h" 11 8 #include "alloc_foreground.h" 12 9 #include "bkey_buf.h" 13 10 #include "bset.h" 14 11 #include "btree_update.h" 15 12 #include "buckets.h" 16 13 #include "checksum.h" 17 - #include "compress.h" 18 14 #include "clock.h" 19 - #include "data_update.h" 15 + #include "compress.h" 20 16 #include "debug.h" 21 - #include "disk_groups.h" 22 17 #include "ec.h" 23 18 #include "error.h" 24 19 #include "extent_update.h" 25 20 #include "inode.h" 26 - #include "io.h" 21 + #include "io_write.h" 27 22 #include "journal.h" 28 23 #include "keylist.h" 29 24 #include "move.h" ··· 34 39 #include <linux/random.h> 35 40 #include <linux/sched/mm.h> 36 41 37 - const char *bch2_blk_status_to_str(blk_status_t status) 38 - { 39 - if (status == BLK_STS_REMOVED) 40 - return "device removed"; 41 - return blk_status_to_str(status); 42 - } 43 - 44 42 #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT 45 - 46 - static bool bch2_target_congested(struct bch_fs *c, u16 target) 47 - { 48 - const struct bch_devs_mask *devs; 49 - unsigned d, nr = 0, total = 0; 50 - u64 now = local_clock(), last; 51 - s64 congested; 52 - struct bch_dev *ca; 53 - 54 - if (!target) 55 - return false; 56 - 57 - rcu_read_lock(); 58 - devs = bch2_target_to_mask(c, target) ?: 59 - &c->rw_devs[BCH_DATA_user]; 60 - 61 - for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { 62 - ca = rcu_dereference(c->devs[d]); 63 - if (!ca) 64 - continue; 65 - 66 - congested = atomic_read(&ca->congested); 67 - last = READ_ONCE(ca->congested_last); 68 - if (time_after64(now, last)) 69 - congested -= (now - last) >> 12; 70 - 71 - total += max(congested, 0LL); 72 - nr++; 73 - } 74 - rcu_read_unlock(); 75 - 76 - return bch2_rand_range(nr * CONGESTED_MAX) 
< total; 77 - } 78 43 79 44 static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, 80 45 u64 now, int rw) ··· 89 134 bch2_congested_acct(ca, io_latency, now, rw); 90 135 91 136 __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now); 92 - } 93 - 94 - #else 95 - 96 - static bool bch2_target_congested(struct bch_fs *c, u16 target) 97 - { 98 - return false; 99 137 } 100 138 101 139 #endif ··· 314 366 *i_sectors_delta_total += i_sectors_delta; 315 367 bch2_btree_iter_set_pos(iter, next_pos); 316 368 return 0; 317 - } 318 - 319 - /* Overwrites whatever was present with zeroes: */ 320 - int bch2_extent_fallocate(struct btree_trans *trans, 321 - subvol_inum inum, 322 - struct btree_iter *iter, 323 - unsigned sectors, 324 - struct bch_io_opts opts, 325 - s64 *i_sectors_delta, 326 - struct write_point_specifier write_point) 327 - { 328 - struct bch_fs *c = trans->c; 329 - struct disk_reservation disk_res = { 0 }; 330 - struct closure cl; 331 - struct open_buckets open_buckets = { 0 }; 332 - struct bkey_s_c k; 333 - struct bkey_buf old, new; 334 - unsigned sectors_allocated = 0; 335 - bool have_reservation = false; 336 - bool unwritten = opts.nocow && 337 - c->sb.version >= bcachefs_metadata_version_unwritten_extents; 338 - int ret; 339 - 340 - bch2_bkey_buf_init(&old); 341 - bch2_bkey_buf_init(&new); 342 - closure_init_stack(&cl); 343 - 344 - k = bch2_btree_iter_peek_slot(iter); 345 - ret = bkey_err(k); 346 - if (ret) 347 - return ret; 348 - 349 - sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset); 350 - 351 - if (!have_reservation) { 352 - unsigned new_replicas = 353 - max(0, (int) opts.data_replicas - 354 - (int) bch2_bkey_nr_ptrs_fully_allocated(k)); 355 - /* 356 - * Get a disk reservation before (in the nocow case) calling 357 - * into the allocator: 358 - */ 359 - ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); 360 - if (unlikely(ret)) 361 - goto err; 362 - 363 - bch2_bkey_buf_reassemble(&old, c, k); 364 
- } 365 - 366 - if (have_reservation) { 367 - if (!bch2_extents_match(k, bkey_i_to_s_c(old.k))) 368 - goto err; 369 - 370 - bch2_key_resize(&new.k->k, sectors); 371 - } else if (!unwritten) { 372 - struct bkey_i_reservation *reservation; 373 - 374 - bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); 375 - reservation = bkey_reservation_init(new.k); 376 - reservation->k.p = iter->pos; 377 - bch2_key_resize(&reservation->k, sectors); 378 - reservation->v.nr_replicas = opts.data_replicas; 379 - } else { 380 - struct bkey_i_extent *e; 381 - struct bch_devs_list devs_have; 382 - struct write_point *wp; 383 - struct bch_extent_ptr *ptr; 384 - 385 - devs_have.nr = 0; 386 - 387 - bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); 388 - 389 - e = bkey_extent_init(new.k); 390 - e->k.p = iter->pos; 391 - 392 - ret = bch2_alloc_sectors_start_trans(trans, 393 - opts.foreground_target, 394 - false, 395 - write_point, 396 - &devs_have, 397 - opts.data_replicas, 398 - opts.data_replicas, 399 - BCH_WATERMARK_normal, 0, &cl, &wp); 400 - if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) 401 - ret = -BCH_ERR_transaction_restart_nested; 402 - if (ret) 403 - goto err; 404 - 405 - sectors = min(sectors, wp->sectors_free); 406 - sectors_allocated = sectors; 407 - 408 - bch2_key_resize(&e->k, sectors); 409 - 410 - bch2_open_bucket_get(c, wp, &open_buckets); 411 - bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); 412 - bch2_alloc_sectors_done(c, wp); 413 - 414 - extent_for_each_ptr(extent_i_to_s(e), ptr) 415 - ptr->unwritten = true; 416 - } 417 - 418 - have_reservation = true; 419 - 420 - ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, 421 - 0, i_sectors_delta, true); 422 - err: 423 - if (!ret && sectors_allocated) 424 - bch2_increment_clock(c, sectors_allocated, WRITE); 425 - 426 - bch2_open_buckets_put(c, &open_buckets); 427 - bch2_disk_reservation_put(c, &disk_res); 428 - bch2_bkey_buf_exit(&new, c); 429 - bch2_bkey_buf_exit(&old, c); 
430 - 431 - if (closure_nr_remaining(&cl) != 1) { 432 - bch2_trans_unlock(trans); 433 - closure_sync(&cl); 434 - } 435 - 436 - return ret; 437 - } 438 - 439 - /* 440 - * Returns -BCH_ERR_transacton_restart if we had to drop locks: 441 - */ 442 - int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, 443 - subvol_inum inum, u64 end, 444 - s64 *i_sectors_delta) 445 - { 446 - struct bch_fs *c = trans->c; 447 - unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); 448 - struct bpos end_pos = POS(inum.inum, end); 449 - struct bkey_s_c k; 450 - int ret = 0, ret2 = 0; 451 - u32 snapshot; 452 - 453 - while (!ret || 454 - bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 455 - struct disk_reservation disk_res = 456 - bch2_disk_reservation_init(c, 0); 457 - struct bkey_i delete; 458 - 459 - if (ret) 460 - ret2 = ret; 461 - 462 - bch2_trans_begin(trans); 463 - 464 - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 465 - if (ret) 466 - continue; 467 - 468 - bch2_btree_iter_set_snapshot(iter, snapshot); 469 - 470 - /* 471 - * peek_upto() doesn't have ideal semantics for extents: 472 - */ 473 - k = bch2_btree_iter_peek_upto(iter, end_pos); 474 - if (!k.k) 475 - break; 476 - 477 - ret = bkey_err(k); 478 - if (ret) 479 - continue; 480 - 481 - bkey_init(&delete.k); 482 - delete.k.p = iter->pos; 483 - 484 - /* create the biggest key we can */ 485 - bch2_key_resize(&delete.k, max_sectors); 486 - bch2_cut_back(end_pos, &delete); 487 - 488 - ret = bch2_extent_update(trans, inum, iter, &delete, 489 - &disk_res, 0, i_sectors_delta, false); 490 - bch2_disk_reservation_put(c, &disk_res); 491 - } 492 - 493 - return ret ?: ret2; 494 - } 495 - 496 - int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, 497 - s64 *i_sectors_delta) 498 - { 499 - struct btree_trans trans; 500 - struct btree_iter iter; 501 - int ret; 502 - 503 - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 504 - bch2_trans_iter_init(&trans, &iter, 
BTREE_ID_extents, 505 - POS(inum.inum, start), 506 - BTREE_ITER_INTENT); 507 - 508 - ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta); 509 - 510 - bch2_trans_iter_exit(&trans, &iter); 511 - bch2_trans_exit(&trans); 512 - 513 - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 514 - ret = 0; 515 - 516 - return ret; 517 369 } 518 370 519 371 static int bch2_write_index_default(struct bch_write_op *op) ··· 1647 1899 printbuf_indent_sub(out, 2); 1648 1900 } 1649 1901 1650 - /* Cache promotion on read */ 1651 - 1652 - struct promote_op { 1653 - struct rcu_head rcu; 1654 - u64 start_time; 1655 - 1656 - struct rhash_head hash; 1657 - struct bpos pos; 1658 - 1659 - struct data_update write; 1660 - struct bio_vec bi_inline_vecs[0]; /* must be last */ 1661 - }; 1662 - 1663 - static const struct rhashtable_params bch_promote_params = { 1664 - .head_offset = offsetof(struct promote_op, hash), 1665 - .key_offset = offsetof(struct promote_op, pos), 1666 - .key_len = sizeof(struct bpos), 1667 - }; 1668 - 1669 - static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, 1670 - struct bpos pos, 1671 - struct bch_io_opts opts, 1672 - unsigned flags) 1902 + void bch2_fs_io_write_exit(struct bch_fs *c) 1673 1903 { 1674 - if (!(flags & BCH_READ_MAY_PROMOTE)) 1675 - return false; 1676 - 1677 - if (!opts.promote_target) 1678 - return false; 1679 - 1680 - if (bch2_bkey_has_target(c, k, opts.promote_target)) 1681 - return false; 1682 - 1683 - if (bkey_extent_is_unwritten(k)) 1684 - return false; 1685 - 1686 - if (bch2_target_congested(c, opts.promote_target)) { 1687 - /* XXX trace this */ 1688 - return false; 1689 - } 1690 - 1691 - if (rhashtable_lookup_fast(&c->promote_table, &pos, 1692 - bch_promote_params)) 1693 - return false; 1694 - 1695 - return true; 1696 - } 1697 - 1698 - static void promote_free(struct bch_fs *c, struct promote_op *op) 1699 - { 1700 - int ret; 1701 - 1702 - bch2_data_update_exit(&op->write); 1703 - 1704 - ret = 
rhashtable_remove_fast(&c->promote_table, &op->hash, 1705 - bch_promote_params); 1706 - BUG_ON(ret); 1707 - bch2_write_ref_put(c, BCH_WRITE_REF_promote); 1708 - kfree_rcu(op, rcu); 1709 - } 1710 - 1711 - static void promote_done(struct bch_write_op *wop) 1712 - { 1713 - struct promote_op *op = 1714 - container_of(wop, struct promote_op, write.op); 1715 - struct bch_fs *c = op->write.op.c; 1716 - 1717 - bch2_time_stats_update(&c->times[BCH_TIME_data_promote], 1718 - op->start_time); 1719 - promote_free(c, op); 1720 - } 1721 - 1722 - static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) 1723 - { 1724 - struct bio *bio = &op->write.op.wbio.bio; 1725 - 1726 - trace_and_count(op->write.op.c, read_promote, &rbio->bio); 1727 - 1728 - /* we now own pages: */ 1729 - BUG_ON(!rbio->bounce); 1730 - BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); 1731 - 1732 - memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, 1733 - sizeof(struct bio_vec) * rbio->bio.bi_vcnt); 1734 - swap(bio->bi_vcnt, rbio->bio.bi_vcnt); 1735 - 1736 - bch2_data_update_read_done(&op->write, rbio->pick.crc); 1737 - } 1738 - 1739 - static struct promote_op *__promote_alloc(struct btree_trans *trans, 1740 - enum btree_id btree_id, 1741 - struct bkey_s_c k, 1742 - struct bpos pos, 1743 - struct extent_ptr_decoded *pick, 1744 - struct bch_io_opts opts, 1745 - unsigned sectors, 1746 - struct bch_read_bio **rbio) 1747 - { 1748 - struct bch_fs *c = trans->c; 1749 - struct promote_op *op = NULL; 1750 - struct bio *bio; 1751 - unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); 1752 - int ret; 1753 - 1754 - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) 1755 - return NULL; 1756 - 1757 - op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS); 1758 - if (!op) 1759 - goto err; 1760 - 1761 - op->start_time = local_clock(); 1762 - op->pos = pos; 1763 - 1764 - /* 1765 - * We don't use the mempool here because extents that aren't 1766 - * checksummed or compressed can be too big for the 
mempool: 1767 - */ 1768 - *rbio = kzalloc(sizeof(struct bch_read_bio) + 1769 - sizeof(struct bio_vec) * pages, 1770 - GFP_NOFS); 1771 - if (!*rbio) 1772 - goto err; 1773 - 1774 - rbio_init(&(*rbio)->bio, opts); 1775 - bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0); 1776 - 1777 - if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, 1778 - GFP_NOFS)) 1779 - goto err; 1780 - 1781 - (*rbio)->bounce = true; 1782 - (*rbio)->split = true; 1783 - (*rbio)->kmalloc = true; 1784 - 1785 - if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, 1786 - bch_promote_params)) 1787 - goto err; 1788 - 1789 - bio = &op->write.op.wbio.bio; 1790 - bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0); 1791 - 1792 - ret = bch2_data_update_init(trans, NULL, &op->write, 1793 - writepoint_hashed((unsigned long) current), 1794 - opts, 1795 - (struct data_update_opts) { 1796 - .target = opts.promote_target, 1797 - .extra_replicas = 1, 1798 - .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED, 1799 - }, 1800 - btree_id, k); 1801 - /* 1802 - * possible errors: -BCH_ERR_nocow_lock_blocked, 1803 - * -BCH_ERR_ENOSPC_disk_reservation: 1804 - */ 1805 - if (ret) { 1806 - ret = rhashtable_remove_fast(&c->promote_table, &op->hash, 1807 - bch_promote_params); 1808 - BUG_ON(ret); 1809 - goto err; 1810 - } 1811 - 1812 - op->write.op.end_io = promote_done; 1813 - 1814 - return op; 1815 - err: 1816 - if (*rbio) 1817 - bio_free_pages(&(*rbio)->bio); 1818 - kfree(*rbio); 1819 - *rbio = NULL; 1820 - kfree(op); 1821 - bch2_write_ref_put(c, BCH_WRITE_REF_promote); 1822 - return NULL; 1823 - } 1824 - 1825 - noinline 1826 - static struct promote_op *promote_alloc(struct btree_trans *trans, 1827 - struct bvec_iter iter, 1828 - struct bkey_s_c k, 1829 - struct extent_ptr_decoded *pick, 1830 - struct bch_io_opts opts, 1831 - unsigned flags, 1832 - struct bch_read_bio **rbio, 1833 - bool *bounce, 1834 - bool *read_full) 1835 - { 1836 - struct bch_fs *c = trans->c; 1837 - bool 
promote_full = *read_full || READ_ONCE(c->promote_whole_extents); 1838 - /* data might have to be decompressed in the write path: */ 1839 - unsigned sectors = promote_full 1840 - ? max(pick->crc.compressed_size, pick->crc.live_size) 1841 - : bvec_iter_sectors(iter); 1842 - struct bpos pos = promote_full 1843 - ? bkey_start_pos(k.k) 1844 - : POS(k.k->p.inode, iter.bi_sector); 1845 - struct promote_op *promote; 1846 - 1847 - if (!should_promote(c, k, pos, opts, flags)) 1848 - return NULL; 1849 - 1850 - promote = __promote_alloc(trans, 1851 - k.k->type == KEY_TYPE_reflink_v 1852 - ? BTREE_ID_reflink 1853 - : BTREE_ID_extents, 1854 - k, pos, pick, opts, sectors, rbio); 1855 - if (!promote) 1856 - return NULL; 1857 - 1858 - *bounce = true; 1859 - *read_full = promote_full; 1860 - return promote; 1861 - } 1862 - 1863 - /* Read */ 1864 - 1865 - #define READ_RETRY_AVOID 1 1866 - #define READ_RETRY 2 1867 - #define READ_ERR 3 1868 - 1869 - enum rbio_context { 1870 - RBIO_CONTEXT_NULL, 1871 - RBIO_CONTEXT_HIGHPRI, 1872 - RBIO_CONTEXT_UNBOUND, 1873 - }; 1874 - 1875 - static inline struct bch_read_bio * 1876 - bch2_rbio_parent(struct bch_read_bio *rbio) 1877 - { 1878 - return rbio->split ? 
rbio->parent : rbio; 1879 - } 1880 - 1881 - __always_inline 1882 - static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn, 1883 - enum rbio_context context, 1884 - struct workqueue_struct *wq) 1885 - { 1886 - if (context <= rbio->context) { 1887 - fn(&rbio->work); 1888 - } else { 1889 - rbio->work.func = fn; 1890 - rbio->context = context; 1891 - queue_work(wq, &rbio->work); 1892 - } 1893 - } 1894 - 1895 - static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) 1896 - { 1897 - BUG_ON(rbio->bounce && !rbio->split); 1898 - 1899 - if (rbio->promote) 1900 - promote_free(rbio->c, rbio->promote); 1901 - rbio->promote = NULL; 1902 - 1903 - if (rbio->bounce) 1904 - bch2_bio_free_pages_pool(rbio->c, &rbio->bio); 1905 - 1906 - if (rbio->split) { 1907 - struct bch_read_bio *parent = rbio->parent; 1908 - 1909 - if (rbio->kmalloc) 1910 - kfree(rbio); 1911 - else 1912 - bio_put(&rbio->bio); 1913 - 1914 - rbio = parent; 1915 - } 1916 - 1917 - return rbio; 1918 - } 1919 - 1920 - /* 1921 - * Only called on a top level bch_read_bio to complete an entire read request, 1922 - * not a split: 1923 - */ 1924 - static void bch2_rbio_done(struct bch_read_bio *rbio) 1925 - { 1926 - if (rbio->start_time) 1927 - bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], 1928 - rbio->start_time); 1929 - bio_endio(&rbio->bio); 1930 - } 1931 - 1932 - static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, 1933 - struct bvec_iter bvec_iter, 1934 - struct bch_io_failures *failed, 1935 - unsigned flags) 1936 - { 1937 - struct btree_trans trans; 1938 - struct btree_iter iter; 1939 - struct bkey_buf sk; 1940 - struct bkey_s_c k; 1941 - int ret; 1942 - 1943 - flags &= ~BCH_READ_LAST_FRAGMENT; 1944 - flags |= BCH_READ_MUST_CLONE; 1945 - 1946 - bch2_bkey_buf_init(&sk); 1947 - bch2_trans_init(&trans, c, 0, 0); 1948 - 1949 - bch2_trans_iter_init(&trans, &iter, rbio->data_btree, 1950 - rbio->read_pos, BTREE_ITER_SLOTS); 1951 - retry: 1952 - 
rbio->bio.bi_status = 0; 1953 - 1954 - k = bch2_btree_iter_peek_slot(&iter); 1955 - if (bkey_err(k)) 1956 - goto err; 1957 - 1958 - bch2_bkey_buf_reassemble(&sk, c, k); 1959 - k = bkey_i_to_s_c(sk.k); 1960 - bch2_trans_unlock(&trans); 1961 - 1962 - if (!bch2_bkey_matches_ptr(c, k, 1963 - rbio->pick.ptr, 1964 - rbio->data_pos.offset - 1965 - rbio->pick.crc.offset)) { 1966 - /* extent we wanted to read no longer exists: */ 1967 - rbio->hole = true; 1968 - goto out; 1969 - } 1970 - 1971 - ret = __bch2_read_extent(&trans, rbio, bvec_iter, 1972 - rbio->read_pos, 1973 - rbio->data_btree, 1974 - k, 0, failed, flags); 1975 - if (ret == READ_RETRY) 1976 - goto retry; 1977 - if (ret) 1978 - goto err; 1979 - out: 1980 - bch2_rbio_done(rbio); 1981 - bch2_trans_iter_exit(&trans, &iter); 1982 - bch2_trans_exit(&trans); 1983 - bch2_bkey_buf_exit(&sk, c); 1984 - return; 1985 - err: 1986 - rbio->bio.bi_status = BLK_STS_IOERR; 1987 - goto out; 1988 - } 1989 - 1990 - static void bch2_rbio_retry(struct work_struct *work) 1991 - { 1992 - struct bch_read_bio *rbio = 1993 - container_of(work, struct bch_read_bio, work); 1994 - struct bch_fs *c = rbio->c; 1995 - struct bvec_iter iter = rbio->bvec_iter; 1996 - unsigned flags = rbio->flags; 1997 - subvol_inum inum = { 1998 - .subvol = rbio->subvol, 1999 - .inum = rbio->read_pos.inode, 2000 - }; 2001 - struct bch_io_failures failed = { .nr = 0 }; 2002 - 2003 - trace_and_count(c, read_retry, &rbio->bio); 2004 - 2005 - if (rbio->retry == READ_RETRY_AVOID) 2006 - bch2_mark_io_failure(&failed, &rbio->pick); 2007 - 2008 - rbio->bio.bi_status = 0; 2009 - 2010 - rbio = bch2_rbio_free(rbio); 2011 - 2012 - flags |= BCH_READ_IN_RETRY; 2013 - flags &= ~BCH_READ_MAY_PROMOTE; 2014 - 2015 - if (flags & BCH_READ_NODECODE) { 2016 - bch2_read_retry_nodecode(c, rbio, iter, &failed, flags); 2017 - } else { 2018 - flags &= ~BCH_READ_LAST_FRAGMENT; 2019 - flags |= BCH_READ_MUST_CLONE; 2020 - 2021 - __bch2_read(c, rbio, iter, inum, &failed, flags); 2022 - } 2023 
- } 2024 - 2025 - static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, 2026 - blk_status_t error) 2027 - { 2028 - rbio->retry = retry; 2029 - 2030 - if (rbio->flags & BCH_READ_IN_RETRY) 2031 - return; 2032 - 2033 - if (retry == READ_ERR) { 2034 - rbio = bch2_rbio_free(rbio); 2035 - 2036 - rbio->bio.bi_status = error; 2037 - bch2_rbio_done(rbio); 2038 - } else { 2039 - bch2_rbio_punt(rbio, bch2_rbio_retry, 2040 - RBIO_CONTEXT_UNBOUND, system_unbound_wq); 2041 - } 2042 - } 2043 - 2044 - static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, 2045 - struct bch_read_bio *rbio) 2046 - { 2047 - struct bch_fs *c = rbio->c; 2048 - u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; 2049 - struct bch_extent_crc_unpacked new_crc; 2050 - struct btree_iter iter; 2051 - struct bkey_i *new; 2052 - struct bkey_s_c k; 2053 - int ret = 0; 2054 - 2055 - if (crc_is_compressed(rbio->pick.crc)) 2056 - return 0; 2057 - 2058 - k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos, 2059 - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 2060 - if ((ret = bkey_err(k))) 2061 - goto out; 2062 - 2063 - if (bversion_cmp(k.k->version, rbio->version) || 2064 - !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) 2065 - goto out; 2066 - 2067 - /* Extent was merged? 
*/ 2068 - if (bkey_start_offset(k.k) < data_offset || 2069 - k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) 2070 - goto out; 2071 - 2072 - if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, 2073 - rbio->pick.crc, NULL, &new_crc, 2074 - bkey_start_offset(k.k) - data_offset, k.k->size, 2075 - rbio->pick.crc.csum_type)) { 2076 - bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); 2077 - ret = 0; 2078 - goto out; 2079 - } 2080 - 2081 - /* 2082 - * going to be temporarily appending another checksum entry: 2083 - */ 2084 - new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 2085 - sizeof(struct bch_extent_crc128)); 2086 - if ((ret = PTR_ERR_OR_ZERO(new))) 2087 - goto out; 2088 - 2089 - bkey_reassemble(new, k); 2090 - 2091 - if (!bch2_bkey_narrow_crcs(new, new_crc)) 2092 - goto out; 2093 - 2094 - ret = bch2_trans_update(trans, &iter, new, 2095 - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 2096 - out: 2097 - bch2_trans_iter_exit(trans, &iter); 2098 - return ret; 2099 - } 2100 - 2101 - static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) 2102 - { 2103 - bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, 2104 - __bch2_rbio_narrow_crcs(&trans, rbio)); 2105 - } 2106 - 2107 - /* Inner part that may run in process context */ 2108 - static void __bch2_read_endio(struct work_struct *work) 2109 - { 2110 - struct bch_read_bio *rbio = 2111 - container_of(work, struct bch_read_bio, work); 2112 - struct bch_fs *c = rbio->c; 2113 - struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); 2114 - struct bio *src = &rbio->bio; 2115 - struct bio *dst = &bch2_rbio_parent(rbio)->bio; 2116 - struct bvec_iter dst_iter = rbio->bvec_iter; 2117 - struct bch_extent_crc_unpacked crc = rbio->pick.crc; 2118 - struct nonce nonce = extent_nonce(rbio->version, crc); 2119 - unsigned nofs_flags; 2120 - struct bch_csum csum; 2121 - int ret; 2122 - 2123 - nofs_flags = memalloc_nofs_save(); 2124 - 2125 - /* Reset iterator for 
checksumming and copying bounced data: */ 2126 - if (rbio->bounce) { 2127 - src->bi_iter.bi_size = crc.compressed_size << 9; 2128 - src->bi_iter.bi_idx = 0; 2129 - src->bi_iter.bi_bvec_done = 0; 2130 - } else { 2131 - src->bi_iter = rbio->bvec_iter; 2132 - } 2133 - 2134 - csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); 2135 - if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io) 2136 - goto csum_err; 2137 - 2138 - /* 2139 - * XXX 2140 - * We need to rework the narrow_crcs path to deliver the read completion 2141 - * first, and then punt to a different workqueue, otherwise we're 2142 - * holding up reads while doing btree updates which is bad for memory 2143 - * reclaim. 2144 - */ 2145 - if (unlikely(rbio->narrow_crcs)) 2146 - bch2_rbio_narrow_crcs(rbio); 2147 - 2148 - if (rbio->flags & BCH_READ_NODECODE) 2149 - goto nodecode; 2150 - 2151 - /* Adjust crc to point to subset of data we want: */ 2152 - crc.offset += rbio->offset_into_extent; 2153 - crc.live_size = bvec_iter_sectors(rbio->bvec_iter); 2154 - 2155 - if (crc_is_compressed(crc)) { 2156 - ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); 2157 - if (ret) 2158 - goto decrypt_err; 2159 - 2160 - if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) && 2161 - !c->opts.no_data_io) 2162 - goto decompression_err; 2163 - } else { 2164 - /* don't need to decrypt the entire bio: */ 2165 - nonce = nonce_add(nonce, crc.offset << 9); 2166 - bio_advance(src, crc.offset << 9); 2167 - 2168 - BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); 2169 - src->bi_iter.bi_size = dst_iter.bi_size; 2170 - 2171 - ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); 2172 - if (ret) 2173 - goto decrypt_err; 2174 - 2175 - if (rbio->bounce) { 2176 - struct bvec_iter src_iter = src->bi_iter; 2177 - 2178 - bio_copy_data_iter(dst, &dst_iter, src, &src_iter); 2179 - } 2180 - } 2181 - 2182 - if (rbio->promote) { 2183 - /* 2184 - * Re encrypt data we decrypted, so it's consistent with 2185 - * rbio->crc: 2186 - */ 2187 - 
ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); 2188 - if (ret) 2189 - goto decrypt_err; 2190 - 2191 - promote_start(rbio->promote, rbio); 2192 - rbio->promote = NULL; 2193 - } 2194 - nodecode: 2195 - if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { 2196 - rbio = bch2_rbio_free(rbio); 2197 - bch2_rbio_done(rbio); 2198 - } 2199 - out: 2200 - memalloc_nofs_restore(nofs_flags); 2201 - return; 2202 - csum_err: 2203 - /* 2204 - * Checksum error: if the bio wasn't bounced, we may have been 2205 - * reading into buffers owned by userspace (that userspace can 2206 - * scribble over) - retry the read, bouncing it this time: 2207 - */ 2208 - if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { 2209 - rbio->flags |= BCH_READ_MUST_BOUNCE; 2210 - bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR); 2211 - goto out; 2212 - } 2213 - 2214 - bch_err_inum_offset_ratelimited(ca, 2215 - rbio->read_pos.inode, 2216 - rbio->read_pos.offset << 9, 2217 - "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", 2218 - rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, 2219 - csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); 2220 - bch2_io_error(ca); 2221 - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); 2222 - goto out; 2223 - decompression_err: 2224 - bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, 2225 - rbio->read_pos.offset << 9, 2226 - "decompression error"); 2227 - bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); 2228 - goto out; 2229 - decrypt_err: 2230 - bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, 2231 - rbio->read_pos.offset << 9, 2232 - "decrypt error"); 2233 - bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); 2234 - goto out; 2235 - } 2236 - 2237 - static void bch2_read_endio(struct bio *bio) 2238 - { 2239 - struct bch_read_bio *rbio = 2240 - container_of(bio, struct bch_read_bio, bio); 2241 - struct bch_fs *c = rbio->c; 2242 - struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); 2243 - struct workqueue_struct *wq = 
NULL; 2244 - enum rbio_context context = RBIO_CONTEXT_NULL; 2245 - 2246 - if (rbio->have_ioref) { 2247 - bch2_latency_acct(ca, rbio->submit_time, READ); 2248 - percpu_ref_put(&ca->io_ref); 2249 - } 2250 - 2251 - if (!rbio->split) 2252 - rbio->bio.bi_end_io = rbio->end_io; 2253 - 2254 - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, 2255 - rbio->read_pos.inode, 2256 - rbio->read_pos.offset, 2257 - "data read error: %s", 2258 - bch2_blk_status_to_str(bio->bi_status))) { 2259 - bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); 2260 - return; 2261 - } 2262 - 2263 - if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || 2264 - ptr_stale(ca, &rbio->pick.ptr)) { 2265 - trace_and_count(c, read_reuse_race, &rbio->bio); 2266 - 2267 - if (rbio->flags & BCH_READ_RETRY_IF_STALE) 2268 - bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); 2269 - else 2270 - bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN); 2271 - return; 2272 - } 2273 - 2274 - if (rbio->narrow_crcs || 2275 - rbio->promote || 2276 - crc_is_compressed(rbio->pick.crc) || 2277 - bch2_csum_type_is_encryption(rbio->pick.crc.csum_type)) 2278 - context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq; 2279 - else if (rbio->pick.crc.csum_type) 2280 - context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq; 2281 - 2282 - bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); 2283 - } 2284 - 2285 - int __bch2_read_indirect_extent(struct btree_trans *trans, 2286 - unsigned *offset_into_extent, 2287 - struct bkey_buf *orig_k) 2288 - { 2289 - struct btree_iter iter; 2290 - struct bkey_s_c k; 2291 - u64 reflink_offset; 2292 - int ret; 2293 - 2294 - reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + 2295 - *offset_into_extent; 2296 - 2297 - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, 2298 - POS(0, reflink_offset), 0); 2299 - ret = bkey_err(k); 2300 - if (ret) 2301 - goto err; 2302 - 2303 - if (k.k->type != KEY_TYPE_reflink_v && 2304 - k.k->type != KEY_TYPE_indirect_inline_data) { 2305 - 
bch_err_inum_offset_ratelimited(trans->c, 2306 - orig_k->k->k.p.inode, 2307 - orig_k->k->k.p.offset << 9, 2308 - "%llu len %u points to nonexistent indirect extent %llu", 2309 - orig_k->k->k.p.offset, 2310 - orig_k->k->k.size, 2311 - reflink_offset); 2312 - bch2_inconsistent_error(trans->c); 2313 - ret = -EIO; 2314 - goto err; 2315 - } 2316 - 2317 - *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); 2318 - bch2_bkey_buf_reassemble(orig_k, trans->c, k); 2319 - err: 2320 - bch2_trans_iter_exit(trans, &iter); 2321 - return ret; 2322 - } 2323 - 2324 - static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, 2325 - struct bkey_s_c k, 2326 - struct bch_extent_ptr ptr) 2327 - { 2328 - struct bch_fs *c = trans->c; 2329 - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev); 2330 - struct btree_iter iter; 2331 - struct printbuf buf = PRINTBUF; 2332 - int ret; 2333 - 2334 - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, 2335 - PTR_BUCKET_POS(c, &ptr), 2336 - BTREE_ITER_CACHED); 2337 - 2338 - prt_printf(&buf, "Attempting to read from stale dirty pointer:"); 2339 - printbuf_indent_add(&buf, 2); 2340 - prt_newline(&buf); 2341 - 2342 - bch2_bkey_val_to_text(&buf, c, k); 2343 - prt_newline(&buf); 2344 - 2345 - prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); 2346 - 2347 - ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); 2348 - if (!ret) { 2349 - prt_newline(&buf); 2350 - bch2_bkey_val_to_text(&buf, c, k); 2351 - } 2352 - 2353 - bch2_fs_inconsistent(c, "%s", buf.buf); 2354 - 2355 - bch2_trans_iter_exit(trans, &iter); 2356 - printbuf_exit(&buf); 2357 - } 2358 - 2359 - int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, 2360 - struct bvec_iter iter, struct bpos read_pos, 2361 - enum btree_id data_btree, struct bkey_s_c k, 2362 - unsigned offset_into_extent, 2363 - struct bch_io_failures *failed, unsigned flags) 2364 - { 2365 - struct bch_fs *c = trans->c; 2366 - struct 
extent_ptr_decoded pick; 2367 - struct bch_read_bio *rbio = NULL; 2368 - struct bch_dev *ca = NULL; 2369 - struct promote_op *promote = NULL; 2370 - bool bounce = false, read_full = false, narrow_crcs = false; 2371 - struct bpos data_pos = bkey_start_pos(k.k); 2372 - int pick_ret; 2373 - 2374 - if (bkey_extent_is_inline_data(k.k)) { 2375 - unsigned bytes = min_t(unsigned, iter.bi_size, 2376 - bkey_inline_data_bytes(k.k)); 2377 - 2378 - swap(iter.bi_size, bytes); 2379 - memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k)); 2380 - swap(iter.bi_size, bytes); 2381 - bio_advance_iter(&orig->bio, &iter, bytes); 2382 - zero_fill_bio_iter(&orig->bio, iter); 2383 - goto out_read_done; 2384 - } 2385 - retry_pick: 2386 - pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); 2387 - 2388 - /* hole or reservation - just zero fill: */ 2389 - if (!pick_ret) 2390 - goto hole; 2391 - 2392 - if (pick_ret < 0) { 2393 - bch_err_inum_offset_ratelimited(c, 2394 - read_pos.inode, read_pos.offset << 9, 2395 - "no device to read from"); 2396 - goto err; 2397 - } 2398 - 2399 - ca = bch_dev_bkey_exists(c, pick.ptr.dev); 2400 - 2401 - /* 2402 - * Stale dirty pointers are treated as IO errors, but @failed isn't 2403 - * allocated unless we're in the retry path - so if we're not in the 2404 - * retry path, don't check here, it'll be caught in bch2_read_endio() 2405 - * and we'll end up in the retry path: 2406 - */ 2407 - if ((flags & BCH_READ_IN_RETRY) && 2408 - !pick.ptr.cached && 2409 - unlikely(ptr_stale(ca, &pick.ptr))) { 2410 - read_from_stale_dirty_pointer(trans, k, pick.ptr); 2411 - bch2_mark_io_failure(failed, &pick); 2412 - goto retry_pick; 2413 - } 2414 - 2415 - /* 2416 - * Unlock the iterator while the btree node's lock is still in 2417 - * cache, before doing the IO: 2418 - */ 2419 - bch2_trans_unlock(trans); 2420 - 2421 - if (flags & BCH_READ_NODECODE) { 2422 - /* 2423 - * can happen if we retry, and the extent we were going to read 2424 - * has been merged in the meantime: 
2425 - */ 2426 - if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) 2427 - goto hole; 2428 - 2429 - iter.bi_size = pick.crc.compressed_size << 9; 2430 - goto get_bio; 2431 - } 2432 - 2433 - if (!(flags & BCH_READ_LAST_FRAGMENT) || 2434 - bio_flagged(&orig->bio, BIO_CHAIN)) 2435 - flags |= BCH_READ_MUST_CLONE; 2436 - 2437 - narrow_crcs = !(flags & BCH_READ_IN_RETRY) && 2438 - bch2_can_narrow_extent_crcs(k, pick.crc); 2439 - 2440 - if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) 2441 - flags |= BCH_READ_MUST_BOUNCE; 2442 - 2443 - EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); 2444 - 2445 - if (crc_is_compressed(pick.crc) || 2446 - (pick.crc.csum_type != BCH_CSUM_none && 2447 - (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || 2448 - (bch2_csum_type_is_encryption(pick.crc.csum_type) && 2449 - (flags & BCH_READ_USER_MAPPED)) || 2450 - (flags & BCH_READ_MUST_BOUNCE)))) { 2451 - read_full = true; 2452 - bounce = true; 2453 - } 2454 - 2455 - if (orig->opts.promote_target) 2456 - promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags, 2457 - &rbio, &bounce, &read_full); 2458 - 2459 - if (!read_full) { 2460 - EBUG_ON(crc_is_compressed(pick.crc)); 2461 - EBUG_ON(pick.crc.csum_type && 2462 - (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || 2463 - bvec_iter_sectors(iter) != pick.crc.live_size || 2464 - pick.crc.offset || 2465 - offset_into_extent)); 2466 - 2467 - data_pos.offset += offset_into_extent; 2468 - pick.ptr.offset += pick.crc.offset + 2469 - offset_into_extent; 2470 - offset_into_extent = 0; 2471 - pick.crc.compressed_size = bvec_iter_sectors(iter); 2472 - pick.crc.uncompressed_size = bvec_iter_sectors(iter); 2473 - pick.crc.offset = 0; 2474 - pick.crc.live_size = bvec_iter_sectors(iter); 2475 - offset_into_extent = 0; 2476 - } 2477 - get_bio: 2478 - if (rbio) { 2479 - /* 2480 - * promote already allocated bounce rbio: 2481 - * promote needs to allocate a bio big enough for uncompressing 2482 - * data in 
the write path, but we're not going to use it all 2483 - * here: 2484 - */ 2485 - EBUG_ON(rbio->bio.bi_iter.bi_size < 2486 - pick.crc.compressed_size << 9); 2487 - rbio->bio.bi_iter.bi_size = 2488 - pick.crc.compressed_size << 9; 2489 - } else if (bounce) { 2490 - unsigned sectors = pick.crc.compressed_size; 2491 - 2492 - rbio = rbio_init(bio_alloc_bioset(NULL, 2493 - DIV_ROUND_UP(sectors, PAGE_SECTORS), 2494 - 0, 2495 - GFP_NOFS, 2496 - &c->bio_read_split), 2497 - orig->opts); 2498 - 2499 - bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); 2500 - rbio->bounce = true; 2501 - rbio->split = true; 2502 - } else if (flags & BCH_READ_MUST_CLONE) { 2503 - /* 2504 - * Have to clone if there were any splits, due to error 2505 - * reporting issues (if a split errored, and retrying didn't 2506 - * work, when it reports the error to its parent (us) we don't 2507 - * know if the error was from our bio, and we should retry, or 2508 - * from the whole bio, in which case we don't want to retry and 2509 - * lose the error) 2510 - */ 2511 - rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS, 2512 - &c->bio_read_split), 2513 - orig->opts); 2514 - rbio->bio.bi_iter = iter; 2515 - rbio->split = true; 2516 - } else { 2517 - rbio = orig; 2518 - rbio->bio.bi_iter = iter; 2519 - EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN)); 2520 - } 2521 - 2522 - EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size); 2523 - 2524 - rbio->c = c; 2525 - rbio->submit_time = local_clock(); 2526 - if (rbio->split) 2527 - rbio->parent = orig; 2528 - else 2529 - rbio->end_io = orig->bio.bi_end_io; 2530 - rbio->bvec_iter = iter; 2531 - rbio->offset_into_extent= offset_into_extent; 2532 - rbio->flags = flags; 2533 - rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ); 2534 - rbio->narrow_crcs = narrow_crcs; 2535 - rbio->hole = 0; 2536 - rbio->retry = 0; 2537 - rbio->context = 0; 2538 - /* XXX: only initialize this if needed */ 2539 - rbio->devs_have = bch2_bkey_devs(k); 2540 - 
rbio->pick = pick; 2541 - rbio->subvol = orig->subvol; 2542 - rbio->read_pos = read_pos; 2543 - rbio->data_btree = data_btree; 2544 - rbio->data_pos = data_pos; 2545 - rbio->version = k.k->version; 2546 - rbio->promote = promote; 2547 - INIT_WORK(&rbio->work, NULL); 2548 - 2549 - rbio->bio.bi_opf = orig->bio.bi_opf; 2550 - rbio->bio.bi_iter.bi_sector = pick.ptr.offset; 2551 - rbio->bio.bi_end_io = bch2_read_endio; 2552 - 2553 - if (rbio->bounce) 2554 - trace_and_count(c, read_bounce, &rbio->bio); 2555 - 2556 - this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); 2557 - bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); 2558 - 2559 - /* 2560 - * If it's being moved internally, we don't want to flag it as a cache 2561 - * hit: 2562 - */ 2563 - if (pick.ptr.cached && !(flags & BCH_READ_NODECODE)) 2564 - bch2_bucket_io_time_reset(trans, pick.ptr.dev, 2565 - PTR_BUCKET_NR(ca, &pick.ptr), READ); 2566 - 2567 - if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { 2568 - bio_inc_remaining(&orig->bio); 2569 - trace_and_count(c, read_split, &orig->bio); 2570 - } 2571 - 2572 - if (!rbio->pick.idx) { 2573 - if (!rbio->have_ioref) { 2574 - bch_err_inum_offset_ratelimited(c, 2575 - read_pos.inode, 2576 - read_pos.offset << 9, 2577 - "no device to read from"); 2578 - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); 2579 - goto out; 2580 - } 2581 - 2582 - this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user], 2583 - bio_sectors(&rbio->bio)); 2584 - bio_set_dev(&rbio->bio, ca->disk_sb.bdev); 2585 - 2586 - if (unlikely(c->opts.no_data_io)) { 2587 - if (likely(!(flags & BCH_READ_IN_RETRY))) 2588 - bio_endio(&rbio->bio); 2589 - } else { 2590 - if (likely(!(flags & BCH_READ_IN_RETRY))) 2591 - submit_bio(&rbio->bio); 2592 - else 2593 - submit_bio_wait(&rbio->bio); 2594 - } 2595 - 2596 - /* 2597 - * We just submitted IO which may block, we expect relock fail 2598 - * events and shouldn't count them: 2599 - */ 2600 - trans->notrace_relock_fail = 
true; 2601 - } else { 2602 - /* Attempting reconstruct read: */ 2603 - if (bch2_ec_read_extent(c, rbio)) { 2604 - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); 2605 - goto out; 2606 - } 2607 - 2608 - if (likely(!(flags & BCH_READ_IN_RETRY))) 2609 - bio_endio(&rbio->bio); 2610 - } 2611 - out: 2612 - if (likely(!(flags & BCH_READ_IN_RETRY))) { 2613 - return 0; 2614 - } else { 2615 - int ret; 2616 - 2617 - rbio->context = RBIO_CONTEXT_UNBOUND; 2618 - bch2_read_endio(&rbio->bio); 2619 - 2620 - ret = rbio->retry; 2621 - rbio = bch2_rbio_free(rbio); 2622 - 2623 - if (ret == READ_RETRY_AVOID) { 2624 - bch2_mark_io_failure(failed, &pick); 2625 - ret = READ_RETRY; 2626 - } 2627 - 2628 - if (!ret) 2629 - goto out_read_done; 2630 - 2631 - return ret; 2632 - } 2633 - 2634 - err: 2635 - if (flags & BCH_READ_IN_RETRY) 2636 - return READ_ERR; 2637 - 2638 - orig->bio.bi_status = BLK_STS_IOERR; 2639 - goto out_read_done; 2640 - 2641 - hole: 2642 - /* 2643 - * won't normally happen in the BCH_READ_NODECODE 2644 - * (bch2_move_extent()) path, but if we retry and the extent we wanted 2645 - * to read no longer exists we have to signal that: 2646 - */ 2647 - if (flags & BCH_READ_NODECODE) 2648 - orig->hole = true; 2649 - 2650 - zero_fill_bio_iter(&orig->bio, iter); 2651 - out_read_done: 2652 - if (flags & BCH_READ_LAST_FRAGMENT) 2653 - bch2_rbio_done(orig); 2654 - return 0; 2655 - } 2656 - 2657 - void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, 2658 - struct bvec_iter bvec_iter, subvol_inum inum, 2659 - struct bch_io_failures *failed, unsigned flags) 2660 - { 2661 - struct btree_trans trans; 2662 - struct btree_iter iter; 2663 - struct bkey_buf sk; 2664 - struct bkey_s_c k; 2665 - u32 snapshot; 2666 - int ret; 2667 - 2668 - BUG_ON(flags & BCH_READ_NODECODE); 2669 - 2670 - bch2_bkey_buf_init(&sk); 2671 - bch2_trans_init(&trans, c, 0, 0); 2672 - retry: 2673 - bch2_trans_begin(&trans); 2674 - iter = (struct btree_iter) { NULL }; 2675 - 2676 - ret = 
bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 2677 - if (ret) 2678 - goto err; 2679 - 2680 - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 2681 - SPOS(inum.inum, bvec_iter.bi_sector, snapshot), 2682 - BTREE_ITER_SLOTS); 2683 - while (1) { 2684 - unsigned bytes, sectors, offset_into_extent; 2685 - enum btree_id data_btree = BTREE_ID_extents; 2686 - 2687 - /* 2688 - * read_extent -> io_time_reset may cause a transaction restart 2689 - * without returning an error, we need to check for that here: 2690 - */ 2691 - ret = bch2_trans_relock(&trans); 2692 - if (ret) 2693 - break; 2694 - 2695 - bch2_btree_iter_set_pos(&iter, 2696 - POS(inum.inum, bvec_iter.bi_sector)); 2697 - 2698 - k = bch2_btree_iter_peek_slot(&iter); 2699 - ret = bkey_err(k); 2700 - if (ret) 2701 - break; 2702 - 2703 - offset_into_extent = iter.pos.offset - 2704 - bkey_start_offset(k.k); 2705 - sectors = k.k->size - offset_into_extent; 2706 - 2707 - bch2_bkey_buf_reassemble(&sk, c, k); 2708 - 2709 - ret = bch2_read_indirect_extent(&trans, &data_btree, 2710 - &offset_into_extent, &sk); 2711 - if (ret) 2712 - break; 2713 - 2714 - k = bkey_i_to_s_c(sk.k); 2715 - 2716 - /* 2717 - * With indirect extents, the amount of data to read is the min 2718 - * of the original extent and the indirect extent: 2719 - */ 2720 - sectors = min(sectors, k.k->size - offset_into_extent); 2721 - 2722 - bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; 2723 - swap(bvec_iter.bi_size, bytes); 2724 - 2725 - if (bvec_iter.bi_size == bytes) 2726 - flags |= BCH_READ_LAST_FRAGMENT; 2727 - 2728 - ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter.pos, 2729 - data_btree, k, 2730 - offset_into_extent, failed, flags); 2731 - if (ret) 2732 - break; 2733 - 2734 - if (flags & BCH_READ_LAST_FRAGMENT) 2735 - break; 2736 - 2737 - swap(bvec_iter.bi_size, bytes); 2738 - bio_advance_iter(&rbio->bio, &bvec_iter, bytes); 2739 - 2740 - ret = btree_trans_too_many_iters(&trans); 2741 - if (ret) 2742 - break; 2743 - } 
2744 - err: 2745 - bch2_trans_iter_exit(&trans, &iter); 2746 - 2747 - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || 2748 - ret == READ_RETRY || 2749 - ret == READ_RETRY_AVOID) 2750 - goto retry; 2751 - 2752 - bch2_trans_exit(&trans); 2753 - bch2_bkey_buf_exit(&sk, c); 2754 - 2755 - if (ret) { 2756 - bch_err_inum_offset_ratelimited(c, inum.inum, 2757 - bvec_iter.bi_sector << 9, 2758 - "read error %i from btree lookup", ret); 2759 - rbio->bio.bi_status = BLK_STS_IOERR; 2760 - bch2_rbio_done(rbio); 2761 - } 2762 - } 2763 - 2764 - void bch2_fs_io_exit(struct bch_fs *c) 2765 - { 2766 - if (c->promote_table.tbl) 2767 - rhashtable_destroy(&c->promote_table); 2768 1904 mempool_exit(&c->bio_bounce_pages); 2769 1905 bioset_exit(&c->bio_write); 2770 - bioset_exit(&c->bio_read_split); 2771 - bioset_exit(&c->bio_read); 2772 1906 } 2773 1907 2774 - int bch2_fs_io_init(struct bch_fs *c) 1908 + int bch2_fs_io_write_init(struct bch_fs *c) 2775 1909 { 2776 - if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), 2777 - BIOSET_NEED_BVECS)) 2778 - return -BCH_ERR_ENOMEM_bio_read_init; 2779 - 2780 - if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), 2781 - BIOSET_NEED_BVECS)) 2782 - return -BCH_ERR_ENOMEM_bio_read_split_init; 2783 - 2784 1910 if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), 2785 1911 BIOSET_NEED_BVECS)) 2786 1912 return -BCH_ERR_ENOMEM_bio_write_init; ··· 1665 3043 c->opts.encoded_extent_max) / 1666 3044 PAGE_SIZE, 0)) 1667 3045 return -BCH_ERR_ENOMEM_bio_bounce_pages_init; 1668 - 1669 - if (rhashtable_init(&c->promote_table, &bch_promote_params)) 1670 - return -BCH_ERR_ENOMEM_promote_table_init; 1671 3046 1672 3047 return 0; 1673 3048 }
-202
fs/bcachefs/io.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _BCACHEFS_IO_H 3 - #define _BCACHEFS_IO_H 4 - 5 - #include "checksum.h" 6 - #include "bkey_buf.h" 7 - #include "io_types.h" 8 - 9 - #define to_wbio(_bio) \ 10 - container_of((_bio), struct bch_write_bio, bio) 11 - 12 - #define to_rbio(_bio) \ 13 - container_of((_bio), struct bch_read_bio, bio) 14 - 15 - void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); 16 - void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); 17 - 18 - #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT 19 - void bch2_latency_acct(struct bch_dev *, u64, int); 20 - #else 21 - static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {} 22 - #endif 23 - 24 - void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, 25 - enum bch_data_type, const struct bkey_i *, bool); 26 - 27 - #define BLK_STS_REMOVED ((__force blk_status_t)128) 28 - 29 - const char *bch2_blk_status_to_str(blk_status_t); 30 - 31 - #define BCH_WRITE_FLAGS() \ 32 - x(ALLOC_NOWAIT) \ 33 - x(CACHED) \ 34 - x(DATA_ENCODED) \ 35 - x(PAGES_STABLE) \ 36 - x(PAGES_OWNED) \ 37 - x(ONLY_SPECIFIED_DEVS) \ 38 - x(WROTE_DATA_INLINE) \ 39 - x(FROM_INTERNAL) \ 40 - x(CHECK_ENOSPC) \ 41 - x(SYNC) \ 42 - x(MOVE) \ 43 - x(IN_WORKER) \ 44 - x(DONE) \ 45 - x(IO_ERROR) \ 46 - x(CONVERT_UNWRITTEN) 47 - 48 - enum __bch_write_flags { 49 - #define x(f) __BCH_WRITE_##f, 50 - BCH_WRITE_FLAGS() 51 - #undef x 52 - }; 53 - 54 - enum bch_write_flags { 55 - #define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f), 56 - BCH_WRITE_FLAGS() 57 - #undef x 58 - }; 59 - 60 - static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) 61 - { 62 - return op->watermark == BCH_WATERMARK_copygc 63 - ? 
op->c->copygc_wq 64 - : op->c->btree_update_wq; 65 - } 66 - 67 - int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *, 68 - struct bkey_i *, bool *, s64 *, s64 *); 69 - int bch2_extent_update(struct btree_trans *, subvol_inum, 70 - struct btree_iter *, struct bkey_i *, 71 - struct disk_reservation *, u64, s64 *, bool); 72 - int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, 73 - unsigned, struct bch_io_opts, s64 *, 74 - struct write_point_specifier); 75 - 76 - int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, 77 - subvol_inum, u64, s64 *); 78 - int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); 79 - 80 - static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, 81 - struct bch_io_opts opts) 82 - { 83 - op->c = c; 84 - op->end_io = NULL; 85 - op->flags = 0; 86 - op->written = 0; 87 - op->error = 0; 88 - op->csum_type = bch2_data_checksum_type(c, opts); 89 - op->compression_opt = opts.compression; 90 - op->nr_replicas = 0; 91 - op->nr_replicas_required = c->opts.data_replicas_required; 92 - op->watermark = BCH_WATERMARK_normal; 93 - op->incompressible = 0; 94 - op->open_buckets.nr = 0; 95 - op->devs_have.nr = 0; 96 - op->target = 0; 97 - op->opts = opts; 98 - op->subvol = 0; 99 - op->pos = POS_MAX; 100 - op->version = ZERO_VERSION; 101 - op->write_point = (struct write_point_specifier) { 0 }; 102 - op->res = (struct disk_reservation) { 0 }; 103 - op->new_i_size = U64_MAX; 104 - op->i_sectors_delta = 0; 105 - op->devs_need_flush = NULL; 106 - } 107 - 108 - void bch2_write(struct closure *); 109 - 110 - void bch2_write_point_do_index_updates(struct work_struct *); 111 - 112 - static inline struct bch_write_bio *wbio_init(struct bio *bio) 113 - { 114 - struct bch_write_bio *wbio = to_wbio(bio); 115 - 116 - memset(&wbio->wbio, 0, sizeof(wbio->wbio)); 117 - return wbio; 118 - } 119 - 120 - void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *); 121 - 122 - 
struct bch_devs_mask; 123 - struct cache_promote_op; 124 - struct extent_ptr_decoded; 125 - 126 - int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, 127 - struct bkey_buf *); 128 - 129 - static inline int bch2_read_indirect_extent(struct btree_trans *trans, 130 - enum btree_id *data_btree, 131 - unsigned *offset_into_extent, 132 - struct bkey_buf *k) 133 - { 134 - if (k->k->k.type != KEY_TYPE_reflink_p) 135 - return 0; 136 - 137 - *data_btree = BTREE_ID_reflink; 138 - return __bch2_read_indirect_extent(trans, offset_into_extent, k); 139 - } 140 - 141 - enum bch_read_flags { 142 - BCH_READ_RETRY_IF_STALE = 1 << 0, 143 - BCH_READ_MAY_PROMOTE = 1 << 1, 144 - BCH_READ_USER_MAPPED = 1 << 2, 145 - BCH_READ_NODECODE = 1 << 3, 146 - BCH_READ_LAST_FRAGMENT = 1 << 4, 147 - 148 - /* internal: */ 149 - BCH_READ_MUST_BOUNCE = 1 << 5, 150 - BCH_READ_MUST_CLONE = 1 << 6, 151 - BCH_READ_IN_RETRY = 1 << 7, 152 - }; 153 - 154 - int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, 155 - struct bvec_iter, struct bpos, enum btree_id, 156 - struct bkey_s_c, unsigned, 157 - struct bch_io_failures *, unsigned); 158 - 159 - static inline void bch2_read_extent(struct btree_trans *trans, 160 - struct bch_read_bio *rbio, struct bpos read_pos, 161 - enum btree_id data_btree, struct bkey_s_c k, 162 - unsigned offset_into_extent, unsigned flags) 163 - { 164 - __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, 165 - data_btree, k, offset_into_extent, NULL, flags); 166 - } 167 - 168 - void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, 169 - subvol_inum, struct bch_io_failures *, unsigned flags); 170 - 171 - static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, 172 - subvol_inum inum) 173 - { 174 - struct bch_io_failures failed = { .nr = 0 }; 175 - 176 - BUG_ON(rbio->_state); 177 - 178 - rbio->c = c; 179 - rbio->start_time = local_clock(); 180 - rbio->subvol = inum.subvol; 181 - 182 - __bch2_read(c, rbio, 
rbio->bio.bi_iter, inum, &failed, 183 - BCH_READ_RETRY_IF_STALE| 184 - BCH_READ_MAY_PROMOTE| 185 - BCH_READ_USER_MAPPED); 186 - } 187 - 188 - static inline struct bch_read_bio *rbio_init(struct bio *bio, 189 - struct bch_io_opts opts) 190 - { 191 - struct bch_read_bio *rbio = to_rbio(bio); 192 - 193 - rbio->_state = 0; 194 - rbio->promote = NULL; 195 - rbio->opts = opts; 196 - return rbio; 197 - } 198 - 199 - void bch2_fs_io_exit(struct bch_fs *); 200 - int bch2_fs_io_init(struct bch_fs *); 201 - 202 - #endif /* _BCACHEFS_IO_H */
+215
fs/bcachefs/io_misc.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * io_misc.c - fallocate, fpunch, truncate: 4 + */ 5 + 6 + #include "bcachefs.h" 7 + #include "alloc_foreground.h" 8 + #include "bkey_buf.h" 9 + #include "btree_update.h" 10 + #include "buckets.h" 11 + #include "clock.h" 12 + #include "extents.h" 13 + #include "io_misc.h" 14 + #include "io_write.h" 15 + #include "subvolume.h" 16 + 17 + /* Overwrites whatever was present with zeroes: */ 18 + int bch2_extent_fallocate(struct btree_trans *trans, 19 + subvol_inum inum, 20 + struct btree_iter *iter, 21 + unsigned sectors, 22 + struct bch_io_opts opts, 23 + s64 *i_sectors_delta, 24 + struct write_point_specifier write_point) 25 + { 26 + struct bch_fs *c = trans->c; 27 + struct disk_reservation disk_res = { 0 }; 28 + struct closure cl; 29 + struct open_buckets open_buckets = { 0 }; 30 + struct bkey_s_c k; 31 + struct bkey_buf old, new; 32 + unsigned sectors_allocated = 0; 33 + bool have_reservation = false; 34 + bool unwritten = opts.nocow && 35 + c->sb.version >= bcachefs_metadata_version_unwritten_extents; 36 + int ret; 37 + 38 + bch2_bkey_buf_init(&old); 39 + bch2_bkey_buf_init(&new); 40 + closure_init_stack(&cl); 41 + 42 + k = bch2_btree_iter_peek_slot(iter); 43 + ret = bkey_err(k); 44 + if (ret) 45 + return ret; 46 + 47 + sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset); 48 + 49 + if (!have_reservation) { 50 + unsigned new_replicas = 51 + max(0, (int) opts.data_replicas - 52 + (int) bch2_bkey_nr_ptrs_fully_allocated(k)); 53 + /* 54 + * Get a disk reservation before (in the nocow case) calling 55 + * into the allocator: 56 + */ 57 + ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); 58 + if (unlikely(ret)) 59 + goto err; 60 + 61 + bch2_bkey_buf_reassemble(&old, c, k); 62 + } 63 + 64 + if (have_reservation) { 65 + if (!bch2_extents_match(k, bkey_i_to_s_c(old.k))) 66 + goto err; 67 + 68 + bch2_key_resize(&new.k->k, sectors); 69 + } else if (!unwritten) { 70 + struct 
bkey_i_reservation *reservation; 71 + 72 + bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); 73 + reservation = bkey_reservation_init(new.k); 74 + reservation->k.p = iter->pos; 75 + bch2_key_resize(&reservation->k, sectors); 76 + reservation->v.nr_replicas = opts.data_replicas; 77 + } else { 78 + struct bkey_i_extent *e; 79 + struct bch_devs_list devs_have; 80 + struct write_point *wp; 81 + struct bch_extent_ptr *ptr; 82 + 83 + devs_have.nr = 0; 84 + 85 + bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); 86 + 87 + e = bkey_extent_init(new.k); 88 + e->k.p = iter->pos; 89 + 90 + ret = bch2_alloc_sectors_start_trans(trans, 91 + opts.foreground_target, 92 + false, 93 + write_point, 94 + &devs_have, 95 + opts.data_replicas, 96 + opts.data_replicas, 97 + BCH_WATERMARK_normal, 0, &cl, &wp); 98 + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) 99 + ret = -BCH_ERR_transaction_restart_nested; 100 + if (ret) 101 + goto err; 102 + 103 + sectors = min(sectors, wp->sectors_free); 104 + sectors_allocated = sectors; 105 + 106 + bch2_key_resize(&e->k, sectors); 107 + 108 + bch2_open_bucket_get(c, wp, &open_buckets); 109 + bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); 110 + bch2_alloc_sectors_done(c, wp); 111 + 112 + extent_for_each_ptr(extent_i_to_s(e), ptr) 113 + ptr->unwritten = true; 114 + } 115 + 116 + have_reservation = true; 117 + 118 + ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, 119 + 0, i_sectors_delta, true); 120 + err: 121 + if (!ret && sectors_allocated) 122 + bch2_increment_clock(c, sectors_allocated, WRITE); 123 + 124 + bch2_open_buckets_put(c, &open_buckets); 125 + bch2_disk_reservation_put(c, &disk_res); 126 + bch2_bkey_buf_exit(&new, c); 127 + bch2_bkey_buf_exit(&old, c); 128 + 129 + if (closure_nr_remaining(&cl) != 1) { 130 + bch2_trans_unlock(trans); 131 + closure_sync(&cl); 132 + } 133 + 134 + return ret; 135 + } 136 + 137 + /* 138 + * Returns -BCH_ERR_transacton_restart if we had to drop locks: 139 
+ */ 140 + int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, 141 + subvol_inum inum, u64 end, 142 + s64 *i_sectors_delta) 143 + { 144 + struct bch_fs *c = trans->c; 145 + unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); 146 + struct bpos end_pos = POS(inum.inum, end); 147 + struct bkey_s_c k; 148 + int ret = 0, ret2 = 0; 149 + u32 snapshot; 150 + 151 + while (!ret || 152 + bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 153 + struct disk_reservation disk_res = 154 + bch2_disk_reservation_init(c, 0); 155 + struct bkey_i delete; 156 + 157 + if (ret) 158 + ret2 = ret; 159 + 160 + bch2_trans_begin(trans); 161 + 162 + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); 163 + if (ret) 164 + continue; 165 + 166 + bch2_btree_iter_set_snapshot(iter, snapshot); 167 + 168 + /* 169 + * peek_upto() doesn't have ideal semantics for extents: 170 + */ 171 + k = bch2_btree_iter_peek_upto(iter, end_pos); 172 + if (!k.k) 173 + break; 174 + 175 + ret = bkey_err(k); 176 + if (ret) 177 + continue; 178 + 179 + bkey_init(&delete.k); 180 + delete.k.p = iter->pos; 181 + 182 + /* create the biggest key we can */ 183 + bch2_key_resize(&delete.k, max_sectors); 184 + bch2_cut_back(end_pos, &delete); 185 + 186 + ret = bch2_extent_update(trans, inum, iter, &delete, 187 + &disk_res, 0, i_sectors_delta, false); 188 + bch2_disk_reservation_put(c, &disk_res); 189 + } 190 + 191 + return ret ?: ret2; 192 + } 193 + 194 + int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, 195 + s64 *i_sectors_delta) 196 + { 197 + struct btree_trans trans; 198 + struct btree_iter iter; 199 + int ret; 200 + 201 + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 202 + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 203 + POS(inum.inum, start), 204 + BTREE_ITER_INTENT); 205 + 206 + ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta); 207 + 208 + bch2_trans_iter_exit(&trans, &iter); 209 + bch2_trans_exit(&trans); 210 + 211 + if 
(bch2_err_matches(ret, BCH_ERR_transaction_restart)) 212 + ret = 0; 213 + 214 + return ret; 215 + }
+12
fs/bcachefs/io_misc.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_IO_MISC_H 3 + #define _BCACHEFS_IO_MISC_H 4 + 5 + int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, 6 + unsigned, struct bch_io_opts, s64 *, 7 + struct write_point_specifier); 8 + int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, 9 + subvol_inum, u64, s64 *); 10 + int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); 11 + 12 + #endif /* _BCACHEFS_IO_MISC_H */
+1207
fs/bcachefs/io_read.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Some low level IO code, and hacks for various block layer limitations 4 + * 5 + * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> 6 + * Copyright 2012 Google, Inc. 7 + */ 8 + 9 + #include "bcachefs.h" 10 + #include "alloc_background.h" 11 + #include "alloc_foreground.h" 12 + #include "btree_update.h" 13 + #include "buckets.h" 14 + #include "checksum.h" 15 + #include "clock.h" 16 + #include "compress.h" 17 + #include "data_update.h" 18 + #include "disk_groups.h" 19 + #include "ec.h" 20 + #include "error.h" 21 + #include "io_read.h" 22 + #include "io_misc.h" 23 + #include "io_write.h" 24 + #include "subvolume.h" 25 + #include "trace.h" 26 + 27 + #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT 28 + 29 + static bool bch2_target_congested(struct bch_fs *c, u16 target) 30 + { 31 + const struct bch_devs_mask *devs; 32 + unsigned d, nr = 0, total = 0; 33 + u64 now = local_clock(), last; 34 + s64 congested; 35 + struct bch_dev *ca; 36 + 37 + if (!target) 38 + return false; 39 + 40 + rcu_read_lock(); 41 + devs = bch2_target_to_mask(c, target) ?: 42 + &c->rw_devs[BCH_DATA_user]; 43 + 44 + for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { 45 + ca = rcu_dereference(c->devs[d]); 46 + if (!ca) 47 + continue; 48 + 49 + congested = atomic_read(&ca->congested); 50 + last = READ_ONCE(ca->congested_last); 51 + if (time_after64(now, last)) 52 + congested -= (now - last) >> 12; 53 + 54 + total += max(congested, 0LL); 55 + nr++; 56 + } 57 + rcu_read_unlock(); 58 + 59 + return bch2_rand_range(nr * CONGESTED_MAX) < total; 60 + } 61 + 62 + #else 63 + 64 + static bool bch2_target_congested(struct bch_fs *c, u16 target) 65 + { 66 + return false; 67 + } 68 + 69 + #endif 70 + 71 + /* Cache promotion on read */ 72 + 73 + struct promote_op { 74 + struct rcu_head rcu; 75 + u64 start_time; 76 + 77 + struct rhash_head hash; 78 + struct bpos pos; 79 + 80 + struct data_update write; 81 + struct bio_vec bi_inline_vecs[0]; /* must be 
last */ 82 + }; 83 + 84 + static const struct rhashtable_params bch_promote_params = { 85 + .head_offset = offsetof(struct promote_op, hash), 86 + .key_offset = offsetof(struct promote_op, pos), 87 + .key_len = sizeof(struct bpos), 88 + }; 89 + 90 + static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, 91 + struct bpos pos, 92 + struct bch_io_opts opts, 93 + unsigned flags) 94 + { 95 + if (!(flags & BCH_READ_MAY_PROMOTE)) 96 + return false; 97 + 98 + if (!opts.promote_target) 99 + return false; 100 + 101 + if (bch2_bkey_has_target(c, k, opts.promote_target)) 102 + return false; 103 + 104 + if (bkey_extent_is_unwritten(k)) 105 + return false; 106 + 107 + if (bch2_target_congested(c, opts.promote_target)) { 108 + /* XXX trace this */ 109 + return false; 110 + } 111 + 112 + if (rhashtable_lookup_fast(&c->promote_table, &pos, 113 + bch_promote_params)) 114 + return false; 115 + 116 + return true; 117 + } 118 + 119 + static void promote_free(struct bch_fs *c, struct promote_op *op) 120 + { 121 + int ret; 122 + 123 + bch2_data_update_exit(&op->write); 124 + 125 + ret = rhashtable_remove_fast(&c->promote_table, &op->hash, 126 + bch_promote_params); 127 + BUG_ON(ret); 128 + bch2_write_ref_put(c, BCH_WRITE_REF_promote); 129 + kfree_rcu(op, rcu); 130 + } 131 + 132 + static void promote_done(struct bch_write_op *wop) 133 + { 134 + struct promote_op *op = 135 + container_of(wop, struct promote_op, write.op); 136 + struct bch_fs *c = op->write.op.c; 137 + 138 + bch2_time_stats_update(&c->times[BCH_TIME_data_promote], 139 + op->start_time); 140 + promote_free(c, op); 141 + } 142 + 143 + static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) 144 + { 145 + struct bio *bio = &op->write.op.wbio.bio; 146 + 147 + trace_and_count(op->write.op.c, read_promote, &rbio->bio); 148 + 149 + /* we now own pages: */ 150 + BUG_ON(!rbio->bounce); 151 + BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); 152 + 153 + memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, 154 + 
sizeof(struct bio_vec) * rbio->bio.bi_vcnt); 155 + swap(bio->bi_vcnt, rbio->bio.bi_vcnt); 156 + 157 + bch2_data_update_read_done(&op->write, rbio->pick.crc); 158 + } 159 + 160 + static struct promote_op *__promote_alloc(struct btree_trans *trans, 161 + enum btree_id btree_id, 162 + struct bkey_s_c k, 163 + struct bpos pos, 164 + struct extent_ptr_decoded *pick, 165 + struct bch_io_opts opts, 166 + unsigned sectors, 167 + struct bch_read_bio **rbio) 168 + { 169 + struct bch_fs *c = trans->c; 170 + struct promote_op *op = NULL; 171 + struct bio *bio; 172 + unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); 173 + int ret; 174 + 175 + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) 176 + return NULL; 177 + 178 + op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS); 179 + if (!op) 180 + goto err; 181 + 182 + op->start_time = local_clock(); 183 + op->pos = pos; 184 + 185 + /* 186 + * We don't use the mempool here because extents that aren't 187 + * checksummed or compressed can be too big for the mempool: 188 + */ 189 + *rbio = kzalloc(sizeof(struct bch_read_bio) + 190 + sizeof(struct bio_vec) * pages, 191 + GFP_NOFS); 192 + if (!*rbio) 193 + goto err; 194 + 195 + rbio_init(&(*rbio)->bio, opts); 196 + bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0); 197 + 198 + if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, 199 + GFP_NOFS)) 200 + goto err; 201 + 202 + (*rbio)->bounce = true; 203 + (*rbio)->split = true; 204 + (*rbio)->kmalloc = true; 205 + 206 + if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, 207 + bch_promote_params)) 208 + goto err; 209 + 210 + bio = &op->write.op.wbio.bio; 211 + bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0); 212 + 213 + ret = bch2_data_update_init(trans, NULL, &op->write, 214 + writepoint_hashed((unsigned long) current), 215 + opts, 216 + (struct data_update_opts) { 217 + .target = opts.promote_target, 218 + .extra_replicas = 1, 219 + .write_flags = 
BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED, 220 + }, 221 + btree_id, k); 222 + /* 223 + * possible errors: -BCH_ERR_nocow_lock_blocked, 224 + * -BCH_ERR_ENOSPC_disk_reservation: 225 + */ 226 + if (ret) { 227 + ret = rhashtable_remove_fast(&c->promote_table, &op->hash, 228 + bch_promote_params); 229 + BUG_ON(ret); 230 + goto err; 231 + } 232 + 233 + op->write.op.end_io = promote_done; 234 + 235 + return op; 236 + err: 237 + if (*rbio) 238 + bio_free_pages(&(*rbio)->bio); 239 + kfree(*rbio); 240 + *rbio = NULL; 241 + kfree(op); 242 + bch2_write_ref_put(c, BCH_WRITE_REF_promote); 243 + return NULL; 244 + } 245 + 246 + noinline 247 + static struct promote_op *promote_alloc(struct btree_trans *trans, 248 + struct bvec_iter iter, 249 + struct bkey_s_c k, 250 + struct extent_ptr_decoded *pick, 251 + struct bch_io_opts opts, 252 + unsigned flags, 253 + struct bch_read_bio **rbio, 254 + bool *bounce, 255 + bool *read_full) 256 + { 257 + struct bch_fs *c = trans->c; 258 + bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents); 259 + /* data might have to be decompressed in the write path: */ 260 + unsigned sectors = promote_full 261 + ? max(pick->crc.compressed_size, pick->crc.live_size) 262 + : bvec_iter_sectors(iter); 263 + struct bpos pos = promote_full 264 + ? bkey_start_pos(k.k) 265 + : POS(k.k->p.inode, iter.bi_sector); 266 + struct promote_op *promote; 267 + 268 + if (!should_promote(c, k, pos, opts, flags)) 269 + return NULL; 270 + 271 + promote = __promote_alloc(trans, 272 + k.k->type == KEY_TYPE_reflink_v 273 + ? 
BTREE_ID_reflink 274 + : BTREE_ID_extents, 275 + k, pos, pick, opts, sectors, rbio); 276 + if (!promote) 277 + return NULL; 278 + 279 + *bounce = true; 280 + *read_full = promote_full; 281 + return promote; 282 + } 283 + 284 + /* Read */ 285 + 286 + #define READ_RETRY_AVOID 1 287 + #define READ_RETRY 2 288 + #define READ_ERR 3 289 + 290 + enum rbio_context { 291 + RBIO_CONTEXT_NULL, 292 + RBIO_CONTEXT_HIGHPRI, 293 + RBIO_CONTEXT_UNBOUND, 294 + }; 295 + 296 + static inline struct bch_read_bio * 297 + bch2_rbio_parent(struct bch_read_bio *rbio) 298 + { 299 + return rbio->split ? rbio->parent : rbio; 300 + } 301 + 302 + __always_inline 303 + static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn, 304 + enum rbio_context context, 305 + struct workqueue_struct *wq) 306 + { 307 + if (context <= rbio->context) { 308 + fn(&rbio->work); 309 + } else { 310 + rbio->work.func = fn; 311 + rbio->context = context; 312 + queue_work(wq, &rbio->work); 313 + } 314 + } 315 + 316 + static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) 317 + { 318 + BUG_ON(rbio->bounce && !rbio->split); 319 + 320 + if (rbio->promote) 321 + promote_free(rbio->c, rbio->promote); 322 + rbio->promote = NULL; 323 + 324 + if (rbio->bounce) 325 + bch2_bio_free_pages_pool(rbio->c, &rbio->bio); 326 + 327 + if (rbio->split) { 328 + struct bch_read_bio *parent = rbio->parent; 329 + 330 + if (rbio->kmalloc) 331 + kfree(rbio); 332 + else 333 + bio_put(&rbio->bio); 334 + 335 + rbio = parent; 336 + } 337 + 338 + return rbio; 339 + } 340 + 341 + /* 342 + * Only called on a top level bch_read_bio to complete an entire read request, 343 + * not a split: 344 + */ 345 + static void bch2_rbio_done(struct bch_read_bio *rbio) 346 + { 347 + if (rbio->start_time) 348 + bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], 349 + rbio->start_time); 350 + bio_endio(&rbio->bio); 351 + } 352 + 353 + static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, 354 + 
struct bvec_iter bvec_iter, 355 + struct bch_io_failures *failed, 356 + unsigned flags) 357 + { 358 + struct btree_trans trans; 359 + struct btree_iter iter; 360 + struct bkey_buf sk; 361 + struct bkey_s_c k; 362 + int ret; 363 + 364 + flags &= ~BCH_READ_LAST_FRAGMENT; 365 + flags |= BCH_READ_MUST_CLONE; 366 + 367 + bch2_bkey_buf_init(&sk); 368 + bch2_trans_init(&trans, c, 0, 0); 369 + 370 + bch2_trans_iter_init(&trans, &iter, rbio->data_btree, 371 + rbio->read_pos, BTREE_ITER_SLOTS); 372 + retry: 373 + rbio->bio.bi_status = 0; 374 + 375 + k = bch2_btree_iter_peek_slot(&iter); 376 + if (bkey_err(k)) 377 + goto err; 378 + 379 + bch2_bkey_buf_reassemble(&sk, c, k); 380 + k = bkey_i_to_s_c(sk.k); 381 + bch2_trans_unlock(&trans); 382 + 383 + if (!bch2_bkey_matches_ptr(c, k, 384 + rbio->pick.ptr, 385 + rbio->data_pos.offset - 386 + rbio->pick.crc.offset)) { 387 + /* extent we wanted to read no longer exists: */ 388 + rbio->hole = true; 389 + goto out; 390 + } 391 + 392 + ret = __bch2_read_extent(&trans, rbio, bvec_iter, 393 + rbio->read_pos, 394 + rbio->data_btree, 395 + k, 0, failed, flags); 396 + if (ret == READ_RETRY) 397 + goto retry; 398 + if (ret) 399 + goto err; 400 + out: 401 + bch2_rbio_done(rbio); 402 + bch2_trans_iter_exit(&trans, &iter); 403 + bch2_trans_exit(&trans); 404 + bch2_bkey_buf_exit(&sk, c); 405 + return; 406 + err: 407 + rbio->bio.bi_status = BLK_STS_IOERR; 408 + goto out; 409 + } 410 + 411 + static void bch2_rbio_retry(struct work_struct *work) 412 + { 413 + struct bch_read_bio *rbio = 414 + container_of(work, struct bch_read_bio, work); 415 + struct bch_fs *c = rbio->c; 416 + struct bvec_iter iter = rbio->bvec_iter; 417 + unsigned flags = rbio->flags; 418 + subvol_inum inum = { 419 + .subvol = rbio->subvol, 420 + .inum = rbio->read_pos.inode, 421 + }; 422 + struct bch_io_failures failed = { .nr = 0 }; 423 + 424 + trace_and_count(c, read_retry, &rbio->bio); 425 + 426 + if (rbio->retry == READ_RETRY_AVOID) 427 + bch2_mark_io_failure(&failed, 
&rbio->pick); 428 + 429 + rbio->bio.bi_status = 0; 430 + 431 + rbio = bch2_rbio_free(rbio); 432 + 433 + flags |= BCH_READ_IN_RETRY; 434 + flags &= ~BCH_READ_MAY_PROMOTE; 435 + 436 + if (flags & BCH_READ_NODECODE) { 437 + bch2_read_retry_nodecode(c, rbio, iter, &failed, flags); 438 + } else { 439 + flags &= ~BCH_READ_LAST_FRAGMENT; 440 + flags |= BCH_READ_MUST_CLONE; 441 + 442 + __bch2_read(c, rbio, iter, inum, &failed, flags); 443 + } 444 + } 445 + 446 + static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, 447 + blk_status_t error) 448 + { 449 + rbio->retry = retry; 450 + 451 + if (rbio->flags & BCH_READ_IN_RETRY) 452 + return; 453 + 454 + if (retry == READ_ERR) { 455 + rbio = bch2_rbio_free(rbio); 456 + 457 + rbio->bio.bi_status = error; 458 + bch2_rbio_done(rbio); 459 + } else { 460 + bch2_rbio_punt(rbio, bch2_rbio_retry, 461 + RBIO_CONTEXT_UNBOUND, system_unbound_wq); 462 + } 463 + } 464 + 465 + static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, 466 + struct bch_read_bio *rbio) 467 + { 468 + struct bch_fs *c = rbio->c; 469 + u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; 470 + struct bch_extent_crc_unpacked new_crc; 471 + struct btree_iter iter; 472 + struct bkey_i *new; 473 + struct bkey_s_c k; 474 + int ret = 0; 475 + 476 + if (crc_is_compressed(rbio->pick.crc)) 477 + return 0; 478 + 479 + k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos, 480 + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 481 + if ((ret = bkey_err(k))) 482 + goto out; 483 + 484 + if (bversion_cmp(k.k->version, rbio->version) || 485 + !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) 486 + goto out; 487 + 488 + /* Extent was merged? 
*/ 489 + if (bkey_start_offset(k.k) < data_offset || 490 + k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) 491 + goto out; 492 + 493 + if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, 494 + rbio->pick.crc, NULL, &new_crc, 495 + bkey_start_offset(k.k) - data_offset, k.k->size, 496 + rbio->pick.crc.csum_type)) { 497 + bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); 498 + ret = 0; 499 + goto out; 500 + } 501 + 502 + /* 503 + * going to be temporarily appending another checksum entry: 504 + */ 505 + new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 506 + sizeof(struct bch_extent_crc128)); 507 + if ((ret = PTR_ERR_OR_ZERO(new))) 508 + goto out; 509 + 510 + bkey_reassemble(new, k); 511 + 512 + if (!bch2_bkey_narrow_crcs(new, new_crc)) 513 + goto out; 514 + 515 + ret = bch2_trans_update(trans, &iter, new, 516 + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 517 + out: 518 + bch2_trans_iter_exit(trans, &iter); 519 + return ret; 520 + } 521 + 522 + static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) 523 + { 524 + bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, 525 + __bch2_rbio_narrow_crcs(&trans, rbio)); 526 + } 527 + 528 + /* Inner part that may run in process context */ 529 + static void __bch2_read_endio(struct work_struct *work) 530 + { 531 + struct bch_read_bio *rbio = 532 + container_of(work, struct bch_read_bio, work); 533 + struct bch_fs *c = rbio->c; 534 + struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); 535 + struct bio *src = &rbio->bio; 536 + struct bio *dst = &bch2_rbio_parent(rbio)->bio; 537 + struct bvec_iter dst_iter = rbio->bvec_iter; 538 + struct bch_extent_crc_unpacked crc = rbio->pick.crc; 539 + struct nonce nonce = extent_nonce(rbio->version, crc); 540 + unsigned nofs_flags; 541 + struct bch_csum csum; 542 + int ret; 543 + 544 + nofs_flags = memalloc_nofs_save(); 545 + 546 + /* Reset iterator for checksumming and copying bounced data: */ 547 + if 
(rbio->bounce) { 548 + src->bi_iter.bi_size = crc.compressed_size << 9; 549 + src->bi_iter.bi_idx = 0; 550 + src->bi_iter.bi_bvec_done = 0; 551 + } else { 552 + src->bi_iter = rbio->bvec_iter; 553 + } 554 + 555 + csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); 556 + if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io) 557 + goto csum_err; 558 + 559 + /* 560 + * XXX 561 + * We need to rework the narrow_crcs path to deliver the read completion 562 + * first, and then punt to a different workqueue, otherwise we're 563 + * holding up reads while doing btree updates which is bad for memory 564 + * reclaim. 565 + */ 566 + if (unlikely(rbio->narrow_crcs)) 567 + bch2_rbio_narrow_crcs(rbio); 568 + 569 + if (rbio->flags & BCH_READ_NODECODE) 570 + goto nodecode; 571 + 572 + /* Adjust crc to point to subset of data we want: */ 573 + crc.offset += rbio->offset_into_extent; 574 + crc.live_size = bvec_iter_sectors(rbio->bvec_iter); 575 + 576 + if (crc_is_compressed(crc)) { 577 + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); 578 + if (ret) 579 + goto decrypt_err; 580 + 581 + if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) && 582 + !c->opts.no_data_io) 583 + goto decompression_err; 584 + } else { 585 + /* don't need to decrypt the entire bio: */ 586 + nonce = nonce_add(nonce, crc.offset << 9); 587 + bio_advance(src, crc.offset << 9); 588 + 589 + BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); 590 + src->bi_iter.bi_size = dst_iter.bi_size; 591 + 592 + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); 593 + if (ret) 594 + goto decrypt_err; 595 + 596 + if (rbio->bounce) { 597 + struct bvec_iter src_iter = src->bi_iter; 598 + 599 + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); 600 + } 601 + } 602 + 603 + if (rbio->promote) { 604 + /* 605 + * Re encrypt data we decrypted, so it's consistent with 606 + * rbio->crc: 607 + */ 608 + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); 609 + if (ret) 610 + goto decrypt_err; 611 + 612 + 
promote_start(rbio->promote, rbio); 613 + rbio->promote = NULL; 614 + } 615 + nodecode: 616 + if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { 617 + rbio = bch2_rbio_free(rbio); 618 + bch2_rbio_done(rbio); 619 + } 620 + out: 621 + memalloc_nofs_restore(nofs_flags); 622 + return; 623 + csum_err: 624 + /* 625 + * Checksum error: if the bio wasn't bounced, we may have been 626 + * reading into buffers owned by userspace (that userspace can 627 + * scribble over) - retry the read, bouncing it this time: 628 + */ 629 + if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { 630 + rbio->flags |= BCH_READ_MUST_BOUNCE; 631 + bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR); 632 + goto out; 633 + } 634 + 635 + bch_err_inum_offset_ratelimited(ca, 636 + rbio->read_pos.inode, 637 + rbio->read_pos.offset << 9, 638 + "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", 639 + rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, 640 + csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); 641 + bch2_io_error(ca); 642 + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); 643 + goto out; 644 + decompression_err: 645 + bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, 646 + rbio->read_pos.offset << 9, 647 + "decompression error"); 648 + bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); 649 + goto out; 650 + decrypt_err: 651 + bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, 652 + rbio->read_pos.offset << 9, 653 + "decrypt error"); 654 + bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); 655 + goto out; 656 + } 657 + 658 + static void bch2_read_endio(struct bio *bio) 659 + { 660 + struct bch_read_bio *rbio = 661 + container_of(bio, struct bch_read_bio, bio); 662 + struct bch_fs *c = rbio->c; 663 + struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); 664 + struct workqueue_struct *wq = NULL; 665 + enum rbio_context context = RBIO_CONTEXT_NULL; 666 + 667 + if (rbio->have_ioref) { 668 + bch2_latency_acct(ca, rbio->submit_time, READ); 669 + 
percpu_ref_put(&ca->io_ref); 670 + } 671 + 672 + if (!rbio->split) 673 + rbio->bio.bi_end_io = rbio->end_io; 674 + 675 + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, 676 + rbio->read_pos.inode, 677 + rbio->read_pos.offset, 678 + "data read error: %s", 679 + bch2_blk_status_to_str(bio->bi_status))) { 680 + bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); 681 + return; 682 + } 683 + 684 + if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || 685 + ptr_stale(ca, &rbio->pick.ptr)) { 686 + trace_and_count(c, read_reuse_race, &rbio->bio); 687 + 688 + if (rbio->flags & BCH_READ_RETRY_IF_STALE) 689 + bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); 690 + else 691 + bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN); 692 + return; 693 + } 694 + 695 + if (rbio->narrow_crcs || 696 + rbio->promote || 697 + crc_is_compressed(rbio->pick.crc) || 698 + bch2_csum_type_is_encryption(rbio->pick.crc.csum_type)) 699 + context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq; 700 + else if (rbio->pick.crc.csum_type) 701 + context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq; 702 + 703 + bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); 704 + } 705 + 706 + int __bch2_read_indirect_extent(struct btree_trans *trans, 707 + unsigned *offset_into_extent, 708 + struct bkey_buf *orig_k) 709 + { 710 + struct btree_iter iter; 711 + struct bkey_s_c k; 712 + u64 reflink_offset; 713 + int ret; 714 + 715 + reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + 716 + *offset_into_extent; 717 + 718 + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, 719 + POS(0, reflink_offset), 0); 720 + ret = bkey_err(k); 721 + if (ret) 722 + goto err; 723 + 724 + if (k.k->type != KEY_TYPE_reflink_v && 725 + k.k->type != KEY_TYPE_indirect_inline_data) { 726 + bch_err_inum_offset_ratelimited(trans->c, 727 + orig_k->k->k.p.inode, 728 + orig_k->k->k.p.offset << 9, 729 + "%llu len %u points to nonexistent indirect extent %llu", 730 + orig_k->k->k.p.offset, 731 + 
orig_k->k->k.size, 732 + reflink_offset); 733 + bch2_inconsistent_error(trans->c); 734 + ret = -EIO; 735 + goto err; 736 + } 737 + 738 + *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); 739 + bch2_bkey_buf_reassemble(orig_k, trans->c, k); 740 + err: 741 + bch2_trans_iter_exit(trans, &iter); 742 + return ret; 743 + } 744 + 745 + static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, 746 + struct bkey_s_c k, 747 + struct bch_extent_ptr ptr) 748 + { 749 + struct bch_fs *c = trans->c; 750 + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev); 751 + struct btree_iter iter; 752 + struct printbuf buf = PRINTBUF; 753 + int ret; 754 + 755 + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, 756 + PTR_BUCKET_POS(c, &ptr), 757 + BTREE_ITER_CACHED); 758 + 759 + prt_printf(&buf, "Attempting to read from stale dirty pointer:"); 760 + printbuf_indent_add(&buf, 2); 761 + prt_newline(&buf); 762 + 763 + bch2_bkey_val_to_text(&buf, c, k); 764 + prt_newline(&buf); 765 + 766 + prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); 767 + 768 + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); 769 + if (!ret) { 770 + prt_newline(&buf); 771 + bch2_bkey_val_to_text(&buf, c, k); 772 + } 773 + 774 + bch2_fs_inconsistent(c, "%s", buf.buf); 775 + 776 + bch2_trans_iter_exit(trans, &iter); 777 + printbuf_exit(&buf); 778 + } 779 + 780 + int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, 781 + struct bvec_iter iter, struct bpos read_pos, 782 + enum btree_id data_btree, struct bkey_s_c k, 783 + unsigned offset_into_extent, 784 + struct bch_io_failures *failed, unsigned flags) 785 + { 786 + struct bch_fs *c = trans->c; 787 + struct extent_ptr_decoded pick; 788 + struct bch_read_bio *rbio = NULL; 789 + struct bch_dev *ca = NULL; 790 + struct promote_op *promote = NULL; 791 + bool bounce = false, read_full = false, narrow_crcs = false; 792 + struct bpos data_pos = bkey_start_pos(k.k); 793 + int 
pick_ret; 794 + 795 + if (bkey_extent_is_inline_data(k.k)) { 796 + unsigned bytes = min_t(unsigned, iter.bi_size, 797 + bkey_inline_data_bytes(k.k)); 798 + 799 + swap(iter.bi_size, bytes); 800 + memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k)); 801 + swap(iter.bi_size, bytes); 802 + bio_advance_iter(&orig->bio, &iter, bytes); 803 + zero_fill_bio_iter(&orig->bio, iter); 804 + goto out_read_done; 805 + } 806 + retry_pick: 807 + pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); 808 + 809 + /* hole or reservation - just zero fill: */ 810 + if (!pick_ret) 811 + goto hole; 812 + 813 + if (pick_ret < 0) { 814 + bch_err_inum_offset_ratelimited(c, 815 + read_pos.inode, read_pos.offset << 9, 816 + "no device to read from"); 817 + goto err; 818 + } 819 + 820 + ca = bch_dev_bkey_exists(c, pick.ptr.dev); 821 + 822 + /* 823 + * Stale dirty pointers are treated as IO errors, but @failed isn't 824 + * allocated unless we're in the retry path - so if we're not in the 825 + * retry path, don't check here, it'll be caught in bch2_read_endio() 826 + * and we'll end up in the retry path: 827 + */ 828 + if ((flags & BCH_READ_IN_RETRY) && 829 + !pick.ptr.cached && 830 + unlikely(ptr_stale(ca, &pick.ptr))) { 831 + read_from_stale_dirty_pointer(trans, k, pick.ptr); 832 + bch2_mark_io_failure(failed, &pick); 833 + goto retry_pick; 834 + } 835 + 836 + /* 837 + * Unlock the iterator while the btree node's lock is still in 838 + * cache, before doing the IO: 839 + */ 840 + bch2_trans_unlock(trans); 841 + 842 + if (flags & BCH_READ_NODECODE) { 843 + /* 844 + * can happen if we retry, and the extent we were going to read 845 + * has been merged in the meantime: 846 + */ 847 + if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) 848 + goto hole; 849 + 850 + iter.bi_size = pick.crc.compressed_size << 9; 851 + goto get_bio; 852 + } 853 + 854 + if (!(flags & BCH_READ_LAST_FRAGMENT) || 855 + bio_flagged(&orig->bio, BIO_CHAIN)) 856 + flags |= BCH_READ_MUST_CLONE; 857 + 858 
+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) && 859 + bch2_can_narrow_extent_crcs(k, pick.crc); 860 + 861 + if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) 862 + flags |= BCH_READ_MUST_BOUNCE; 863 + 864 + EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); 865 + 866 + if (crc_is_compressed(pick.crc) || 867 + (pick.crc.csum_type != BCH_CSUM_none && 868 + (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || 869 + (bch2_csum_type_is_encryption(pick.crc.csum_type) && 870 + (flags & BCH_READ_USER_MAPPED)) || 871 + (flags & BCH_READ_MUST_BOUNCE)))) { 872 + read_full = true; 873 + bounce = true; 874 + } 875 + 876 + if (orig->opts.promote_target) 877 + promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags, 878 + &rbio, &bounce, &read_full); 879 + 880 + if (!read_full) { 881 + EBUG_ON(crc_is_compressed(pick.crc)); 882 + EBUG_ON(pick.crc.csum_type && 883 + (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || 884 + bvec_iter_sectors(iter) != pick.crc.live_size || 885 + pick.crc.offset || 886 + offset_into_extent)); 887 + 888 + data_pos.offset += offset_into_extent; 889 + pick.ptr.offset += pick.crc.offset + 890 + offset_into_extent; 891 + offset_into_extent = 0; 892 + pick.crc.compressed_size = bvec_iter_sectors(iter); 893 + pick.crc.uncompressed_size = bvec_iter_sectors(iter); 894 + pick.crc.offset = 0; 895 + pick.crc.live_size = bvec_iter_sectors(iter); 896 + offset_into_extent = 0; 897 + } 898 + get_bio: 899 + if (rbio) { 900 + /* 901 + * promote already allocated bounce rbio: 902 + * promote needs to allocate a bio big enough for uncompressing 903 + * data in the write path, but we're not going to use it all 904 + * here: 905 + */ 906 + EBUG_ON(rbio->bio.bi_iter.bi_size < 907 + pick.crc.compressed_size << 9); 908 + rbio->bio.bi_iter.bi_size = 909 + pick.crc.compressed_size << 9; 910 + } else if (bounce) { 911 + unsigned sectors = pick.crc.compressed_size; 912 + 913 + rbio = rbio_init(bio_alloc_bioset(NULL, 914 + DIV_ROUND_UP(sectors, 
PAGE_SECTORS), 915 + 0, 916 + GFP_NOFS, 917 + &c->bio_read_split), 918 + orig->opts); 919 + 920 + bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); 921 + rbio->bounce = true; 922 + rbio->split = true; 923 + } else if (flags & BCH_READ_MUST_CLONE) { 924 + /* 925 + * Have to clone if there were any splits, due to error 926 + * reporting issues (if a split errored, and retrying didn't 927 + * work, when it reports the error to its parent (us) we don't 928 + * know if the error was from our bio, and we should retry, or 929 + * from the whole bio, in which case we don't want to retry and 930 + * lose the error) 931 + */ 932 + rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS, 933 + &c->bio_read_split), 934 + orig->opts); 935 + rbio->bio.bi_iter = iter; 936 + rbio->split = true; 937 + } else { 938 + rbio = orig; 939 + rbio->bio.bi_iter = iter; 940 + EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN)); 941 + } 942 + 943 + EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size); 944 + 945 + rbio->c = c; 946 + rbio->submit_time = local_clock(); 947 + if (rbio->split) 948 + rbio->parent = orig; 949 + else 950 + rbio->end_io = orig->bio.bi_end_io; 951 + rbio->bvec_iter = iter; 952 + rbio->offset_into_extent= offset_into_extent; 953 + rbio->flags = flags; 954 + rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ); 955 + rbio->narrow_crcs = narrow_crcs; 956 + rbio->hole = 0; 957 + rbio->retry = 0; 958 + rbio->context = 0; 959 + /* XXX: only initialize this if needed */ 960 + rbio->devs_have = bch2_bkey_devs(k); 961 + rbio->pick = pick; 962 + rbio->subvol = orig->subvol; 963 + rbio->read_pos = read_pos; 964 + rbio->data_btree = data_btree; 965 + rbio->data_pos = data_pos; 966 + rbio->version = k.k->version; 967 + rbio->promote = promote; 968 + INIT_WORK(&rbio->work, NULL); 969 + 970 + rbio->bio.bi_opf = orig->bio.bi_opf; 971 + rbio->bio.bi_iter.bi_sector = pick.ptr.offset; 972 + rbio->bio.bi_end_io = bch2_read_endio; 973 + 974 + if (rbio->bounce) 975 + 
trace_and_count(c, read_bounce, &rbio->bio); 976 + 977 + this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); 978 + bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); 979 + 980 + /* 981 + * If it's being moved internally, we don't want to flag it as a cache 982 + * hit: 983 + */ 984 + if (pick.ptr.cached && !(flags & BCH_READ_NODECODE)) 985 + bch2_bucket_io_time_reset(trans, pick.ptr.dev, 986 + PTR_BUCKET_NR(ca, &pick.ptr), READ); 987 + 988 + if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { 989 + bio_inc_remaining(&orig->bio); 990 + trace_and_count(c, read_split, &orig->bio); 991 + } 992 + 993 + if (!rbio->pick.idx) { 994 + if (!rbio->have_ioref) { 995 + bch_err_inum_offset_ratelimited(c, 996 + read_pos.inode, 997 + read_pos.offset << 9, 998 + "no device to read from"); 999 + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); 1000 + goto out; 1001 + } 1002 + 1003 + this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user], 1004 + bio_sectors(&rbio->bio)); 1005 + bio_set_dev(&rbio->bio, ca->disk_sb.bdev); 1006 + 1007 + if (unlikely(c->opts.no_data_io)) { 1008 + if (likely(!(flags & BCH_READ_IN_RETRY))) 1009 + bio_endio(&rbio->bio); 1010 + } else { 1011 + if (likely(!(flags & BCH_READ_IN_RETRY))) 1012 + submit_bio(&rbio->bio); 1013 + else 1014 + submit_bio_wait(&rbio->bio); 1015 + } 1016 + 1017 + /* 1018 + * We just submitted IO which may block, we expect relock fail 1019 + * events and shouldn't count them: 1020 + */ 1021 + trans->notrace_relock_fail = true; 1022 + } else { 1023 + /* Attempting reconstruct read: */ 1024 + if (bch2_ec_read_extent(c, rbio)) { 1025 + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); 1026 + goto out; 1027 + } 1028 + 1029 + if (likely(!(flags & BCH_READ_IN_RETRY))) 1030 + bio_endio(&rbio->bio); 1031 + } 1032 + out: 1033 + if (likely(!(flags & BCH_READ_IN_RETRY))) { 1034 + return 0; 1035 + } else { 1036 + int ret; 1037 + 1038 + rbio->context = RBIO_CONTEXT_UNBOUND; 1039 + 
bch2_read_endio(&rbio->bio); 1040 + 1041 + ret = rbio->retry; 1042 + rbio = bch2_rbio_free(rbio); 1043 + 1044 + if (ret == READ_RETRY_AVOID) { 1045 + bch2_mark_io_failure(failed, &pick); 1046 + ret = READ_RETRY; 1047 + } 1048 + 1049 + if (!ret) 1050 + goto out_read_done; 1051 + 1052 + return ret; 1053 + } 1054 + 1055 + err: 1056 + if (flags & BCH_READ_IN_RETRY) 1057 + return READ_ERR; 1058 + 1059 + orig->bio.bi_status = BLK_STS_IOERR; 1060 + goto out_read_done; 1061 + 1062 + hole: 1063 + /* 1064 + * won't normally happen in the BCH_READ_NODECODE 1065 + * (bch2_move_extent()) path, but if we retry and the extent we wanted 1066 + * to read no longer exists we have to signal that: 1067 + */ 1068 + if (flags & BCH_READ_NODECODE) 1069 + orig->hole = true; 1070 + 1071 + zero_fill_bio_iter(&orig->bio, iter); 1072 + out_read_done: 1073 + if (flags & BCH_READ_LAST_FRAGMENT) 1074 + bch2_rbio_done(orig); 1075 + return 0; 1076 + } 1077 + 1078 + void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, 1079 + struct bvec_iter bvec_iter, subvol_inum inum, 1080 + struct bch_io_failures *failed, unsigned flags) 1081 + { 1082 + struct btree_trans trans; 1083 + struct btree_iter iter; 1084 + struct bkey_buf sk; 1085 + struct bkey_s_c k; 1086 + u32 snapshot; 1087 + int ret; 1088 + 1089 + BUG_ON(flags & BCH_READ_NODECODE); 1090 + 1091 + bch2_bkey_buf_init(&sk); 1092 + bch2_trans_init(&trans, c, 0, 0); 1093 + retry: 1094 + bch2_trans_begin(&trans); 1095 + iter = (struct btree_iter) { NULL }; 1096 + 1097 + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 1098 + if (ret) 1099 + goto err; 1100 + 1101 + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 1102 + SPOS(inum.inum, bvec_iter.bi_sector, snapshot), 1103 + BTREE_ITER_SLOTS); 1104 + while (1) { 1105 + unsigned bytes, sectors, offset_into_extent; 1106 + enum btree_id data_btree = BTREE_ID_extents; 1107 + 1108 + /* 1109 + * read_extent -> io_time_reset may cause a transaction restart 1110 + * without returning 
an error, we need to check for that here: 1111 + */ 1112 + ret = bch2_trans_relock(&trans); 1113 + if (ret) 1114 + break; 1115 + 1116 + bch2_btree_iter_set_pos(&iter, 1117 + POS(inum.inum, bvec_iter.bi_sector)); 1118 + 1119 + k = bch2_btree_iter_peek_slot(&iter); 1120 + ret = bkey_err(k); 1121 + if (ret) 1122 + break; 1123 + 1124 + offset_into_extent = iter.pos.offset - 1125 + bkey_start_offset(k.k); 1126 + sectors = k.k->size - offset_into_extent; 1127 + 1128 + bch2_bkey_buf_reassemble(&sk, c, k); 1129 + 1130 + ret = bch2_read_indirect_extent(&trans, &data_btree, 1131 + &offset_into_extent, &sk); 1132 + if (ret) 1133 + break; 1134 + 1135 + k = bkey_i_to_s_c(sk.k); 1136 + 1137 + /* 1138 + * With indirect extents, the amount of data to read is the min 1139 + * of the original extent and the indirect extent: 1140 + */ 1141 + sectors = min(sectors, k.k->size - offset_into_extent); 1142 + 1143 + bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; 1144 + swap(bvec_iter.bi_size, bytes); 1145 + 1146 + if (bvec_iter.bi_size == bytes) 1147 + flags |= BCH_READ_LAST_FRAGMENT; 1148 + 1149 + ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter.pos, 1150 + data_btree, k, 1151 + offset_into_extent, failed, flags); 1152 + if (ret) 1153 + break; 1154 + 1155 + if (flags & BCH_READ_LAST_FRAGMENT) 1156 + break; 1157 + 1158 + swap(bvec_iter.bi_size, bytes); 1159 + bio_advance_iter(&rbio->bio, &bvec_iter, bytes); 1160 + 1161 + ret = btree_trans_too_many_iters(&trans); 1162 + if (ret) 1163 + break; 1164 + } 1165 + err: 1166 + bch2_trans_iter_exit(&trans, &iter); 1167 + 1168 + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || 1169 + ret == READ_RETRY || 1170 + ret == READ_RETRY_AVOID) 1171 + goto retry; 1172 + 1173 + bch2_trans_exit(&trans); 1174 + bch2_bkey_buf_exit(&sk, c); 1175 + 1176 + if (ret) { 1177 + bch_err_inum_offset_ratelimited(c, inum.inum, 1178 + bvec_iter.bi_sector << 9, 1179 + "read error %i from btree lookup", ret); 1180 + rbio->bio.bi_status = 
BLK_STS_IOERR; 1181 + bch2_rbio_done(rbio); 1182 + } 1183 + } 1184 + 1185 + void bch2_fs_io_read_exit(struct bch_fs *c) 1186 + { 1187 + if (c->promote_table.tbl) 1188 + rhashtable_destroy(&c->promote_table); 1189 + bioset_exit(&c->bio_read_split); 1190 + bioset_exit(&c->bio_read); 1191 + } 1192 + 1193 + int bch2_fs_io_read_init(struct bch_fs *c) 1194 + { 1195 + if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), 1196 + BIOSET_NEED_BVECS)) 1197 + return -BCH_ERR_ENOMEM_bio_read_init; 1198 + 1199 + if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), 1200 + BIOSET_NEED_BVECS)) 1201 + return -BCH_ERR_ENOMEM_bio_read_split_init; 1202 + 1203 + if (rhashtable_init(&c->promote_table, &bch_promote_params)) 1204 + return -BCH_ERR_ENOMEM_promote_table_init; 1205 + 1206 + return 0; 1207 + }
+158
fs/bcachefs/io_read.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_READ_H
#define _BCACHEFS_IO_READ_H

#include "bkey_buf.h"

/*
 * Per-read state, allocated around (and embedding) the struct bio that does
 * the actual IO; split out of the old io_types.h as part of breaking up io.c.
 */
struct bch_read_bio {
	struct bch_fs		*c;
	/* set to local_clock() at submission (see bch2_read()) */
	u64			start_time;
	u64			submit_time;

	/*
	 * Reads will often have to be split, and if the extent being read from
	 * was checksummed or compressed we'll also have to allocate bounce
	 * buffers and copy the data back into the original bio.
	 *
	 * If we didn't have to split, we have to save and restore the original
	 * bi_end_io - @split below indicates which:
	 */
	union {
		struct bch_read_bio	*parent;
		bio_end_io_t		*end_io;
	};

	/*
	 * Saved copy of bio->bi_iter, from submission time - allows us to
	 * resubmit on IO error, and also to copy data back to the original bio
	 * when we're bouncing:
	 */
	struct bvec_iter	bvec_iter;

	unsigned		offset_into_extent;

	u16			flags;
	/*
	 * State bits, overlaid with @_state so they can be tested/cleared as a
	 * group; rbio_init() zeroes _state and bch2_read() asserts it is zero
	 * at submission:
	 */
	union {
	struct {
		u16		bounce:1,
				split:1,
				kmalloc:1,
				have_ioref:1,
				narrow_crcs:1,
				hole:1,
				retry:2,
				context:2;
	};
	u16			_state;
	};

	struct bch_devs_list	devs_have;

	struct extent_ptr_decoded pick;

	/*
	 * pos we read from - different from data_pos for indirect extents:
	 */
	u32			subvol;
	struct bpos		read_pos;

	/*
	 * start pos of data we read (may not be pos of data we want) - for
	 * promote, narrow extents paths:
	 */
	enum btree_id		data_btree;
	struct bpos		data_pos;
	struct bversion		version;

	struct promote_op	*promote;

	struct bch_io_opts	opts;

	struct work_struct	work;

	/* must be last: the bio is embedded, see to_rbio() */
	struct bio		bio;
};

/* Recover the enclosing bch_read_bio from its embedded struct bio: */
#define to_rbio(_bio)		container_of((_bio), struct bch_read_bio, bio)

struct bch_devs_mask;
struct cache_promote_op;
struct extent_ptr_decoded;

int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
				struct bkey_buf *);

/*
 * If @k is a reflink pointer, chase it: redirects @data_btree to
 * BTREE_ID_reflink and rewrites @k/@offset_into_extent via the out-of-line
 * helper. Any other key type is left untouched and we return 0.
 */
static inline int bch2_read_indirect_extent(struct btree_trans *trans,
					    enum btree_id *data_btree,
					    unsigned *offset_into_extent,
					    struct bkey_buf *k)
{
	if (k->k->k.type != KEY_TYPE_reflink_p)
		return 0;

	*data_btree = BTREE_ID_reflink;
	return __bch2_read_indirect_extent(trans, offset_into_extent, k);
}

enum bch_read_flags {
	BCH_READ_RETRY_IF_STALE		= 1 << 0,
	BCH_READ_MAY_PROMOTE		= 1 << 1,
	BCH_READ_USER_MAPPED		= 1 << 2,
	BCH_READ_NODECODE		= 1 << 3,
	BCH_READ_LAST_FRAGMENT		= 1 << 4,

	/* internal: */
	BCH_READ_MUST_BOUNCE		= 1 << 5,
	BCH_READ_MUST_CLONE		= 1 << 6,
	BCH_READ_IN_RETRY		= 1 << 7,
};

int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
		       struct bvec_iter, struct bpos, enum btree_id,
		       struct bkey_s_c, unsigned,
		       struct bch_io_failures *, unsigned);

/*
 * Convenience wrapper: read a single extent into @rbio's own iterator, with
 * no failure list (NULL -> no per-device failure tracking here).
 */
static inline void bch2_read_extent(struct btree_trans *trans,
		struct bch_read_bio *rbio, struct bpos read_pos,
		enum btree_id data_btree, struct bkey_s_c k,
		unsigned offset_into_extent, unsigned flags)
{
	__bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
			   data_btree, k, offset_into_extent, NULL, flags);
}

void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
		 subvol_inum, struct bch_io_failures *, unsigned flags);

/*
 * Top level read entry point: fills in @rbio's fs/timing/subvol fields and
 * submits with the default flag set (retry on stale pointers, allow cache
 * promotion, data destined for a user mapping).
 *
 * @rbio must come fresh from rbio_init(): we assert no state bits are set.
 */
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
			     subvol_inum inum)
{
	struct bch_io_failures failed = { .nr = 0 };

	BUG_ON(rbio->_state);

	rbio->c = c;
	rbio->start_time = local_clock();
	rbio->subvol = inum.subvol;

	__bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
		    BCH_READ_RETRY_IF_STALE|
		    BCH_READ_MAY_PROMOTE|
		    BCH_READ_USER_MAPPED);
}

/*
 * Initialize the read-path state around an already-allocated bio; @bio must
 * be embedded in a struct bch_read_bio (see to_rbio()). Only resets the
 * fields the read path examines before submission - state bits, promote
 * pointer, io opts.
 */
static inline struct bch_read_bio *rbio_init(struct bio *bio,
					     struct bch_io_opts opts)
{
	struct bch_read_bio *rbio = to_rbio(bio);

	rbio->_state	= 0;
	rbio->promote	= NULL;
	rbio->opts	= opts;
	return rbio;
}

void bch2_fs_io_read_exit(struct bch_fs *);
int bch2_fs_io_read_init(struct bch_fs *);

#endif /* _BCACHEFS_IO_READ_H */
+3 -72
fs/bcachefs/io_types.h fs/bcachefs/io_write_types.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _BCACHEFS_IO_TYPES_H 3 - #define _BCACHEFS_IO_TYPES_H 2 + #ifndef _BCACHEFS_IO_WRITE_TYPES_H 3 + #define _BCACHEFS_IO_WRITE_TYPES_H 4 4 5 5 #include "alloc_types.h" 6 6 #include "btree_types.h" ··· 12 12 13 13 #include <linux/llist.h> 14 14 #include <linux/workqueue.h> 15 - 16 - struct bch_read_bio { 17 - struct bch_fs *c; 18 - u64 start_time; 19 - u64 submit_time; 20 - 21 - /* 22 - * Reads will often have to be split, and if the extent being read from 23 - * was checksummed or compressed we'll also have to allocate bounce 24 - * buffers and copy the data back into the original bio. 25 - * 26 - * If we didn't have to split, we have to save and restore the original 27 - * bi_end_io - @split below indicates which: 28 - */ 29 - union { 30 - struct bch_read_bio *parent; 31 - bio_end_io_t *end_io; 32 - }; 33 - 34 - /* 35 - * Saved copy of bio->bi_iter, from submission time - allows us to 36 - * resubmit on IO error, and also to copy data back to the original bio 37 - * when we're bouncing: 38 - */ 39 - struct bvec_iter bvec_iter; 40 - 41 - unsigned offset_into_extent; 42 - 43 - u16 flags; 44 - union { 45 - struct { 46 - u16 bounce:1, 47 - split:1, 48 - kmalloc:1, 49 - have_ioref:1, 50 - narrow_crcs:1, 51 - hole:1, 52 - retry:2, 53 - context:2; 54 - }; 55 - u16 _state; 56 - }; 57 - 58 - struct bch_devs_list devs_have; 59 - 60 - struct extent_ptr_decoded pick; 61 - 62 - /* 63 - * pos we read from - different from data_pos for indirect extents: 64 - */ 65 - u32 subvol; 66 - struct bpos read_pos; 67 - 68 - /* 69 - * start pos of data we read (may not be pos of data we want) - for 70 - * promote, narrow extents paths: 71 - */ 72 - enum btree_id data_btree; 73 - struct bpos data_pos; 74 - struct bversion version; 75 - 76 - struct promote_op *promote; 77 - 78 - struct bch_io_opts opts; 79 - 80 - struct work_struct work; 81 - 82 - struct bio bio; 83 - }; 84 15 85 16 struct bch_write_bio { 86 17 struct_group(wbio, ··· 93 
162 struct bch_write_bio wbio; 94 163 }; 95 164 96 - #endif /* _BCACHEFS_IO_TYPES_H */ 165 + #endif /* _BCACHEFS_IO_WRITE_TYPES_H */
+110
fs/bcachefs/io_write.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_WRITE_H
#define _BCACHEFS_IO_WRITE_H

#include "checksum.h"
#include "io_write_types.h"

/* Recover the enclosing bch_write_bio from its embedded struct bio: */
#define to_wbio(_bio)			\
	container_of((_bio), struct bch_write_bio, bio)

void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);

/* Latency accounting compiles out to a no-op when disabled in Kconfig: */
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void bch2_latency_acct(struct bch_dev *, u64, int);
#else
static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
#endif

void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
			       enum bch_data_type, const struct bkey_i *, bool);

/*
 * X-macro list of write flags: expanded twice below, once into sequential
 * bit indices (__BCH_WRITE_*) and once into the corresponding masks
 * (BCH_WRITE_*). Order here fixes the bit values - do not reorder.
 */
#define BCH_WRITE_FLAGS()		\
	x(ALLOC_NOWAIT)			\
	x(CACHED)			\
	x(DATA_ENCODED)			\
	x(PAGES_STABLE)			\
	x(PAGES_OWNED)			\
	x(ONLY_SPECIFIED_DEVS)		\
	x(WROTE_DATA_INLINE)		\
	x(FROM_INTERNAL)		\
	x(CHECK_ENOSPC)			\
	x(SYNC)				\
	x(MOVE)				\
	x(IN_WORKER)			\
	x(DONE)				\
	x(IO_ERROR)			\
	x(CONVERT_UNWRITTEN)

enum __bch_write_flags {
#define x(f)	__BCH_WRITE_##f,
	BCH_WRITE_FLAGS()
#undef x
};

enum bch_write_flags {
#define x(f)	BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
	BCH_WRITE_FLAGS()
#undef x
};

/*
 * Workqueue for this write op's index updates: copygc writes get their own
 * queue so they can't be starved by (or deadlock against) normal btree
 * updates.
 */
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
	return op->watermark == BCH_WATERMARK_copygc
		? op->c->copygc_wq
		: op->c->btree_update_wq;
}

int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
			       struct bkey_i *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, subvol_inum,
		       struct btree_iter *, struct bkey_i *,
		       struct disk_reservation *, u64, s64 *, bool);

/*
 * Reset a write op to its defaults before submission. Note the sentinel
 * defaults: nr_replicas = 0 and pos = POS_MAX - callers are presumably
 * expected to fill these in before bch2_write() (defined in io_write.c, not
 * in view here - confirm against callers).
 */
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
				      struct bch_io_opts opts)
{
	op->c			= c;
	op->end_io		= NULL;
	op->flags		= 0;
	op->written		= 0;
	op->error		= 0;
	/* checksum type derives from the fs + per-inode io options: */
	op->csum_type		= bch2_data_checksum_type(c, opts);
	op->compression_opt	= opts.compression;
	op->nr_replicas		= 0;
	op->nr_replicas_required = c->opts.data_replicas_required;
	op->watermark		= BCH_WATERMARK_normal;
	op->incompressible	= 0;
	op->open_buckets.nr	= 0;
	op->devs_have.nr	= 0;
	op->target		= 0;
	op->opts		= opts;
	op->subvol		= 0;
	op->pos			= POS_MAX;
	op->version		= ZERO_VERSION;
	op->write_point		= (struct write_point_specifier) { 0 };
	op->res			= (struct disk_reservation) { 0 };
	op->new_i_size		= U64_MAX;
	op->i_sectors_delta	= 0;
	op->devs_need_flush	= NULL;
}

void bch2_write(struct closure *);

void bch2_write_point_do_index_updates(struct work_struct *);

/*
 * Initialize the write-path state around an already-allocated bio; @bio must
 * be embedded in a struct bch_write_bio (see to_wbio()). Zeroes only the
 * wbio struct_group - the embedded bio itself is left untouched.
 */
static inline struct bch_write_bio *wbio_init(struct bio *bio)
{
	struct bch_write_bio *wbio = to_wbio(bio);

	memset(&wbio->wbio, 0, sizeof(wbio->wbio));
	return wbio;
}

void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);

void bch2_fs_io_write_exit(struct bch_fs *);
int bch2_fs_io_write_init(struct bch_fs *);

#endif /* _BCACHEFS_IO_WRITE_H */
-1
fs/bcachefs/journal_io.c
··· 8 8 #include "checksum.h" 9 9 #include "disk_groups.h" 10 10 #include "error.h" 11 - #include "io.h" 12 11 #include "journal.h" 13 12 #include "journal_io.h" 14 13 #include "journal_reclaim.h"
+1 -1
fs/bcachefs/migrate.c
··· 10 10 #include "buckets.h" 11 11 #include "errcode.h" 12 12 #include "extents.h" 13 - #include "io.h" 13 + #include "io_write.h" 14 14 #include "journal.h" 15 15 #include "keylist.h" 16 16 #include "migrate.h"
+2 -1
fs/bcachefs/move.c
··· 14 14 #include "errcode.h" 15 15 #include "error.h" 16 16 #include "inode.h" 17 - #include "io.h" 17 + #include "io_read.h" 18 + #include "io_write.h" 18 19 #include "journal_reclaim.h" 19 20 #include "keylist.h" 20 21 #include "move.h"
+1
fs/bcachefs/move.h
··· 2 2 #ifndef _BCACHEFS_MOVE_H 3 3 #define _BCACHEFS_MOVE_H 4 4 5 + #include "bcachefs_ioctl.h" 5 6 #include "btree_iter.h" 6 7 #include "buckets.h" 7 8 #include "data_update.h"
-8
fs/bcachefs/movinggc.c
··· 13 13 #include "btree_write_buffer.h" 14 14 #include "buckets.h" 15 15 #include "clock.h" 16 - #include "disk_groups.h" 17 16 #include "errcode.h" 18 17 #include "error.h" 19 - #include "extents.h" 20 - #include "eytzinger.h" 21 - #include "io.h" 22 - #include "keylist.h" 23 18 #include "lru.h" 24 19 #include "move.h" 25 20 #include "movinggc.h" 26 - #include "super-io.h" 27 21 #include "trace.h" 28 22 29 - #include <linux/bsearch.h> 30 23 #include <linux/freezer.h> 31 24 #include <linux/kthread.h> 32 25 #include <linux/math64.h> 33 26 #include <linux/sched/task.h> 34 - #include <linux/sort.h> 35 27 #include <linux/wait.h> 36 28 37 29 struct buckets_in_flight {
-2
fs/bcachefs/rebalance.c
··· 8 8 #include "compress.h" 9 9 #include "disk_groups.h" 10 10 #include "errcode.h" 11 - #include "extents.h" 12 - #include "io.h" 13 11 #include "move.h" 14 12 #include "rebalance.h" 15 13 #include "super-io.h"
+3 -1
fs/bcachefs/reflink.c
··· 5 5 #include "buckets.h" 6 6 #include "extents.h" 7 7 #include "inode.h" 8 - #include "io.h" 8 + #include "io_misc.h" 9 + #include "io_write.h" 9 10 #include "reflink.h" 10 11 #include "subvolume.h" 12 + #include "super-io.h" 11 13 12 14 #include <linux/sched/signal.h> 13 15
-1
fs/bcachefs/super-io.c
··· 6 6 #include "disk_groups.h" 7 7 #include "ec.h" 8 8 #include "error.h" 9 - #include "io.h" 10 9 #include "journal.h" 11 10 #include "journal_sb.h" 12 11 #include "journal_seq_blacklist.h"
+6 -3
fs/bcachefs/super.c
··· 35 35 #include "fs-io-direct.h" 36 36 #include "fsck.h" 37 37 #include "inode.h" 38 - #include "io.h" 38 + #include "io_read.h" 39 + #include "io_write.h" 39 40 #include "journal.h" 40 41 #include "journal_reclaim.h" 41 42 #include "journal_seq_blacklist.h" ··· 484 483 bch2_fs_fsio_exit(c); 485 484 bch2_fs_ec_exit(c); 486 485 bch2_fs_encryption_exit(c); 487 - bch2_fs_io_exit(c); 486 + bch2_fs_io_write_exit(c); 487 + bch2_fs_io_read_exit(c); 488 488 bch2_fs_buckets_waiting_for_journal_exit(c); 489 489 bch2_fs_btree_interior_update_exit(c); 490 490 bch2_fs_btree_iter_exit(c); ··· 850 848 bch2_fs_buckets_waiting_for_journal_init(c) ?: 851 849 bch2_fs_btree_write_buffer_init(c) ?: 852 850 bch2_fs_subvolumes_init(c) ?: 853 - bch2_fs_io_init(c) ?: 851 + bch2_fs_io_read_init(c) ?: 852 + bch2_fs_io_write_init(c) ?: 854 853 bch2_fs_nocow_locking_init(c) ?: 855 854 bch2_fs_encryption_init(c) ?: 856 855 bch2_fs_compress_init(c) ?: