Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-6.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
"User visible changes:

- fall back to buffered write if direct io is done on a file that
requires checksums
- this avoids a problem with checksum mismatch errors, observed
e.g. on virtual images when writes to pages under writeback
cause the checksum mismatch reports
- this may lead to some performance degradation but currently the
recommended setup for VM images is to use the NOCOW file
attribute that also disables checksums

- fast/realtime zstd levels -15 to -1
- supported by mount options (compress=zstd:-5) and defrag ioctl
- improved speed, reduced compression ratio, check the commit for
sample measurements

- defrag ioctl extended to accept negative compression levels

- subpage mode
- remove warning when subpage mode is used, the feature is now
reasonably complete and tested
- in debug mode allow creating 2K b-tree nodes, to allow testing
subpage on x86_64 with 4K pages too

Performance improvements:

- in send, better file path caching improves runtime (on sample load
by -30%)

- on s390x with hardware zlib support prepare the input buffer in a
better way to get the best results from the acceleration

- minor speed improvement in encoded read, avoid memory allocation in
synchronous mode

Core:

- enable stable writes on inodes, replacing manually waiting for
writeback and allowing to skip that on inodes without checksums

- add last checks and warnings for out-of-band dirty writes to pages,
requiring a fixup ("fixup worker"), this should not be necessary
since 5.8 where get_user_pages() and pin_user_pages*() prevent this
- long history behind that, we'll be happy to remove the whole
infrastructure in the near future

- more folio API conversions and preparations for large folio support

- subpage cleanups and refactoring, split handling of data and
metadata to allow future support for large folios

- readpage works as block-by-block, no change for normal mode, this
is preparation for future subpage updates

- block group refcount fixes and hardening

- delayed iput fixes

- in zoned mode, fix zone activation on filesystem with missing
devices

Cleanups:

- inode parameter cleanups

- path auto-freeing updates

- code flow simplifications in send

- redundant parameter cleanups"

* tag 'for-6.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (164 commits)
btrfs: zoned: fix zone finishing with missing devices
btrfs: zoned: fix zone activation with missing devices
btrfs: remove end_no_trans label from btrfs_log_inode_parent()
btrfs: simplify condition for logging new dentries at btrfs_log_inode_parent()
btrfs: remove redundant else statement from btrfs_log_inode_parent()
btrfs: use memcmp_extent_buffer() at replay_one_extent()
btrfs: update outdated comment for overwrite_item()
btrfs: use variables to store extent buffer and slot at overwrite_item()
btrfs: avoid unnecessary memory allocation and copy at overwrite_item()
btrfs: don't clobber ret in btrfs_validate_super()
btrfs: prepare btrfs_page_mkwrite() for large folios
btrfs: prepare extent_io.c for future large folio support
btrfs: prepare btrfs_launder_folio() for large folios support
btrfs: replace PAGE_SIZE with folio_size for subpage.[ch]
btrfs: add a size parameter to btrfs_alloc_subpage()
btrfs: subpage: make btrfs_is_subpage() check against a folio
btrfs: add extra warning if delayed iput is added when it's not allowed
btrfs: avoid redundant path slot assignment in btrfs_search_forward()
btrfs: remove unnecessary btrfs_key local variable in btrfs_search_forward()
btrfs: simplify the return value handling in search_ioctl()
...

+2263 -1914
+1
fs/btrfs/accessors.h
··· 12 12 #include <linux/string.h> 13 13 #include <linux/mm.h> 14 14 #include <uapi/linux/btrfs_tree.h> 15 + #include "extent_io.h" 15 16 16 17 struct extent_buffer; 17 18
+2
fs/btrfs/acl.h
··· 3 3 #ifndef BTRFS_ACL_H 4 4 #define BTRFS_ACL_H 5 5 6 + #include <linux/types.h> 7 + 6 8 struct posix_acl; 7 9 struct inode; 8 10 struct btrfs_trans_handle;
+5 -6
fs/btrfs/async-thread.c
··· 168 168 { 169 169 int new_current_active; 170 170 long pending; 171 - int need_change = 0; 171 + bool need_change = false; 172 172 173 173 if (wq->thresh == NO_THRESHOLD) 174 174 return; ··· 196 196 new_current_active--; 197 197 new_current_active = clamp_val(new_current_active, 1, wq->limit_active); 198 198 if (new_current_active != wq->current_active) { 199 - need_change = 1; 199 + need_change = true; 200 200 wq->current_active = new_current_active; 201 201 } 202 202 out: 203 203 spin_unlock(&wq->thres_lock); 204 204 205 - if (need_change) { 205 + if (need_change) 206 206 workqueue_set_max_active(wq->normal_wq, wq->current_active); 207 - } 208 207 } 209 208 210 209 static void run_ordered_work(struct btrfs_workqueue *wq, ··· 295 296 struct btrfs_work *work = container_of(normal_work, struct btrfs_work, 296 297 normal_work); 297 298 struct btrfs_workqueue *wq = work->wq; 298 - int need_order = 0; 299 + bool need_order = false; 299 300 300 301 /* 301 302 * We should not touch things inside work in the following cases: ··· 306 307 * So we save the needed things here. 307 308 */ 308 309 if (work->ordered_func) 309 - need_order = 1; 310 + need_order = true; 310 311 311 312 trace_btrfs_work_sched(work); 312 313 thresh_exec_hook(wq);
+2 -2
fs/btrfs/backref.c
··· 1399 1399 ASSERT(ctx->roots == NULL); 1400 1400 1401 1401 key.objectid = ctx->bytenr; 1402 - key.offset = (u64)-1; 1403 1402 if (btrfs_fs_incompat(ctx->fs_info, SKINNY_METADATA)) 1404 1403 key.type = BTRFS_METADATA_ITEM_KEY; 1405 1404 else 1406 1405 key.type = BTRFS_EXTENT_ITEM_KEY; 1406 + key.offset = (u64)-1; 1407 1407 1408 1408 path = btrfs_alloc_path(); 1409 1409 if (!path) ··· 2206 2206 struct btrfs_extent_item *ei; 2207 2207 struct btrfs_key key; 2208 2208 2209 + key.objectid = logical; 2209 2210 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) 2210 2211 key.type = BTRFS_METADATA_ITEM_KEY; 2211 2212 else 2212 2213 key.type = BTRFS_EXTENT_ITEM_KEY; 2213 - key.objectid = logical; 2214 2214 key.offset = (u64)-1; 2215 2215 2216 2216 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+15 -23
fs/btrfs/bio.c
··· 97 97 return bbio; 98 98 } 99 99 100 - /* Free a bio that was never submitted to the underlying device. */ 101 - static void btrfs_cleanup_bio(struct btrfs_bio *bbio) 102 - { 103 - if (bbio_has_ordered_extent(bbio)) 104 - btrfs_put_ordered_extent(bbio->ordered); 105 - bio_put(&bbio->bio); 106 - } 107 - 108 - static void __btrfs_bio_end_io(struct btrfs_bio *bbio) 109 - { 110 - if (bbio_has_ordered_extent(bbio)) { 111 - struct btrfs_ordered_extent *ordered = bbio->ordered; 112 - 113 - bbio->end_io(bbio); 114 - btrfs_put_ordered_extent(ordered); 115 - } else { 116 - bbio->end_io(bbio); 117 - } 118 - } 119 - 120 100 void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) 121 101 { 122 102 bbio->bio.bi_status = status; 123 103 if (bbio->bio.bi_pool == &btrfs_clone_bioset) { 124 104 struct btrfs_bio *orig_bbio = bbio->private; 125 105 126 - btrfs_cleanup_bio(bbio); 106 + /* Free bio that was never submitted to the underlying device. */ 107 + if (bbio_has_ordered_extent(bbio)) 108 + btrfs_put_ordered_extent(bbio->ordered); 109 + bio_put(&bbio->bio); 110 + 127 111 bbio = orig_bbio; 128 112 } 129 113 ··· 122 138 /* Load split bio's error which might be set above. */ 123 139 if (status == BLK_STS_OK) 124 140 bbio->bio.bi_status = READ_ONCE(bbio->status); 125 - __btrfs_bio_end_io(bbio); 141 + 142 + if (bbio_has_ordered_extent(bbio)) { 143 + struct btrfs_ordered_extent *ordered = bbio->ordered; 144 + 145 + bbio->end_io(bbio); 146 + btrfs_put_ordered_extent(ordered); 147 + } else { 148 + bbio->end_io(bbio); 149 + } 126 150 } 127 151 } 128 152 ··· 573 581 574 582 /* If an error occurred we just want to clean up the bio and move on. */ 575 583 if (bio->bi_status) { 576 - btrfs_bio_end_io(async->bbio, async->bbio->bio.bi_status); 584 + btrfs_bio_end_io(async->bbio, bio->bi_status); 577 585 return; 578 586 } 579 587
+95 -60
fs/btrfs/block-group.c
··· 191 191 /* 192 192 * This adds the block group to the fs_info rb tree for the block group cache 193 193 */ 194 - static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 195 - struct btrfs_block_group *block_group) 194 + static int btrfs_add_block_group_cache(struct btrfs_block_group *block_group) 196 195 { 196 + struct btrfs_fs_info *fs_info = block_group->fs_info; 197 197 struct rb_node *exist; 198 198 int ret = 0; 199 199 200 200 ASSERT(block_group->length != 0); 201 201 202 - write_lock(&info->block_group_cache_lock); 202 + write_lock(&fs_info->block_group_cache_lock); 203 203 204 204 exist = rb_find_add_cached(&block_group->cache_node, 205 - &info->block_group_cache_tree, btrfs_bg_start_cmp); 205 + &fs_info->block_group_cache_tree, btrfs_bg_start_cmp); 206 206 if (exist) 207 207 ret = -EEXIST; 208 - write_unlock(&info->block_group_cache_lock); 208 + write_unlock(&fs_info->block_group_cache_lock); 209 209 210 210 return ret; 211 211 } ··· 584 584 struct btrfs_root *extent_root; 585 585 u64 search_offset; 586 586 u64 search_end = block_group->start + block_group->length; 587 - struct btrfs_path *path; 587 + BTRFS_PATH_AUTO_FREE(path); 588 588 struct btrfs_key search_key; 589 589 int ret = 0; 590 590 ··· 626 626 627 627 lockdep_assert_held(&caching_ctl->mutex); 628 628 lockdep_assert_held_read(&fs_info->commit_root_sem); 629 - btrfs_free_path(path); 630 629 return ret; 631 630 } 632 631 ··· 737 738 path->reada = READA_FORWARD; 738 739 739 740 key.objectid = last; 740 - key.offset = 0; 741 741 key.type = BTRFS_EXTENT_ITEM_KEY; 742 + key.offset = 0; 742 743 743 744 next: 744 745 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); ··· 784 785 785 786 if (key.objectid < last) { 786 787 key.objectid = last; 787 - key.offset = 0; 788 788 key.type = BTRFS_EXTENT_ITEM_KEY; 789 + key.offset = 0; 789 790 btrfs_release_path(path); 790 791 goto next; 791 792 } ··· 1456 1457 } 1457 1458 1458 1459 /* 1460 + * Link the block_group to a list via bg_list. 
1461 + * 1462 + * @bg: The block_group to link to the list. 1463 + * @list: The list to link it to. 1464 + * 1465 + * Use this rather than list_add_tail() directly to ensure proper respect 1466 + * to locking and refcounting. 1467 + * 1468 + * Returns: true if the bg was linked with a refcount bump and false otherwise. 1469 + */ 1470 + static bool btrfs_link_bg_list(struct btrfs_block_group *bg, struct list_head *list) 1471 + { 1472 + struct btrfs_fs_info *fs_info = bg->fs_info; 1473 + bool added = false; 1474 + 1475 + spin_lock(&fs_info->unused_bgs_lock); 1476 + if (list_empty(&bg->bg_list)) { 1477 + btrfs_get_block_group(bg); 1478 + list_add_tail(&bg->bg_list, list); 1479 + added = true; 1480 + } 1481 + spin_unlock(&fs_info->unused_bgs_lock); 1482 + return added; 1483 + } 1484 + 1485 + /* 1459 1486 * Process the unused_bgs list and remove any that don't have any allocated 1460 1487 * space inside of them. 1461 1488 */ ··· 1596 1571 * drop under the "next" label for the 1597 1572 * fs_info->unused_bgs list. 1598 1573 */ 1599 - btrfs_get_block_group(block_group); 1600 - list_add_tail(&block_group->bg_list, &retry_list); 1574 + btrfs_link_bg_list(block_group, &retry_list); 1601 1575 1602 1576 trace_btrfs_skip_unused_block_group(block_group); 1603 1577 spin_unlock(&block_group->lock); ··· 1847 1823 list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp); 1848 1824 while (!list_empty(&fs_info->reclaim_bgs)) { 1849 1825 u64 zone_unusable; 1850 - u64 reclaimed; 1826 + u64 used; 1827 + u64 reserved; 1851 1828 int ret = 0; 1852 1829 1853 1830 bg = list_first_entry(&fs_info->reclaim_bgs, ··· 1912 1887 up_write(&space_info->groups_sem); 1913 1888 goto next; 1914 1889 } 1890 + 1891 + /* 1892 + * Cache the zone_unusable value before turning the block group 1893 + * to read only. As soon as the block group is read only it's 1894 + * zone_unusable value gets moved to the block group's read-only 1895 + * bytes and isn't available for calculations anymore. 
We also 1896 + * cache it before unlocking the block group, to prevent races 1897 + * (reports from KCSAN and such tools) with tasks updating it. 1898 + */ 1899 + zone_unusable = bg->zone_unusable; 1900 + 1915 1901 spin_unlock(&bg->lock); 1916 1902 spin_unlock(&space_info->lock); 1917 1903 ··· 1939 1903 goto next; 1940 1904 } 1941 1905 1942 - /* 1943 - * Cache the zone_unusable value before turning the block group 1944 - * to read only. As soon as the blog group is read only it's 1945 - * zone_unusable value gets moved to the block group's read-only 1946 - * bytes and isn't available for calculations anymore. 1947 - */ 1948 - zone_unusable = bg->zone_unusable; 1949 1906 ret = inc_block_group_ro(bg, 0); 1950 1907 up_write(&space_info->groups_sem); 1951 1908 if (ret < 0) 1952 1909 goto next; 1953 1910 1911 + /* 1912 + * The amount of bytes reclaimed corresponds to the sum of the 1913 + * "used" and "reserved" counters. We have set the block group 1914 + * to RO above, which prevents reservations from happening but 1915 + * we may have existing reservations for which allocation has 1916 + * not yet been done - btrfs_update_block_group() was not yet 1917 + * called, which is where we will transfer a reserved extent's 1918 + * size from the "reserved" counter to the "used" counter - this 1919 + * happens when running delayed references. When we relocate the 1920 + * chunk below, relocation first flushes dellaloc, waits for 1921 + * ordered extent completion (which is where we create delayed 1922 + * references for data extents) and commits the current 1923 + * transaction (which runs delayed references), and only after 1924 + * it does the actual work to move extents out of the block 1925 + * group. So the reported amount of reclaimed bytes is 1926 + * effectively the sum of the 'used' and 'reserved' counters. 
1927 + */ 1928 + spin_lock(&bg->lock); 1929 + used = bg->used; 1930 + reserved = bg->reserved; 1931 + spin_unlock(&bg->lock); 1932 + 1954 1933 btrfs_info(fs_info, 1955 - "reclaiming chunk %llu with %llu%% used %llu%% unusable", 1934 + "reclaiming chunk %llu with %llu%% used %llu%% reserved %llu%% unusable", 1956 1935 bg->start, 1957 - div64_u64(bg->used * 100, bg->length), 1936 + div64_u64(used * 100, bg->length), 1937 + div64_u64(reserved * 100, bg->length), 1958 1938 div64_u64(zone_unusable * 100, bg->length)); 1959 1939 trace_btrfs_reclaim_block_group(bg); 1960 - reclaimed = bg->used; 1961 1940 ret = btrfs_relocate_chunk(fs_info, bg->start); 1962 1941 if (ret) { 1963 1942 btrfs_dec_block_group_ro(bg); 1964 1943 btrfs_err(fs_info, "error relocating chunk %llu", 1965 1944 bg->start); 1966 - reclaimed = 0; 1945 + used = 0; 1946 + reserved = 0; 1967 1947 spin_lock(&space_info->lock); 1968 1948 space_info->reclaim_errors++; 1969 1949 if (READ_ONCE(space_info->periodic_reclaim)) ··· 1988 1936 } 1989 1937 spin_lock(&space_info->lock); 1990 1938 space_info->reclaim_count++; 1991 - space_info->reclaim_bytes += reclaimed; 1939 + space_info->reclaim_bytes += used; 1940 + space_info->reclaim_bytes += reserved; 1992 1941 spin_unlock(&space_info->lock); 1993 1942 1994 1943 next: 1995 - if (ret && !READ_ONCE(space_info->periodic_reclaim)) { 1996 - /* Refcount held by the reclaim_bgs list after splice. */ 1997 - spin_lock(&fs_info->unused_bgs_lock); 1998 - /* 1999 - * This block group might be added to the unused list 2000 - * during the above process. Move it back to the 2001 - * reclaim list otherwise. 
2002 - */ 2003 - if (list_empty(&bg->bg_list)) { 2004 - btrfs_get_block_group(bg); 2005 - list_add_tail(&bg->bg_list, &retry_list); 2006 - } 2007 - spin_unlock(&fs_info->unused_bgs_lock); 2008 - } 1944 + if (ret && !READ_ONCE(space_info->periodic_reclaim)) 1945 + btrfs_link_bg_list(bg, &retry_list); 2009 1946 btrfs_put_block_group(bg); 2010 1947 2011 1948 mutex_unlock(&fs_info->reclaim_bgs_lock); ··· 2034 1993 { 2035 1994 struct btrfs_fs_info *fs_info = bg->fs_info; 2036 1995 2037 - spin_lock(&fs_info->unused_bgs_lock); 2038 - if (list_empty(&bg->bg_list)) { 2039 - btrfs_get_block_group(bg); 1996 + if (btrfs_link_bg_list(bg, &fs_info->reclaim_bgs)) 2040 1997 trace_btrfs_add_reclaim_block_group(bg); 2041 - list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs); 2042 - } 2043 - spin_unlock(&fs_info->unused_bgs_lock); 2044 1998 } 2045 1999 2046 2000 static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key *key, ··· 2446 2410 goto error; 2447 2411 } 2448 2412 2449 - ret = btrfs_add_block_group_cache(info, cache); 2413 + ret = btrfs_add_block_group_cache(cache); 2450 2414 if (ret) { 2451 2415 btrfs_remove_free_space_cache(cache); 2452 2416 goto error; ··· 2495 2459 bg->cached = BTRFS_CACHE_FINISHED; 2496 2460 bg->used = map->chunk_len; 2497 2461 bg->flags = map->type; 2498 - ret = btrfs_add_block_group_cache(fs_info, bg); 2462 + ret = btrfs_add_block_group_cache(bg); 2499 2463 /* 2500 2464 * We may have some valid block group cache added already, in 2501 2465 * that case we skip to the next one. 
··· 2545 2509 return fill_dummy_bgs(info); 2546 2510 2547 2511 key.objectid = 0; 2548 - key.offset = 0; 2549 2512 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 2513 + key.offset = 0; 2550 2514 path = btrfs_alloc_path(); 2551 2515 if (!path) 2552 2516 return -ENOMEM; ··· 2677 2641 { 2678 2642 struct btrfs_fs_info *fs_info = device->fs_info; 2679 2643 struct btrfs_root *root = fs_info->dev_root; 2680 - struct btrfs_path *path; 2644 + BTRFS_PATH_AUTO_FREE(path); 2681 2645 struct btrfs_dev_extent *extent; 2682 2646 struct extent_buffer *leaf; 2683 2647 struct btrfs_key key; ··· 2694 2658 key.offset = start; 2695 2659 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent)); 2696 2660 if (ret) 2697 - goto out; 2661 + return ret; 2698 2662 2699 2663 leaf = path->nodes[0]; 2700 2664 extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); ··· 2702 2666 btrfs_set_dev_extent_chunk_objectid(leaf, extent, 2703 2667 BTRFS_FIRST_CHUNK_TREE_OBJECTID); 2704 2668 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); 2705 - 2706 2669 btrfs_set_dev_extent_length(leaf, extent, num_bytes); 2707 - out: 2708 - btrfs_free_path(path); 2670 + 2709 2671 return ret; 2710 2672 } 2711 2673 ··· 2805 2771 /* Already aborted the transaction if it failed. 
*/ 2806 2772 next: 2807 2773 btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info); 2774 + 2775 + spin_lock(&fs_info->unused_bgs_lock); 2808 2776 list_del_init(&block_group->bg_list); 2809 2777 clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags); 2778 + btrfs_put_block_group(block_group); 2779 + spin_unlock(&fs_info->unused_bgs_lock); 2810 2780 2811 2781 /* 2812 2782 * If the block group is still unused, add it to the list of ··· 2926 2888 cache->space_info = btrfs_find_space_info(fs_info, cache->flags); 2927 2889 ASSERT(cache->space_info); 2928 2890 2929 - ret = btrfs_add_block_group_cache(fs_info, cache); 2891 + ret = btrfs_add_block_group_cache(cache); 2930 2892 if (ret) { 2931 2893 btrfs_remove_free_space_cache(cache); 2932 2894 btrfs_put_block_group(cache); ··· 2948 2910 } 2949 2911 #endif 2950 2912 2951 - list_add_tail(&cache->bg_list, &trans->new_bgs); 2913 + btrfs_link_bg_list(cache, &trans->new_bgs); 2952 2914 btrfs_inc_delayed_refs_rsv_bg_inserts(fs_info); 2953 2915 2954 2916 set_avail_alloc_bits(fs_info, type); ··· 3344 3306 struct btrfs_fs_info *fs_info = trans->fs_info; 3345 3307 struct btrfs_block_group *cache, *tmp; 3346 3308 struct btrfs_transaction *cur_trans = trans->transaction; 3347 - struct btrfs_path *path; 3309 + BTRFS_PATH_AUTO_FREE(path); 3348 3310 3349 3311 if (list_empty(&cur_trans->dirty_bgs) || 3350 3312 !btrfs_test_opt(fs_info, SPACE_CACHE)) ··· 3361 3323 cache_save_setup(cache, trans, path); 3362 3324 } 3363 3325 3364 - btrfs_free_path(path); 3365 3326 return 0; 3366 3327 } 3367 3328 ··· 3383 3346 struct btrfs_transaction *cur_trans = trans->transaction; 3384 3347 int ret = 0; 3385 3348 int should_put; 3386 - struct btrfs_path *path = NULL; 3349 + BTRFS_PATH_AUTO_FREE(path); 3387 3350 LIST_HEAD(dirty); 3388 3351 struct list_head *io = &cur_trans->io_bgs; 3389 3352 int loops = 0; ··· 3538 3501 btrfs_cleanup_dirty_bgs(cur_trans, fs_info); 3539 3502 } 3540 3503 3541 - btrfs_free_path(path); 3542 3504 return ret; 3543 3505 } 3544 3506 
··· 3548 3512 struct btrfs_transaction *cur_trans = trans->transaction; 3549 3513 int ret = 0; 3550 3514 int should_put; 3551 - struct btrfs_path *path; 3515 + BTRFS_PATH_AUTO_FREE(path); 3552 3516 struct list_head *io = &cur_trans->io_bgs; 3553 3517 3554 3518 path = btrfs_alloc_path(); ··· 3660 3624 btrfs_put_block_group(cache); 3661 3625 } 3662 3626 3663 - btrfs_free_path(path); 3664 3627 return ret; 3665 3628 } 3666 3629
+13 -4
fs/btrfs/btrfs_inode.h
··· 145 145 * different from prop_compress and takes precedence if set. 146 146 */ 147 147 u8 defrag_compress; 148 + s8 defrag_compress_level; 148 149 149 150 /* 150 151 * Lock for counters and all fields used to determine if the inode is in ··· 517 516 lockdep_assert_held(&inode->vfs_inode.i_rwsem); 518 517 } 519 518 519 + static inline void btrfs_update_inode_mapping_flags(struct btrfs_inode *inode) 520 + { 521 + if (inode->flags & BTRFS_INODE_NODATASUM) 522 + mapping_clear_stable_writes(inode->vfs_inode.i_mapping); 523 + else 524 + mapping_set_stable_writes(inode->vfs_inode.i_mapping); 525 + } 526 + 520 527 /* Array of bytes with variable length, hexadecimal format 0x1234 */ 521 528 #define CSUM_FMT "0x%*phN" 522 529 #define CSUM_FMT_VALUE(size, bytes) size, bytes ··· 533 524 u32 pgoff, u8 *csum, const u8 * const csum_expected); 534 525 bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, 535 526 u32 bio_offset, struct bio_vec *bv); 536 - noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, 527 + noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len, 537 528 struct btrfs_file_extent *file_extent, 538 529 bool nowait); 539 530 ··· 593 584 int btrfs_drop_inode(struct inode *inode); 594 585 int __init btrfs_init_cachep(void); 595 586 void __cold btrfs_destroy_cachep(void); 596 - struct inode *btrfs_iget_path(u64 ino, struct btrfs_root *root, 597 - struct btrfs_path *path); 598 - struct inode *btrfs_iget(u64 ino, struct btrfs_root *root); 587 + struct btrfs_inode *btrfs_iget_path(u64 ino, struct btrfs_root *root, 588 + struct btrfs_path *path); 589 + struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root); 599 590 struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, 600 591 struct folio *folio, u64 start, u64 len); 601 592 int btrfs_update_inode(struct btrfs_trans_handle *trans,
+20 -11
fs/btrfs/compression.c
··· 740 740 &btrfs_zstd_compress, 741 741 }; 742 742 743 - static struct list_head *alloc_workspace(int type, unsigned int level) 743 + static struct list_head *alloc_workspace(int type, int level) 744 744 { 745 745 switch (type) { 746 746 case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(); ··· 818 818 * Preallocation makes a forward progress guarantees and we do not return 819 819 * errors. 820 820 */ 821 - struct list_head *btrfs_get_workspace(int type, unsigned int level) 821 + struct list_head *btrfs_get_workspace(int type, int level) 822 822 { 823 823 struct workspace_manager *wsm; 824 824 struct list_head *workspace; ··· 968 968 * Adjust @level according to the limits of the compression algorithm or 969 969 * fallback to default 970 970 */ 971 - static unsigned int btrfs_compress_set_level(int type, unsigned level) 971 + static int btrfs_compress_set_level(unsigned int type, int level) 972 972 { 973 973 const struct btrfs_compress_op *ops = btrfs_compress_op[type]; 974 974 975 975 if (level == 0) 976 976 level = ops->default_level; 977 977 else 978 - level = min(level, ops->max_level); 978 + level = min(max(level, ops->min_level), ops->max_level); 979 979 980 980 return level; 981 + } 982 + 983 + /* 984 + * Check whether the @level is within the valid range for the given type. 985 + */ 986 + bool btrfs_compress_level_valid(unsigned int type, int level) 987 + { 988 + const struct btrfs_compress_op *ops = btrfs_compress_op[type]; 989 + 990 + return ops->min_level <= level && level <= ops->max_level; 981 991 } 982 992 983 993 /* Wrapper around find_get_page(), with extra error message. 
*/ ··· 1033 1023 * @total_out is an in/out parameter, must be set to the input length and will 1034 1024 * be also used to return the total number of compressed bytes 1035 1025 */ 1036 - int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping, 1026 + int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping, 1037 1027 u64 start, struct folio **folios, unsigned long *out_folios, 1038 1028 unsigned long *total_in, unsigned long *total_out) 1039 1029 { 1040 - int type = btrfs_compress_type(type_level); 1041 - int level = btrfs_compress_level(type_level); 1042 1030 const unsigned long orig_len = *total_out; 1043 1031 struct list_head *workspace; 1044 1032 int ret; ··· 1598 1590 1599 1591 /* 1600 1592 * Convert the compression suffix (eg. after "zlib" starting with ":") to 1601 - * level, unrecognized string will set the default level 1593 + * level, unrecognized string will set the default level. Negative level 1594 + * numbers are allowed. 1602 1595 */ 1603 - unsigned int btrfs_compress_str2level(unsigned int type, const char *str) 1596 + int btrfs_compress_str2level(unsigned int type, const char *str) 1604 1597 { 1605 - unsigned int level = 0; 1598 + int level = 0; 1606 1599 int ret; 1607 1600 1608 1601 if (!type) 1609 1602 return 0; 1610 1603 1611 1604 if (str[0] == ':') { 1612 - ret = kstrtouint(str + 1, 10, &level); 1605 + ret = kstrtoint(str + 1, 10, &level); 1613 1606 if (ret) 1614 1607 level = 0; 1615 1608 }
+9 -17
fs/btrfs/compression.h
··· 72 72 struct btrfs_bio bbio; 73 73 }; 74 74 75 - static inline unsigned int btrfs_compress_type(unsigned int type_level) 76 - { 77 - return (type_level & 0xF); 78 - } 79 - 80 - static inline unsigned int btrfs_compress_level(unsigned int type_level) 81 - { 82 - return ((type_level & 0xF0) >> 4); 83 - } 84 - 85 75 /* @range_end must be exclusive. */ 86 76 static inline u32 btrfs_calc_input_length(u64 range_end, u64 cur) 87 77 { ··· 83 93 int __init btrfs_init_compress(void); 84 94 void __cold btrfs_exit_compress(void); 85 95 86 - int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping, 96 + bool btrfs_compress_level_valid(unsigned int type, int level); 97 + int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping, 87 98 u64 start, struct folio **folios, unsigned long *out_folios, 88 99 unsigned long *total_in, unsigned long *total_out); 89 100 int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio, ··· 98 107 bool writeback); 99 108 void btrfs_submit_compressed_read(struct btrfs_bio *bbio); 100 109 101 - unsigned int btrfs_compress_str2level(unsigned int type, const char *str); 110 + int btrfs_compress_str2level(unsigned int type, const char *str); 102 111 103 112 struct folio *btrfs_alloc_compr_folio(void); 104 113 void btrfs_free_compr_folio(struct folio *folio); ··· 122 131 wait_queue_head_t ws_wait; 123 132 }; 124 133 125 - struct list_head *btrfs_get_workspace(int type, unsigned int level); 134 + struct list_head *btrfs_get_workspace(int type, int level); 126 135 void btrfs_put_workspace(int type, struct list_head *ws); 127 136 128 137 struct btrfs_compress_op { 129 138 struct workspace_manager *workspace_manager; 130 139 /* Maximum level supported by the compression algorithm */ 131 - unsigned int max_level; 132 - unsigned int default_level; 140 + int min_level; 141 + int max_level; 142 + int default_level; 133 143 }; 134 144 135 145 /* The heuristic workspaces are managed via the 
0th workspace manager */ ··· 179 187 size_t destlen); 180 188 void zstd_init_workspace_manager(void); 181 189 void zstd_cleanup_workspace_manager(void); 182 - struct list_head *zstd_alloc_workspace(unsigned int level); 190 + struct list_head *zstd_alloc_workspace(int level); 183 191 void zstd_free_workspace(struct list_head *ws); 184 - struct list_head *zstd_get_workspace(unsigned int level); 192 + struct list_head *zstd_get_workspace(int level); 185 193 void zstd_put_workspace(struct list_head *ws); 186 194 187 195 #endif
+7 -11
fs/btrfs/ctree.c
··· 4306 4306 u32 data_size) 4307 4307 { 4308 4308 int ret = 0; 4309 - struct btrfs_path *path; 4309 + BTRFS_PATH_AUTO_FREE(path); 4310 4310 struct extent_buffer *leaf; 4311 4311 unsigned long ptr; 4312 4312 ··· 4320 4320 write_extent_buffer(leaf, data, ptr, data_size); 4321 4321 btrfs_mark_buffer_dirty(trans, leaf); 4322 4322 } 4323 - btrfs_free_path(path); 4324 4323 return ret; 4325 4324 } 4326 4325 ··· 4607 4608 u64 min_trans) 4608 4609 { 4609 4610 struct extent_buffer *cur; 4610 - struct btrfs_key found_key; 4611 4611 int slot; 4612 4612 int sret; 4613 4613 u32 nritems; ··· 4642 4644 goto find_next_key; 4643 4645 ret = 0; 4644 4646 path->slots[level] = slot; 4645 - btrfs_item_key_to_cpu(cur, &found_key, slot); 4647 + /* Save our key for returning back. */ 4648 + btrfs_item_key_to_cpu(cur, min_key, slot); 4646 4649 goto out; 4647 4650 } 4648 4651 if (sret && slot > 0) ··· 4667 4668 * we didn't find a candidate key in this node, walk forward 4668 4669 * and find another one 4669 4670 */ 4671 + path->slots[level] = slot; 4670 4672 if (slot >= nritems) { 4671 - path->slots[level] = slot; 4672 4673 sret = btrfs_find_next_key(root, path, min_key, level, 4673 4674 min_trans); 4674 4675 if (sret == 0) { ··· 4678 4679 goto out; 4679 4680 } 4680 4681 } 4681 - /* save our key for returning back */ 4682 - btrfs_node_key_to_cpu(cur, &found_key, slot); 4683 - path->slots[level] = slot; 4684 4682 if (level == path->lowest_level) { 4685 4683 ret = 0; 4684 + /* Save our key for returning back. */ 4685 + btrfs_node_key_to_cpu(cur, min_key, slot); 4686 4686 goto out; 4687 4687 } 4688 4688 cur = btrfs_read_node_slot(cur, slot); ··· 4698 4700 } 4699 4701 out: 4700 4702 path->keep_locks = keep_locks; 4701 - if (ret == 0) { 4703 + if (ret == 0) 4702 4704 btrfs_unlock_up_safe(path, path->lowest_level + 1); 4703 - memcpy(min_key, &found_key, sizeof(found_key)); 4704 - } 4705 4705 return ret; 4706 4706 } 4707 4707
+1 -1
fs/btrfs/ctree.h
··· 6 6 #ifndef BTRFS_CTREE_H 7 7 #define BTRFS_CTREE_H 8 8 9 - #include "linux/cleanup.h" 9 + #include <linux/cleanup.h> 10 10 #include <linux/spinlock.h> 11 11 #include <linux/rbtree.h> 12 12 #include <linux/mutex.h>
+46 -32
fs/btrfs/defrag.c
··· 225 225 struct file_ra_state *ra) 226 226 { 227 227 struct btrfs_root *inode_root; 228 - struct inode *inode; 228 + struct btrfs_inode *inode; 229 229 struct btrfs_ioctl_defrag_range_args range; 230 230 int ret = 0; 231 231 u64 cur = 0; ··· 250 250 goto cleanup; 251 251 } 252 252 253 - if (cur >= i_size_read(inode)) { 254 - iput(inode); 253 + if (cur >= i_size_read(&inode->vfs_inode)) { 254 + iput(&inode->vfs_inode); 255 255 goto cleanup; 256 256 } 257 257 258 258 /* Do a chunk of defrag */ 259 - clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); 259 + clear_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags); 260 260 memset(&range, 0, sizeof(range)); 261 261 range.len = (u64)-1; 262 262 range.start = cur; 263 263 range.extent_thresh = defrag->extent_thresh; 264 - file_ra_state_init(ra, inode->i_mapping); 264 + file_ra_state_init(ra, inode->vfs_inode.i_mapping); 265 265 266 266 sb_start_write(fs_info->sb); 267 267 ret = btrfs_defrag_file(inode, ra, &range, defrag->transid, 268 - BTRFS_DEFRAG_BATCH); 268 + BTRFS_DEFRAG_BATCH); 269 269 sb_end_write(fs_info->sb); 270 - iput(inode); 270 + iput(&inode->vfs_inode); 271 271 272 272 if (ret < 0) 273 273 goto cleanup; ··· 1352 1352 * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without 1353 1353 * defragging all the range). 
1354 1354 */ 1355 - int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, 1355 + int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra, 1356 1356 struct btrfs_ioctl_defrag_range_args *range, 1357 1357 u64 newer_than, unsigned long max_to_defrag) 1358 1358 { 1359 - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 1359 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 1360 1360 unsigned long sectors_defragged = 0; 1361 - u64 isize = i_size_read(inode); 1361 + u64 isize = i_size_read(&inode->vfs_inode); 1362 1362 u64 cur; 1363 1363 u64 last_byte; 1364 1364 bool do_compress = (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS); 1365 1365 int compress_type = BTRFS_COMPRESS_ZLIB; 1366 + int compress_level = 0; 1366 1367 int ret = 0; 1367 1368 u32 extent_thresh = range->extent_thresh; 1368 1369 pgoff_t start_index; ··· 1377 1376 return -EINVAL; 1378 1377 1379 1378 if (do_compress) { 1380 - if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES) 1381 - return -EINVAL; 1382 - if (range->compress_type) 1383 - compress_type = range->compress_type; 1379 + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL) { 1380 + if (range->compress.type >= BTRFS_NR_COMPRESS_TYPES) 1381 + return -EINVAL; 1382 + if (range->compress.type) { 1383 + compress_type = range->compress.type; 1384 + compress_level = range->compress.level; 1385 + if (!btrfs_compress_level_valid(compress_type, compress_level)) 1386 + return -EINVAL; 1387 + } 1388 + } else { 1389 + if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES) 1390 + return -EINVAL; 1391 + if (range->compress_type) 1392 + compress_type = range->compress_type; 1393 + } 1384 1394 } 1385 1395 1386 1396 if (extent_thresh == 0) ··· 1414 1402 * defrag range can be written sequentially. 
1415 1403 */ 1416 1404 start_index = cur >> PAGE_SHIFT; 1417 - if (start_index < inode->i_mapping->writeback_index) 1418 - inode->i_mapping->writeback_index = start_index; 1405 + if (start_index < inode->vfs_inode.i_mapping->writeback_index) 1406 + inode->vfs_inode.i_mapping->writeback_index = start_index; 1419 1407 1420 1408 while (cur < last_byte) { 1421 1409 const unsigned long prev_sectors_defragged = sectors_defragged; ··· 1432 1420 (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1; 1433 1421 cluster_end = min(cluster_end, last_byte); 1434 1422 1435 - btrfs_inode_lock(BTRFS_I(inode), 0); 1436 - if (IS_SWAPFILE(inode)) { 1423 + btrfs_inode_lock(inode, 0); 1424 + if (IS_SWAPFILE(&inode->vfs_inode)) { 1437 1425 ret = -ETXTBSY; 1438 - btrfs_inode_unlock(BTRFS_I(inode), 0); 1426 + btrfs_inode_unlock(inode, 0); 1439 1427 break; 1440 1428 } 1441 - if (!(inode->i_sb->s_flags & SB_ACTIVE)) { 1442 - btrfs_inode_unlock(BTRFS_I(inode), 0); 1429 + if (!(inode->vfs_inode.i_sb->s_flags & SB_ACTIVE)) { 1430 + btrfs_inode_unlock(inode, 0); 1443 1431 break; 1444 1432 } 1445 - if (do_compress) 1446 - BTRFS_I(inode)->defrag_compress = compress_type; 1447 - ret = defrag_one_cluster(BTRFS_I(inode), ra, cur, 1433 + if (do_compress) { 1434 + inode->defrag_compress = compress_type; 1435 + inode->defrag_compress_level = compress_level; 1436 + } 1437 + ret = defrag_one_cluster(inode, ra, cur, 1448 1438 cluster_end + 1 - cur, extent_thresh, 1449 1439 newer_than, do_compress, &sectors_defragged, 1450 1440 max_to_defrag, &last_scanned); 1451 1441 1452 1442 if (sectors_defragged > prev_sectors_defragged) 1453 - balance_dirty_pages_ratelimited(inode->i_mapping); 1443 + balance_dirty_pages_ratelimited(inode->vfs_inode.i_mapping); 1454 1444 1455 - btrfs_inode_unlock(BTRFS_I(inode), 0); 1445 + btrfs_inode_unlock(inode, 0); 1456 1446 if (ret < 0) 1457 1447 break; 1458 1448 cur = max(cluster_end + 1, last_scanned); ··· 1476 1462 * need to be written back immediately. 
1477 1463 */ 1478 1464 if (range->flags & BTRFS_DEFRAG_RANGE_START_IO) { 1479 - filemap_flush(inode->i_mapping); 1465 + filemap_flush(inode->vfs_inode.i_mapping); 1480 1466 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 1481 - &BTRFS_I(inode)->runtime_flags)) 1482 - filemap_flush(inode->i_mapping); 1467 + &inode->runtime_flags)) 1468 + filemap_flush(inode->vfs_inode.i_mapping); 1483 1469 } 1484 1470 if (range->compress_type == BTRFS_COMPRESS_LZO) 1485 1471 btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); ··· 1488 1474 ret = sectors_defragged; 1489 1475 } 1490 1476 if (do_compress) { 1491 - btrfs_inode_lock(BTRFS_I(inode), 0); 1492 - BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE; 1493 - btrfs_inode_unlock(BTRFS_I(inode), 0); 1477 + btrfs_inode_lock(inode, 0); 1478 + inode->defrag_compress = BTRFS_COMPRESS_NONE; 1479 + btrfs_inode_unlock(inode, 0); 1494 1480 } 1495 1481 return ret; 1496 1482 }
+2 -2
fs/btrfs/defrag.h
··· 6 6 #include <linux/types.h> 7 7 #include <linux/compiler_types.h> 8 8 9 - struct inode; 10 9 struct file_ra_state; 10 + struct btrfs_inode; 11 11 struct btrfs_fs_info; 12 12 struct btrfs_root; 13 13 struct btrfs_trans_handle; 14 14 struct btrfs_ioctl_defrag_range_args; 15 15 16 - int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, 16 + int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra, 17 17 struct btrfs_ioctl_defrag_range_args *range, 18 18 u64 newer_than, unsigned long max_to_defrag); 19 19 int __init btrfs_auto_defrag_init(void);
+48 -51
fs/btrfs/delayed-inode.c
··· 1211 1211 struct btrfs_inode *inode) 1212 1212 { 1213 1213 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); 1214 - struct btrfs_path *path; 1214 + BTRFS_PATH_AUTO_FREE(path); 1215 1215 struct btrfs_block_rsv *block_rsv; 1216 1216 int ret; 1217 1217 ··· 1238 1238 ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node); 1239 1239 1240 1240 btrfs_release_delayed_node(delayed_node); 1241 - btrfs_free_path(path); 1242 1241 trans->block_rsv = block_rsv; 1243 1242 1244 1243 return ret; ··· 1816 1817 1817 1818 static void fill_stack_inode_item(struct btrfs_trans_handle *trans, 1818 1819 struct btrfs_inode_item *inode_item, 1819 - struct inode *inode) 1820 + struct btrfs_inode *inode) 1820 1821 { 1822 + struct inode *vfs_inode = &inode->vfs_inode; 1821 1823 u64 flags; 1822 1824 1823 - btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode)); 1824 - btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode)); 1825 - btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); 1826 - btrfs_set_stack_inode_mode(inode_item, inode->i_mode); 1827 - btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); 1828 - btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); 1829 - btrfs_set_stack_inode_generation(inode_item, 1830 - BTRFS_I(inode)->generation); 1825 + btrfs_set_stack_inode_uid(inode_item, i_uid_read(vfs_inode)); 1826 + btrfs_set_stack_inode_gid(inode_item, i_gid_read(vfs_inode)); 1827 + btrfs_set_stack_inode_size(inode_item, inode->disk_i_size); 1828 + btrfs_set_stack_inode_mode(inode_item, vfs_inode->i_mode); 1829 + btrfs_set_stack_inode_nlink(inode_item, vfs_inode->i_nlink); 1830 + btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(vfs_inode)); 1831 + btrfs_set_stack_inode_generation(inode_item, inode->generation); 1831 1832 btrfs_set_stack_inode_sequence(inode_item, 1832 - inode_peek_iversion(inode)); 1833 + inode_peek_iversion(vfs_inode)); 1833 1834 btrfs_set_stack_inode_transid(inode_item, trans->transid); 
1834 - btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); 1835 - flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags, 1836 - BTRFS_I(inode)->ro_flags); 1835 + btrfs_set_stack_inode_rdev(inode_item, vfs_inode->i_rdev); 1836 + flags = btrfs_inode_combine_flags(inode->flags, inode->ro_flags); 1837 1837 btrfs_set_stack_inode_flags(inode_item, flags); 1838 1838 btrfs_set_stack_inode_block_group(inode_item, 0); 1839 1839 1840 1840 btrfs_set_stack_timespec_sec(&inode_item->atime, 1841 - inode_get_atime_sec(inode)); 1841 + inode_get_atime_sec(vfs_inode)); 1842 1842 btrfs_set_stack_timespec_nsec(&inode_item->atime, 1843 - inode_get_atime_nsec(inode)); 1843 + inode_get_atime_nsec(vfs_inode)); 1844 1844 1845 1845 btrfs_set_stack_timespec_sec(&inode_item->mtime, 1846 - inode_get_mtime_sec(inode)); 1846 + inode_get_mtime_sec(vfs_inode)); 1847 1847 btrfs_set_stack_timespec_nsec(&inode_item->mtime, 1848 - inode_get_mtime_nsec(inode)); 1848 + inode_get_mtime_nsec(vfs_inode)); 1849 1849 1850 1850 btrfs_set_stack_timespec_sec(&inode_item->ctime, 1851 - inode_get_ctime_sec(inode)); 1851 + inode_get_ctime_sec(vfs_inode)); 1852 1852 btrfs_set_stack_timespec_nsec(&inode_item->ctime, 1853 - inode_get_ctime_nsec(inode)); 1853 + inode_get_ctime_nsec(vfs_inode)); 1854 1854 1855 - btrfs_set_stack_timespec_sec(&inode_item->otime, BTRFS_I(inode)->i_otime_sec); 1856 - btrfs_set_stack_timespec_nsec(&inode_item->otime, BTRFS_I(inode)->i_otime_nsec); 1855 + btrfs_set_stack_timespec_sec(&inode_item->otime, inode->i_otime_sec); 1856 + btrfs_set_stack_timespec_nsec(&inode_item->otime, inode->i_otime_nsec); 1857 1857 } 1858 1858 1859 - int btrfs_fill_inode(struct inode *inode, u32 *rdev) 1859 + int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev) 1860 1860 { 1861 - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; 1861 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 1862 1862 struct btrfs_delayed_node *delayed_node; 1863 1863 struct btrfs_inode_item *inode_item; 1864 
+ struct inode *vfs_inode = &inode->vfs_inode; 1864 1865 1865 - delayed_node = btrfs_get_delayed_node(BTRFS_I(inode)); 1866 + delayed_node = btrfs_get_delayed_node(inode); 1866 1867 if (!delayed_node) 1867 1868 return -ENOENT; 1868 1869 ··· 1875 1876 1876 1877 inode_item = &delayed_node->inode_item; 1877 1878 1878 - i_uid_write(inode, btrfs_stack_inode_uid(inode_item)); 1879 - i_gid_write(inode, btrfs_stack_inode_gid(inode_item)); 1880 - btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item)); 1881 - btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0, 1882 - round_up(i_size_read(inode), fs_info->sectorsize)); 1883 - inode->i_mode = btrfs_stack_inode_mode(inode_item); 1884 - set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); 1885 - inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); 1886 - BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); 1887 - BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item); 1879 + i_uid_write(vfs_inode, btrfs_stack_inode_uid(inode_item)); 1880 + i_gid_write(vfs_inode, btrfs_stack_inode_gid(inode_item)); 1881 + btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); 1882 + btrfs_inode_set_file_extent_range(inode, 0, 1883 + round_up(i_size_read(vfs_inode), fs_info->sectorsize)); 1884 + vfs_inode->i_mode = btrfs_stack_inode_mode(inode_item); 1885 + set_nlink(vfs_inode, btrfs_stack_inode_nlink(inode_item)); 1886 + inode_set_bytes(vfs_inode, btrfs_stack_inode_nbytes(inode_item)); 1887 + inode->generation = btrfs_stack_inode_generation(inode_item); 1888 + inode->last_trans = btrfs_stack_inode_transid(inode_item); 1888 1889 1889 - inode_set_iversion_queried(inode, 1890 - btrfs_stack_inode_sequence(inode_item)); 1891 - inode->i_rdev = 0; 1890 + inode_set_iversion_queried(vfs_inode, btrfs_stack_inode_sequence(inode_item)); 1891 + vfs_inode->i_rdev = 0; 1892 1892 *rdev = btrfs_stack_inode_rdev(inode_item); 1893 1893 btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item), 1894 
- &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags); 1894 + &inode->flags, &inode->ro_flags); 1895 1895 1896 - inode_set_atime(inode, btrfs_stack_timespec_sec(&inode_item->atime), 1896 + inode_set_atime(vfs_inode, btrfs_stack_timespec_sec(&inode_item->atime), 1897 1897 btrfs_stack_timespec_nsec(&inode_item->atime)); 1898 1898 1899 - inode_set_mtime(inode, btrfs_stack_timespec_sec(&inode_item->mtime), 1899 + inode_set_mtime(vfs_inode, btrfs_stack_timespec_sec(&inode_item->mtime), 1900 1900 btrfs_stack_timespec_nsec(&inode_item->mtime)); 1901 1901 1902 - inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime), 1902 + inode_set_ctime(vfs_inode, btrfs_stack_timespec_sec(&inode_item->ctime), 1903 1903 btrfs_stack_timespec_nsec(&inode_item->ctime)); 1904 1904 1905 - BTRFS_I(inode)->i_otime_sec = btrfs_stack_timespec_sec(&inode_item->otime); 1906 - BTRFS_I(inode)->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime); 1905 + inode->i_otime_sec = btrfs_stack_timespec_sec(&inode_item->otime); 1906 + inode->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime); 1907 1907 1908 - inode->i_generation = BTRFS_I(inode)->generation; 1909 - if (S_ISDIR(inode->i_mode)) 1910 - BTRFS_I(inode)->index_cnt = (u64)-1; 1908 + vfs_inode->i_generation = inode->generation; 1909 + if (S_ISDIR(vfs_inode->i_mode)) 1910 + inode->index_cnt = (u64)-1; 1911 1911 1912 1912 mutex_unlock(&delayed_node->mutex); 1913 1913 btrfs_release_delayed_node(delayed_node); ··· 1926 1928 1927 1929 mutex_lock(&delayed_node->mutex); 1928 1930 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) { 1929 - fill_stack_inode_item(trans, &delayed_node->inode_item, 1930 - &inode->vfs_inode); 1931 + fill_stack_inode_item(trans, &delayed_node->inode_item, inode); 1931 1932 goto release_node; 1932 1933 } 1933 1934 ··· 1934 1937 if (ret) 1935 1938 goto release_node; 1936 1939 1937 - fill_stack_inode_item(trans, &delayed_node->inode_item, &inode->vfs_inode); 1940 + 
fill_stack_inode_item(trans, &delayed_node->inode_item, inode); 1938 1941 set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags); 1939 1942 delayed_node->count++; 1940 1943 atomic_inc(&root->fs_info->delayed_root->items);
+1 -1
fs/btrfs/delayed-inode.h
··· 133 133 134 134 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, 135 135 struct btrfs_inode *inode); 136 - int btrfs_fill_inode(struct inode *inode, u32 *rdev); 136 + int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev); 137 137 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode); 138 138 139 139 /* Used for drop dead root */
+2
fs/btrfs/delayed-ref.h
··· 14 14 #include <linux/spinlock.h> 15 15 #include <linux/slab.h> 16 16 #include <uapi/linux/btrfs_tree.h> 17 + #include "fs.h" 18 + #include "messages.h" 17 19 18 20 struct btrfs_trans_handle; 19 21 struct btrfs_fs_info;
+12 -21
fs/btrfs/dev-replace.c
··· 76 76 struct extent_buffer *eb; 77 77 int slot; 78 78 int ret = 0; 79 - struct btrfs_path *path = NULL; 79 + BTRFS_PATH_AUTO_FREE(path); 80 80 int item_size; 81 81 struct btrfs_dev_replace_item *ptr; 82 82 u64 src_devid; ··· 85 85 return 0; 86 86 87 87 path = btrfs_alloc_path(); 88 - if (!path) { 89 - ret = -ENOMEM; 90 - goto out; 91 - } 88 + if (!path) 89 + return -ENOMEM; 92 90 93 91 key.objectid = 0; 94 92 key.type = BTRFS_DEV_REPLACE_KEY; ··· 101 103 if (btrfs_find_device(fs_info->fs_devices, &args)) { 102 104 btrfs_err(fs_info, 103 105 "found replace target device without a valid replace item"); 104 - ret = -EUCLEAN; 105 - goto out; 106 + return -EUCLEAN; 106 107 } 107 - ret = 0; 108 108 dev_replace->replace_state = 109 109 BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED; 110 110 dev_replace->cont_reading_from_srcdev_mode = ··· 119 123 dev_replace->tgtdev = NULL; 120 124 dev_replace->is_valid = 0; 121 125 dev_replace->item_needs_writeback = 0; 122 - goto out; 126 + return 0; 123 127 } 124 128 slot = path->slots[0]; 125 129 eb = path->nodes[0]; ··· 222 226 break; 223 227 } 224 228 225 - out: 226 - btrfs_free_path(path); 227 229 return ret; 228 230 } 229 231 ··· 340 346 struct btrfs_fs_info *fs_info = trans->fs_info; 341 347 int ret; 342 348 struct btrfs_root *dev_root = fs_info->dev_root; 343 - struct btrfs_path *path; 349 + BTRFS_PATH_AUTO_FREE(path); 344 350 struct btrfs_key key; 345 351 struct extent_buffer *eb; 346 352 struct btrfs_dev_replace_item *ptr; ··· 359 365 key.offset = 0; 360 366 361 367 path = btrfs_alloc_path(); 362 - if (!path) { 363 - ret = -ENOMEM; 364 - goto out; 365 - } 368 + if (!path) 369 + return -ENOMEM; 370 + 366 371 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 367 372 if (ret < 0) { 368 373 btrfs_warn(fs_info, 369 374 "error %d while searching for dev_replace item!", 370 375 ret); 371 - goto out; 376 + return ret; 372 377 } 373 378 374 379 if (ret == 0 && ··· 388 395 btrfs_warn(fs_info, 389 396 "delete too small 
dev_replace item failed %d!", 390 397 ret); 391 - goto out; 398 + return ret; 392 399 } 393 400 ret = 1; 394 401 } ··· 401 408 if (ret < 0) { 402 409 btrfs_warn(fs_info, 403 410 "insert dev_replace item failed %d!", ret); 404 - goto out; 411 + return ret; 405 412 } 406 413 } 407 414 ··· 433 440 dev_replace->cursor_right); 434 441 dev_replace->item_needs_writeback = 0; 435 442 up_write(&dev_replace->rwsem); 436 - out: 437 - btrfs_free_path(path); 438 443 439 444 return ret; 440 445 }
+9 -15
fs/btrfs/dir-item.c
··· 236 236 int data_size; 237 237 struct extent_buffer *leaf; 238 238 int slot; 239 - struct btrfs_path *path; 239 + BTRFS_PATH_AUTO_FREE(path); 240 240 241 241 path = btrfs_alloc_path(); 242 242 if (!path) ··· 251 251 if (IS_ERR(di)) { 252 252 ret = PTR_ERR(di); 253 253 /* Nothing found, we're safe */ 254 - if (ret == -ENOENT) { 255 - ret = 0; 256 - goto out; 257 - } 254 + if (ret == -ENOENT) 255 + return 0; 258 256 259 257 if (ret < 0) 260 - goto out; 258 + return ret; 261 259 } 262 260 263 261 /* we found an item, look for our name in the item */ 264 262 if (di) { 265 263 /* our exact name was found */ 266 - ret = -EEXIST; 267 - goto out; 264 + return -EEXIST; 268 265 } 269 266 270 267 /* See if there is room in the item to insert this name. */ ··· 270 273 slot = path->slots[0]; 271 274 if (data_size + btrfs_item_size(leaf, slot) + 272 275 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root->fs_info)) { 273 - ret = -EOVERFLOW; 274 - } else { 275 - /* plenty of insertion room */ 276 - ret = 0; 276 + return -EOVERFLOW; 277 277 } 278 - out: 279 - btrfs_free_path(path); 280 - return ret; 278 + 279 + /* Plenty of insertion room. */ 280 + return 0; 281 281 } 282 282 283 283 /*
+1
fs/btrfs/dir-item.h
··· 10 10 struct btrfs_fs_info; 11 11 struct btrfs_key; 12 12 struct btrfs_path; 13 + struct btrfs_inode; 13 14 struct btrfs_root; 14 15 struct btrfs_trans_handle; 15 16
+18 -1
fs/btrfs/direct-io.c
··· 248 248 len = min(len, em->len - (start - em->start)); 249 249 block_start = extent_map_block_start(em) + (start - em->start); 250 250 251 - if (can_nocow_extent(inode, start, &len, &file_extent, false) == 1) { 251 + if (can_nocow_extent(BTRFS_I(inode), start, &len, &file_extent, 252 + false) == 1) { 252 253 bg = btrfs_inc_nocow_writers(fs_info, block_start); 253 254 if (bg) 254 255 can_nocow = true; ··· 853 852 } 854 853 855 854 if (check_direct_IO(fs_info, from, pos)) { 855 + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); 856 + goto buffered; 857 + } 858 + /* 859 + * We can't control the folios being passed in, applications can write 860 + * to them while a direct IO write is in progress. This means the 861 + * content might change after we calculated the data checksum. 862 + * Therefore we can end up storing a checksum that doesn't match the 863 + * persisted data. 864 + * 865 + * To be extra safe and avoid false data checksum mismatch, if the 866 + * inode requires data checksum, just fallback to buffered IO. 867 + * For buffered IO we have full control of page cache and can ensure 868 + * no one is modifying the content during writeback. 869 + */ 870 + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { 856 871 btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); 857 872 goto buffered; 858 873 }
+2
fs/btrfs/direct-io.h
··· 5 5 6 6 #include <linux/types.h> 7 7 8 + struct kiocb; 9 + 8 10 int __init btrfs_init_dio(void); 9 11 void __cold btrfs_destroy_dio(void); 10 12
+16 -18
fs/btrfs/discard.c
··· 167 167 block_group->discard_eligible_time = 0; 168 168 queued = !list_empty(&block_group->discard_list); 169 169 list_del_init(&block_group->discard_list); 170 - /* 171 - * If the block group is currently running in the discard workfn, we 172 - * don't want to deref it, since it's still being used by the workfn. 173 - * The workfn will notice this case and deref the block group when it is 174 - * finished. 175 - */ 176 - if (queued && !running) 170 + if (queued) 177 171 btrfs_put_block_group(block_group); 178 172 179 173 spin_unlock(&discard_ctl->lock); ··· 254 260 block_group->discard_cursor = block_group->start; 255 261 block_group->discard_state = BTRFS_DISCARD_EXTENTS; 256 262 } 257 - discard_ctl->block_group = block_group; 258 263 } 259 264 if (block_group) { 265 + btrfs_get_block_group(block_group); 266 + discard_ctl->block_group = block_group; 260 267 *discard_state = block_group->discard_state; 261 268 *discard_index = block_group->discard_index; 262 269 } ··· 488 493 489 494 block_group = peek_discard_list(discard_ctl, &discard_state, 490 495 &discard_index, now); 491 - if (!block_group || !btrfs_run_discard_work(discard_ctl)) 496 + if (!block_group) 492 497 return; 498 + if (!btrfs_run_discard_work(discard_ctl)) { 499 + spin_lock(&discard_ctl->lock); 500 + btrfs_put_block_group(block_group); 501 + discard_ctl->block_group = NULL; 502 + spin_unlock(&discard_ctl->lock); 503 + return; 504 + } 493 505 if (now < block_group->discard_eligible_time) { 506 + spin_lock(&discard_ctl->lock); 507 + btrfs_put_block_group(block_group); 508 + discard_ctl->block_group = NULL; 509 + spin_unlock(&discard_ctl->lock); 494 510 btrfs_discard_schedule_work(discard_ctl, false); 495 511 return; 496 512 } ··· 553 547 spin_lock(&discard_ctl->lock); 554 548 discard_ctl->prev_discard = trimmed; 555 549 discard_ctl->prev_discard_time = now; 556 - /* 557 - * If the block group was removed from the discard list while it was 558 - * running in this workfn, then we didn't deref it, 
since this function 559 - * still owned that reference. But we set the discard_ctl->block_group 560 - * back to NULL, so we can use that condition to know that now we need 561 - * to deref the block_group. 562 - */ 563 - if (discard_ctl->block_group == NULL) 564 - btrfs_put_block_group(block_group); 550 + btrfs_put_block_group(block_group); 565 551 discard_ctl->block_group = NULL; 566 552 __btrfs_discard_schedule_work(discard_ctl, now, false); 567 553 spin_unlock(&discard_ctl->lock);
+1
fs/btrfs/discard.h
··· 3 3 #ifndef BTRFS_DISCARD_H 4 4 #define BTRFS_DISCARD_H 5 5 6 + #include <linux/types.h> 6 7 #include <linux/sizes.h> 7 8 8 9 struct btrfs_fs_info;
+66 -43
fs/btrfs/disk-io.c
··· 182 182 int mirror_num) 183 183 { 184 184 struct btrfs_fs_info *fs_info = eb->fs_info; 185 - int num_folios = num_extent_folios(eb); 186 185 int ret = 0; 187 186 188 187 if (sb_rdonly(fs_info->sb)) 189 188 return -EROFS; 190 189 191 - for (int i = 0; i < num_folios; i++) { 190 + for (int i = 0; i < num_extent_folios(eb); i++) { 192 191 struct folio *folio = eb->folios[i]; 193 192 u64 start = max_t(u64, eb->start, folio_pos(folio)); 194 193 u64 end = min_t(u64, eb->start + eb->len, ··· 283 284 284 285 if (WARN_ON_ONCE(found_start != eb->start)) 285 286 return BLK_STS_IOERR; 286 - if (WARN_ON(!btrfs_folio_test_uptodate(fs_info, eb->folios[0], 287 - eb->start, eb->len))) 287 + if (WARN_ON(!btrfs_meta_folio_test_uptodate(eb->folios[0], eb))) 288 288 return BLK_STS_IOERR; 289 289 290 290 ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, ··· 1087 1089 const struct btrfs_key *key) 1088 1090 { 1089 1091 struct btrfs_root *root; 1090 - struct btrfs_path *path; 1092 + BTRFS_PATH_AUTO_FREE(path); 1091 1093 1092 1094 path = btrfs_alloc_path(); 1093 1095 if (!path) 1094 1096 return ERR_PTR(-ENOMEM); 1095 1097 root = read_tree_root_path(tree_root, path, key); 1096 - btrfs_free_path(path); 1097 1098 1098 1099 return root; 1099 1100 } 1100 1101 1101 1102 /* 1102 - * Initialize subvolume root in-memory structure 1103 + * Initialize subvolume root in-memory structure. 
1103 1104 * 1104 1105 * @anon_dev: anonymous device to attach to the root, if zero, allocate new 1106 + * 1107 + * In case of failure the caller is responsible to call btrfs_free_fs_root() 1105 1108 */ 1106 1109 static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) 1107 1110 { ··· 1126 1127 if (!anon_dev) { 1127 1128 ret = get_anon_bdev(&root->anon_dev); 1128 1129 if (ret) 1129 - goto fail; 1130 + return ret; 1130 1131 } else { 1131 1132 root->anon_dev = anon_dev; 1132 1133 } ··· 1136 1137 ret = btrfs_init_root_free_objectid(root); 1137 1138 if (ret) { 1138 1139 mutex_unlock(&root->objectid_mutex); 1139 - goto fail; 1140 + return ret; 1140 1141 } 1141 1142 1142 1143 ASSERT(root->free_objectid <= BTRFS_LAST_FREE_OBJECTID); ··· 1144 1145 mutex_unlock(&root->objectid_mutex); 1145 1146 1146 1147 return 0; 1147 - fail: 1148 - /* The caller is responsible to call btrfs_free_fs_root */ 1149 - return ret; 1150 1148 } 1151 1149 1152 1150 static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, ··· 2196 2200 2197 2201 static int load_global_roots(struct btrfs_root *tree_root) 2198 2202 { 2199 - struct btrfs_path *path; 2200 - int ret = 0; 2203 + BTRFS_PATH_AUTO_FREE(path); 2204 + int ret; 2201 2205 2202 2206 path = btrfs_alloc_path(); 2203 2207 if (!path) ··· 2206 2210 ret = load_global_roots_objectid(tree_root, path, 2207 2211 BTRFS_EXTENT_TREE_OBJECTID, "extent"); 2208 2212 if (ret) 2209 - goto out; 2213 + return ret; 2210 2214 ret = load_global_roots_objectid(tree_root, path, 2211 2215 BTRFS_CSUM_TREE_OBJECTID, "csum"); 2212 2216 if (ret) 2213 - goto out; 2217 + return ret; 2214 2218 if (!btrfs_fs_compat_ro(tree_root->fs_info, FREE_SPACE_TREE)) 2215 - goto out; 2219 + return ret; 2216 2220 ret = load_global_roots_objectid(tree_root, path, 2217 2221 BTRFS_FREE_SPACE_TREE_OBJECTID, 2218 2222 "free space"); 2219 - out: 2220 - btrfs_free_path(path); 2223 + 2221 2224 return ret; 2222 2225 } 2223 2226 ··· 2442 2447 * Check sectorsize and 
nodesize first, other check will need it. 2443 2448 * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here. 2444 2449 */ 2445 - if (!is_power_of_2(sectorsize) || sectorsize < 4096 || 2450 + if (!is_power_of_2(sectorsize) || sectorsize < BTRFS_MIN_BLOCKSIZE || 2446 2451 sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) { 2447 2452 btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize); 2448 2453 ret = -EINVAL; 2449 2454 } 2450 2455 2451 2456 /* 2452 - * We only support at most two sectorsizes: 4K and PAGE_SIZE. 2457 + * We only support at most 3 sectorsizes: 4K, PAGE_SIZE, MIN_BLOCKSIZE. 2458 + * 2459 + * For 4K page sized systems with non-debug builds, all 3 matches (4K). 2460 + * For 4K page sized systems with debug builds, there are two block sizes 2461 + * supported. (4K and 2K) 2453 2462 * 2454 2463 * We can support 16K sectorsize with 64K page size without problem, 2455 2464 * but such sectorsize/pagesize combination doesn't make much sense. 2456 2465 * 4K will be our future standard, PAGE_SIZE is supported from the very 2457 2466 * beginning. 
2458 2467 */ 2459 - if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K && sectorsize != PAGE_SIZE)) { 2468 + if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K && 2469 + sectorsize != PAGE_SIZE && 2470 + sectorsize != BTRFS_MIN_BLOCKSIZE)) { 2460 2471 btrfs_err(fs_info, 2461 2472 "sectorsize %llu not yet supported for page size %lu", 2462 2473 sectorsize, PAGE_SIZE); ··· 2561 2560 btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); 2562 2561 ret = -EINVAL; 2563 2562 } 2563 + 2564 + if (ret) 2565 + return ret; 2564 2566 2565 2567 ret = validate_sys_chunk_array(fs_info, sb); 2566 2568 ··· 3394 3390 fs_info->nodesize = nodesize; 3395 3391 fs_info->sectorsize = sectorsize; 3396 3392 fs_info->sectorsize_bits = ilog2(sectorsize); 3397 - fs_info->sectors_per_page = (PAGE_SIZE >> fs_info->sectorsize_bits); 3398 3393 fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size; 3399 3394 fs_info->stripesize = stripesize; 3400 3395 fs_info->fs_devices->fs_info = fs_info; ··· 3418 3415 * to something non-standard make sure we truncate it to sectorsize. 3419 3416 */ 3420 3417 fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize); 3421 - 3422 - if (sectorsize < PAGE_SIZE) 3423 - btrfs_warn(fs_info, 3424 - "read-write for sector size %u with page size %lu is experimental", 3425 - sectorsize, PAGE_SIZE); 3426 3418 3427 3419 ret = btrfs_init_workqueues(fs_info); 3428 3420 if (ret) ··· 4324 4326 btrfs_cleanup_defrag_inodes(fs_info); 4325 4327 4326 4328 /* 4329 + * Handle the error fs first, as it will flush and wait for all ordered 4330 + * extents. This will generate delayed iputs, thus we want to handle 4331 + * it first. 4332 + */ 4333 + if (unlikely(BTRFS_FS_ERROR(fs_info))) 4334 + btrfs_error_commit_super(fs_info); 4335 + 4336 + /* 4327 4337 * Wait for any fixup workers to complete. 
4328 4338 * If we don't wait for them here and they are still running by the time 4329 4339 * we call kthread_stop() against the cleaner kthread further below, we ··· 4350 4344 * when we call kthread_stop(). 4351 4345 */ 4352 4346 btrfs_flush_workqueue(fs_info->delalloc_workers); 4347 + 4348 + /* 4349 + * We can have ordered extents getting their last reference dropped from 4350 + * the fs_info->workers queue because for async writes for data bios we 4351 + * queue a work for that queue, at btrfs_wq_submit_bio(), that runs 4352 + * run_one_async_done() which calls btrfs_bio_end_io() in case the bio 4353 + * has an error, and that later function can do the final 4354 + * btrfs_put_ordered_extent() on the ordered extent attached to the bio, 4355 + * which adds a delayed iput for the inode. So we must flush the queue 4356 + * so that we don't have delayed iputs after committing the current 4357 + * transaction below and stopping the cleaner and transaction kthreads. 4358 + */ 4359 + btrfs_flush_workqueue(fs_info->workers); 4360 + 4361 + /* 4362 + * When finishing a compressed write bio we schedule a work queue item 4363 + * to finish an ordered extent - btrfs_finish_compressed_write_work() 4364 + * calls btrfs_finish_ordered_extent() which in turns does a call to 4365 + * btrfs_queue_ordered_fn(), and that queues the ordered extent 4366 + * completion either in the endio_write_workers work queue or in the 4367 + * fs_info->endio_freespace_worker work queue. We flush those queues 4368 + * below, so before we flush them we must flush this queue for the 4369 + * workers of compressed writes. 4370 + */ 4371 + flush_workqueue(fs_info->compressed_write_workers); 4353 4372 4354 4373 /* 4355 4374 * After we parked the cleaner kthread, ordered extents may have ··· 4400 4369 /* Ordered extents for free space inodes. 
*/ 4401 4370 btrfs_flush_workqueue(fs_info->endio_freespace_worker); 4402 4371 btrfs_run_delayed_iputs(fs_info); 4372 + /* There should be no more workload to generate new delayed iputs. */ 4373 + set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); 4403 4374 4404 4375 cancel_work_sync(&fs_info->async_reclaim_work); 4405 4376 cancel_work_sync(&fs_info->async_data_reclaim_work); ··· 4435 4402 if (ret) 4436 4403 btrfs_err(fs_info, "commit super ret %d", ret); 4437 4404 } 4438 - 4439 - if (BTRFS_FS_ERROR(fs_info)) 4440 - btrfs_error_commit_super(fs_info); 4441 4405 4442 4406 kthread_stop(fs_info->transaction_kthread); 4443 4407 kthread_stop(fs_info->cleaner_kthread); ··· 4557 4527 { 4558 4528 /* cleanup FS via transaction */ 4559 4529 btrfs_cleanup_transaction(fs_info); 4560 - 4561 - mutex_lock(&fs_info->cleaner_mutex); 4562 - btrfs_run_delayed_iputs(fs_info); 4563 - mutex_unlock(&fs_info->cleaner_mutex); 4564 4530 4565 4531 down_write(&fs_info->cleanup_work_sem); 4566 4532 up_write(&fs_info->cleanup_work_sem); ··· 4928 4902 4929 4903 int btrfs_init_root_free_objectid(struct btrfs_root *root) 4930 4904 { 4931 - struct btrfs_path *path; 4905 + BTRFS_PATH_AUTO_FREE(path); 4932 4906 int ret; 4933 4907 struct extent_buffer *l; 4934 4908 struct btrfs_key search_key; ··· 4944 4918 search_key.offset = (u64)-1; 4945 4919 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 4946 4920 if (ret < 0) 4947 - goto error; 4921 + return ret; 4948 4922 if (ret == 0) { 4949 4923 /* 4950 4924 * Key with offset -1 found, there would have to exist a root 4951 4925 * with such id, but this is out of valid range. 
4952 4926 */ 4953 - ret = -EUCLEAN; 4954 - goto error; 4927 + return -EUCLEAN; 4955 4928 } 4956 4929 if (path->slots[0] > 0) { 4957 4930 slot = path->slots[0] - 1; ··· 4961 4936 } else { 4962 4937 root->free_objectid = BTRFS_FIRST_FREE_OBJECTID; 4963 4938 } 4964 - ret = 0; 4965 - error: 4966 - btrfs_free_path(path); 4967 - return ret; 4939 + 4940 + return 0; 4968 4941 } 4969 4942 4970 4943 int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid)
+26 -25
fs/btrfs/export.c
··· 75 75 { 76 76 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 77 77 struct btrfs_root *root; 78 - struct inode *inode; 78 + struct btrfs_inode *inode; 79 79 80 80 if (objectid < BTRFS_FIRST_FREE_OBJECTID) 81 81 return ERR_PTR(-ESTALE); ··· 89 89 if (IS_ERR(inode)) 90 90 return ERR_CAST(inode); 91 91 92 - if (generation != 0 && generation != inode->i_generation) { 93 - iput(inode); 92 + if (generation != 0 && generation != inode->vfs_inode.i_generation) { 93 + iput(&inode->vfs_inode); 94 94 return ERR_PTR(-ESTALE); 95 95 } 96 96 97 - return d_obtain_alias(inode); 97 + return d_obtain_alias(&inode->vfs_inode); 98 98 } 99 99 100 100 static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, ··· 145 145 146 146 struct dentry *btrfs_get_parent(struct dentry *child) 147 147 { 148 - struct inode *dir = d_inode(child); 149 - struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); 150 - struct btrfs_root *root = BTRFS_I(dir)->root; 148 + struct btrfs_inode *dir = BTRFS_I(d_inode(child)); 149 + struct btrfs_inode *inode; 150 + struct btrfs_root *root = dir->root; 151 + struct btrfs_fs_info *fs_info = root->fs_info; 151 152 struct btrfs_path *path; 152 153 struct extent_buffer *leaf; 153 154 struct btrfs_root_ref *ref; ··· 160 159 if (!path) 161 160 return ERR_PTR(-ENOMEM); 162 161 163 - if (btrfs_ino(BTRFS_I(dir)) == BTRFS_FIRST_FREE_OBJECTID) { 162 + if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) { 164 163 key.objectid = btrfs_root_id(root); 165 164 key.type = BTRFS_ROOT_BACKREF_KEY; 166 165 key.offset = (u64)-1; 167 166 root = fs_info->tree_root; 168 167 } else { 169 - key.objectid = btrfs_ino(BTRFS_I(dir)); 168 + key.objectid = btrfs_ino(dir); 170 169 key.type = BTRFS_INODE_REF_KEY; 171 170 key.offset = (u64)-1; 172 171 } ··· 211 210 found_key.offset, 0); 212 211 } 213 212 214 - return d_obtain_alias(btrfs_iget(key.objectid, root)); 213 + inode = btrfs_iget(key.objectid, root); 214 + if (IS_ERR(inode)) 215 + return ERR_CAST(inode); 216 + 217 + 
return d_obtain_alias(&inode->vfs_inode); 215 218 fail: 216 219 btrfs_free_path(path); 217 220 return ERR_PTR(ret); ··· 224 219 static int btrfs_get_name(struct dentry *parent, char *name, 225 220 struct dentry *child) 226 221 { 227 - struct inode *inode = d_inode(child); 228 - struct inode *dir = d_inode(parent); 229 - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 230 - struct btrfs_path *path; 231 - struct btrfs_root *root = BTRFS_I(dir)->root; 222 + struct btrfs_inode *inode = BTRFS_I(d_inode(child)); 223 + struct btrfs_inode *dir = BTRFS_I(d_inode(parent)); 224 + struct btrfs_root *root = dir->root; 225 + struct btrfs_fs_info *fs_info = root->fs_info; 226 + BTRFS_PATH_AUTO_FREE(path); 232 227 struct btrfs_inode_ref *iref; 233 228 struct btrfs_root_ref *rref; 234 229 struct extent_buffer *leaf; ··· 238 233 int ret; 239 234 u64 ino; 240 235 241 - if (!S_ISDIR(dir->i_mode)) 236 + if (!S_ISDIR(dir->vfs_inode.i_mode)) 242 237 return -EINVAL; 243 238 244 - ino = btrfs_ino(BTRFS_I(inode)); 239 + ino = btrfs_ino(inode); 245 240 246 241 path = btrfs_alloc_path(); 247 242 if (!path) 248 243 return -ENOMEM; 249 244 250 245 if (ino == BTRFS_FIRST_FREE_OBJECTID) { 251 - key.objectid = btrfs_root_id(BTRFS_I(inode)->root); 246 + key.objectid = btrfs_root_id(inode->root); 252 247 key.type = BTRFS_ROOT_BACKREF_KEY; 253 248 key.offset = (u64)-1; 254 249 root = fs_info->tree_root; 255 250 } else { 256 251 key.objectid = ino; 257 - key.offset = btrfs_ino(BTRFS_I(dir)); 258 252 key.type = BTRFS_INODE_REF_KEY; 253 + key.offset = btrfs_ino(dir); 259 254 } 260 255 261 256 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 262 257 if (ret < 0) { 263 - btrfs_free_path(path); 264 258 return ret; 265 259 } else if (ret > 0) { 266 - if (ino == BTRFS_FIRST_FREE_OBJECTID) { 260 + if (ino == BTRFS_FIRST_FREE_OBJECTID) 267 261 path->slots[0]--; 268 - } else { 269 - btrfs_free_path(path); 262 + else 270 263 return -ENOENT; 271 - } 272 264 } 273 265 leaf = path->nodes[0]; 274 266 
··· 282 280 } 283 281 284 282 read_extent_buffer(leaf, name, name_ptr, name_len); 285 - btrfs_free_path(path); 286 283 287 284 /* 288 285 * have to add the null termination to make sure that reconnect_path
+4 -4
fs/btrfs/extent-io-tree.c
··· 346 346 return tree_search_for_insert(tree, offset, NULL, NULL); 347 347 } 348 348 349 - static void extent_io_tree_panic(const struct extent_io_tree *tree, 350 - const struct extent_state *state, 351 - const char *opname, 352 - int err) 349 + static void __cold extent_io_tree_panic(const struct extent_io_tree *tree, 350 + const struct extent_state *state, 351 + const char *opname, 352 + int err) 353 353 { 354 354 btrfs_panic(extent_io_tree_to_fs_info(tree), err, 355 355 "extent io tree error on %s state start %llu end %llu",
+30 -33
fs/btrfs/extent-tree.c
··· 70 70 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) 71 71 { 72 72 struct btrfs_root *root = btrfs_extent_root(fs_info, start); 73 - int ret; 74 73 struct btrfs_key key; 75 - struct btrfs_path *path; 74 + BTRFS_PATH_AUTO_FREE(path); 76 75 77 76 path = btrfs_alloc_path(); 78 77 if (!path) 79 78 return -ENOMEM; 80 79 81 80 key.objectid = start; 82 - key.offset = len; 83 81 key.type = BTRFS_EXTENT_ITEM_KEY; 84 - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 85 - btrfs_free_path(path); 86 - return ret; 82 + key.offset = len; 83 + return btrfs_search_slot(NULL, root, &key, path, 0, 0); 87 84 } 88 85 89 86 /* ··· 100 103 struct btrfs_root *extent_root; 101 104 struct btrfs_delayed_ref_head *head; 102 105 struct btrfs_delayed_ref_root *delayed_refs; 103 - struct btrfs_path *path; 106 + BTRFS_PATH_AUTO_FREE(path); 104 107 struct btrfs_key key; 105 108 u64 num_refs; 106 109 u64 extent_flags; ··· 122 125 123 126 search_again: 124 127 key.objectid = bytenr; 125 - key.offset = offset; 126 128 if (metadata) 127 129 key.type = BTRFS_METADATA_ITEM_KEY; 128 130 else 129 131 key.type = BTRFS_EXTENT_ITEM_KEY; 132 + key.offset = offset; 130 133 131 134 extent_root = btrfs_extent_root(fs_info, bytenr); 132 135 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 133 136 if (ret < 0) 134 - goto out_free; 137 + return ret; 135 138 136 139 if (ret > 0 && key.type == BTRFS_METADATA_ITEM_KEY) { 137 140 if (path->slots[0]) { ··· 156 159 "unexpected extent item size, has %u expect >= %zu", 157 160 item_size, sizeof(*ei)); 158 161 btrfs_abort_transaction(trans, ret); 159 - goto out_free; 162 + return ret; 160 163 } 161 164 162 165 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); ··· 167 170 "unexpected zero reference count for extent item (%llu %u %llu)", 168 171 key.objectid, key.type, key.offset); 169 172 btrfs_abort_transaction(trans, ret); 170 - goto out_free; 173 + return ret; 171 174 } 172 175 extent_flags = 
btrfs_extent_flags(leaf, ei); 173 176 owner = btrfs_get_extent_owner_root(fs_info, leaf, path->slots[0]); ··· 213 216 *flags = extent_flags; 214 217 if (owning_root) 215 218 *owning_root = owner; 216 - out_free: 217 - btrfs_free_path(path); 219 + 218 220 return ret; 219 221 } 220 222 ··· 1483 1487 struct btrfs_delayed_ref_node *node, 1484 1488 struct btrfs_delayed_extent_op *extent_op) 1485 1489 { 1486 - struct btrfs_path *path; 1490 + BTRFS_PATH_AUTO_FREE(path); 1487 1491 struct extent_buffer *leaf; 1488 1492 struct btrfs_extent_item *item; 1489 1493 struct btrfs_key key; ··· 1504 1508 node->parent, node->ref_root, owner, 1505 1509 offset, refs_to_add, extent_op); 1506 1510 if ((ret < 0 && ret != -EAGAIN) || !ret) 1507 - goto out; 1511 + return ret; 1508 1512 1509 1513 /* 1510 1514 * Ok we had -EAGAIN which means we didn't have space to insert and ··· 1529 1533 1530 1534 if (ret) 1531 1535 btrfs_abort_transaction(trans, ret); 1532 - out: 1533 - btrfs_free_path(path); 1536 + 1534 1537 return ret; 1535 1538 } 1536 1539 ··· 1626 1631 struct btrfs_fs_info *fs_info = trans->fs_info; 1627 1632 struct btrfs_root *root; 1628 1633 struct btrfs_key key; 1629 - struct btrfs_path *path; 1634 + BTRFS_PATH_AUTO_FREE(path); 1630 1635 struct btrfs_extent_item *ei; 1631 1636 struct extent_buffer *leaf; 1632 1637 u32 item_size; ··· 1657 1662 again: 1658 1663 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1659 1664 if (ret < 0) { 1660 - goto out; 1665 + return ret; 1661 1666 } else if (ret > 0) { 1662 1667 if (metadata) { 1663 1668 if (path->slots[0] > 0) { ··· 1674 1679 metadata = 0; 1675 1680 1676 1681 key.objectid = head->bytenr; 1677 - key.offset = head->num_bytes; 1678 1682 key.type = BTRFS_EXTENT_ITEM_KEY; 1683 + key.offset = head->num_bytes; 1679 1684 goto again; 1680 1685 } 1681 1686 } else { ··· 1683 1688 btrfs_err(fs_info, 1684 1689 "missing extent item for extent %llu num_bytes %llu level %d", 1685 1690 head->bytenr, head->num_bytes, head->level); 1686 - goto 
out; 1691 + return ret; 1687 1692 } 1688 1693 } 1689 1694 ··· 1696 1701 "unexpected extent item size, has %u expect >= %zu", 1697 1702 item_size, sizeof(*ei)); 1698 1703 btrfs_abort_transaction(trans, ret); 1699 - goto out; 1704 + return ret; 1700 1705 } 1701 1706 1702 1707 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1703 1708 __run_delayed_extent_op(extent_op, leaf, ei); 1704 - out: 1705 - btrfs_free_path(path); 1709 + 1706 1710 return ret; 1707 1711 } 1708 1712 ··· 2342 2348 int ret; 2343 2349 2344 2350 key.objectid = bytenr; 2345 - key.offset = (u64)-1; 2346 2351 key.type = BTRFS_EXTENT_ITEM_KEY; 2352 + key.offset = (u64)-1; 2347 2353 2348 2354 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2349 2355 if (ret < 0) ··· 2868 2874 block_group->length, 2869 2875 &trimmed); 2870 2876 2877 + /* 2878 + * Not strictly necessary to lock, as the block_group should be 2879 + * read-only from btrfs_delete_unused_bgs(). 2880 + */ 2881 + ASSERT(block_group->ro); 2882 + spin_lock(&fs_info->unused_bgs_lock); 2871 2883 list_del_init(&block_group->bg_list); 2884 + spin_unlock(&fs_info->unused_bgs_lock); 2885 + 2872 2886 btrfs_unfreeze_block_group(block_group); 2873 2887 btrfs_put_block_group(block_group); 2874 2888 ··· 5467 5465 { 5468 5466 struct btrfs_delayed_ref_root *delayed_refs; 5469 5467 struct btrfs_delayed_ref_head *head; 5470 - struct btrfs_path *path; 5468 + BTRFS_PATH_AUTO_FREE(path); 5471 5469 struct btrfs_extent_inline_ref *iref; 5472 5470 int ret; 5473 5471 bool exists = false; ··· 5484 5482 * If we get 0 then we found our reference, return 1, else 5485 5483 * return the error if it's not -ENOENT; 5486 5484 */ 5487 - btrfs_free_path(path); 5488 5485 return (ret < 0 ) ? ret : 1; 5489 5486 } 5490 5487 ··· 5518 5517 mutex_unlock(&head->mutex); 5519 5518 out: 5520 5519 spin_unlock(&delayed_refs->lock); 5521 - btrfs_free_path(path); 5522 5520 return exists ? 
1 : 0; 5523 5521 } 5524 5522 ··· 6285 6285 struct extent_buffer *parent) 6286 6286 { 6287 6287 struct btrfs_fs_info *fs_info = root->fs_info; 6288 - struct btrfs_path *path; 6288 + BTRFS_PATH_AUTO_FREE(path); 6289 6289 struct walk_control *wc; 6290 6290 int level; 6291 6291 int parent_level; ··· 6298 6298 return -ENOMEM; 6299 6299 6300 6300 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6301 - if (!wc) { 6302 - btrfs_free_path(path); 6301 + if (!wc) 6303 6302 return -ENOMEM; 6304 - } 6305 6303 6306 6304 btrfs_assert_tree_write_locked(parent); 6307 6305 parent_level = btrfs_header_level(parent); ··· 6336 6338 } 6337 6339 6338 6340 kfree(wc); 6339 - btrfs_free_path(path); 6340 6341 return ret; 6341 6342 } 6342 6343
-1
fs/btrfs/extent-tree.h
··· 4 4 #define BTRFS_EXTENT_TREE_H 5 5 6 6 #include <linux/types.h> 7 - #include "misc.h" 8 7 #include "block-group.h" 9 8 #include "locking.h" 10 9
+346 -243
fs/btrfs/extent_io.c
··· 425 425 struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); 426 426 427 427 ASSERT(folio_pos(folio) <= start && 428 - start + len <= folio_pos(folio) + PAGE_SIZE); 428 + start + len <= folio_pos(folio) + folio_size(folio)); 429 429 430 430 if (uptodate && btrfs_verify_folio(folio, start, len)) 431 431 btrfs_folio_set_uptodate(fs_info, folio, start, len); 432 432 else 433 433 btrfs_folio_clear_uptodate(fs_info, folio, start, len); 434 434 435 - if (!btrfs_is_subpage(fs_info, folio->mapping)) 435 + if (!btrfs_is_subpage(fs_info, folio)) 436 436 folio_unlock(folio); 437 437 else 438 438 btrfs_folio_end_lock(fs_info, folio, start, len); ··· 488 488 static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio) 489 489 { 490 490 ASSERT(folio_test_locked(folio)); 491 - if (!btrfs_is_subpage(fs_info, folio->mapping)) 491 + if (!btrfs_is_subpage(fs_info, folio)) 492 492 return; 493 493 494 494 ASSERT(folio_test_private(folio)); 495 - btrfs_folio_set_lock(fs_info, folio, folio_pos(folio), PAGE_SIZE); 495 + btrfs_folio_set_lock(fs_info, folio, folio_pos(folio), folio_size(folio)); 496 496 } 497 497 498 498 /* ··· 753 753 { 754 754 struct btrfs_inode *inode = folio_to_inode(folio); 755 755 756 - ASSERT(pg_offset + size <= PAGE_SIZE); 756 + ASSERT(pg_offset + size <= folio_size(folio)); 757 757 ASSERT(bio_ctrl->end_io_func); 758 758 759 759 if (bio_ctrl->bbio && ··· 836 836 if (folio->mapping) 837 837 lockdep_assert_held(&folio->mapping->i_private_lock); 838 838 839 - if (fs_info->nodesize >= PAGE_SIZE) { 839 + if (!btrfs_meta_is_subpage(fs_info)) { 840 840 if (!folio_test_private(folio)) 841 841 folio_attach_private(folio, eb); 842 842 else ··· 870 870 871 871 fs_info = folio_to_fs_info(folio); 872 872 873 - if (btrfs_is_subpage(fs_info, folio->mapping)) 873 + if (btrfs_is_subpage(fs_info, folio)) 874 874 return btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA); 875 875 876 876 folio_attach_private(folio, (void *)EXTENT_FOLIO_PRIVATE); ··· 
887 887 return; 888 888 889 889 fs_info = folio_to_fs_info(folio); 890 - if (btrfs_is_subpage(fs_info, folio->mapping)) 891 - return btrfs_detach_subpage(fs_info, folio); 890 + if (btrfs_is_subpage(fs_info, folio)) 891 + return btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA); 892 892 893 893 folio_detach_private(folio); 894 894 } ··· 935 935 struct inode *inode = folio->mapping->host; 936 936 struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 937 937 u64 start = folio_pos(folio); 938 - const u64 end = start + PAGE_SIZE - 1; 939 - u64 cur = start; 938 + const u64 end = start + folio_size(folio) - 1; 940 939 u64 extent_offset; 941 940 u64 last_byte = i_size_read(inode); 942 - u64 block_start; 943 941 struct extent_map *em; 944 942 int ret = 0; 945 - size_t pg_offset = 0; 946 - size_t iosize; 947 - size_t blocksize = fs_info->sectorsize; 943 + const size_t blocksize = fs_info->sectorsize; 948 944 949 945 ret = set_folio_extent_mapped(folio); 950 946 if (ret < 0) { ··· 951 955 if (folio_contains(folio, last_byte >> PAGE_SHIFT)) { 952 956 size_t zero_offset = offset_in_folio(folio, last_byte); 953 957 954 - if (zero_offset) { 955 - iosize = folio_size(folio) - zero_offset; 956 - folio_zero_range(folio, zero_offset, iosize); 957 - } 958 + if (zero_offset) 959 + folio_zero_range(folio, zero_offset, 960 + folio_size(folio) - zero_offset); 958 961 } 959 962 bio_ctrl->end_io_func = end_bbio_data_read; 960 963 begin_folio_read(fs_info, folio); 961 - while (cur <= end) { 964 + for (u64 cur = start; cur <= end; cur += blocksize) { 962 965 enum btrfs_compression_type compress_type = BTRFS_COMPRESS_NONE; 966 + unsigned long pg_offset = offset_in_folio(folio, cur); 963 967 bool force_bio_submit = false; 964 968 u64 disk_bytenr; 969 + u64 block_start; 965 970 966 971 ASSERT(IS_ALIGNED(cur, fs_info->sectorsize)); 967 972 if (cur >= last_byte) { 968 - iosize = folio_size(folio) - pg_offset; 969 - folio_zero_range(folio, pg_offset, iosize); 970 - end_folio_read(folio, 
true, cur, iosize); 973 + folio_zero_range(folio, pg_offset, end - cur + 1); 974 + end_folio_read(folio, true, cur, end - cur + 1); 971 975 break; 976 + } 977 + if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) { 978 + end_folio_read(folio, true, cur, blocksize); 979 + continue; 972 980 } 973 981 em = get_extent_map(BTRFS_I(inode), folio, cur, end - cur + 1, em_cached); 974 982 if (IS_ERR(em)) { ··· 985 985 986 986 compress_type = extent_map_compression(em); 987 987 988 - iosize = min(extent_map_end(em) - cur, end - cur + 1); 989 - iosize = ALIGN(iosize, blocksize); 990 988 if (compress_type != BTRFS_COMPRESS_NONE) 991 989 disk_bytenr = em->disk_bytenr; 992 990 else 993 991 disk_bytenr = extent_map_block_start(em) + extent_offset; 994 - block_start = extent_map_block_start(em); 992 + 995 993 if (em->flags & EXTENT_FLAG_PREALLOC) 996 994 block_start = EXTENT_MAP_HOLE; 995 + else 996 + block_start = extent_map_block_start(em); 997 997 998 998 /* 999 999 * If we have a file range that points to a compressed extent ··· 1042 1042 1043 1043 /* we've found a hole, just zero and go on */ 1044 1044 if (block_start == EXTENT_MAP_HOLE) { 1045 - folio_zero_range(folio, pg_offset, iosize); 1046 - 1047 - end_folio_read(folio, true, cur, iosize); 1048 - cur = cur + iosize; 1049 - pg_offset += iosize; 1045 + folio_zero_range(folio, pg_offset, blocksize); 1046 + end_folio_read(folio, true, cur, blocksize); 1050 1047 continue; 1051 1048 } 1052 1049 /* the get_extent function already copied into the folio */ 1053 1050 if (block_start == EXTENT_MAP_INLINE) { 1054 - end_folio_read(folio, true, cur, iosize); 1055 - cur = cur + iosize; 1056 - pg_offset += iosize; 1051 + end_folio_read(folio, true, cur, blocksize); 1057 1052 continue; 1058 1053 } 1059 1054 ··· 1059 1064 1060 1065 if (force_bio_submit) 1061 1066 submit_one_bio(bio_ctrl); 1062 - submit_extent_folio(bio_ctrl, disk_bytenr, folio, iosize, 1067 + submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize, 1063 
1068 pg_offset); 1064 - cur = cur + iosize; 1065 - pg_offset += iosize; 1069 + } 1070 + return 0; 1071 + } 1072 + 1073 + /* 1074 + * Check if we can skip waiting the @ordered extent covering the block at @fileoff. 1075 + * 1076 + * @fileoff: Both input and output. 1077 + * Input as the file offset where the check should start at. 1078 + * Output as where the next check should start at, 1079 + * if the function returns true. 1080 + * 1081 + * Return true if we can skip to @fileoff. The caller needs to check the new 1082 + * @fileoff value to make sure it covers the full range, before skipping the 1083 + * full OE. 1084 + * 1085 + * Return false if we must wait for the ordered extent. 1086 + */ 1087 + static bool can_skip_one_ordered_range(struct btrfs_inode *inode, 1088 + struct btrfs_ordered_extent *ordered, 1089 + u64 *fileoff) 1090 + { 1091 + const struct btrfs_fs_info *fs_info = inode->root->fs_info; 1092 + struct folio *folio; 1093 + const u32 blocksize = fs_info->sectorsize; 1094 + u64 cur = *fileoff; 1095 + bool ret; 1096 + 1097 + folio = filemap_get_folio(inode->vfs_inode.i_mapping, cur >> PAGE_SHIFT); 1098 + 1099 + /* 1100 + * We should have locked the folio(s) for range [start, end], thus 1101 + * there must be a folio and it must be locked. 1102 + */ 1103 + ASSERT(!IS_ERR(folio)); 1104 + ASSERT(folio_test_locked(folio)); 1105 + 1106 + /* 1107 + * There are several cases for the folio and OE combination: 1108 + * 1109 + * 1) Folio has no private flag 1110 + * The OE has all its IO done but not yet finished, and folio got 1111 + * invalidated. 1112 + * 1113 + * Have we have to wait for the OE to finish, as it may contain the 1114 + * to-be-inserted data checksum. 1115 + * Without the data checksum inserted into the csum tree, read will 1116 + * just fail with missing csum. 1117 + */ 1118 + if (!folio_test_private(folio)) { 1119 + ret = false; 1120 + goto out; 1066 1121 } 1067 1122 1068 - return 0; 1123 + /* 1124 + * 2) The first block is DIRTY. 
1125 + * 1126 + * This means the OE is created by some other folios whose file pos is 1127 + * before this one. And since we are holding the folio lock, the writeback 1128 + * of this folio cannot start. 1129 + * 1130 + * We must skip the whole OE, because it will never start until we 1131 + * finished our folio read and unlocked the folio. 1132 + */ 1133 + if (btrfs_folio_test_dirty(fs_info, folio, cur, blocksize)) { 1134 + u64 range_len = min(folio_pos(folio) + folio_size(folio), 1135 + ordered->file_offset + ordered->num_bytes) - cur; 1136 + 1137 + ret = true; 1138 + /* 1139 + * At least inside the folio, all the remaining blocks should 1140 + * also be dirty. 1141 + */ 1142 + ASSERT(btrfs_folio_test_dirty(fs_info, folio, cur, range_len)); 1143 + *fileoff = ordered->file_offset + ordered->num_bytes; 1144 + goto out; 1145 + } 1146 + 1147 + /* 1148 + * 3) The first block is uptodate. 1149 + * 1150 + * At least the first block can be skipped, but we are still not fully 1151 + * sure. E.g. if the OE has some other folios in the range that cannot 1152 + * be skipped. 1153 + * So we return true and update @next_ret to the OE/folio boundary. 1154 + */ 1155 + if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) { 1156 + u64 range_len = min(folio_pos(folio) + folio_size(folio), 1157 + ordered->file_offset + ordered->num_bytes) - cur; 1158 + 1159 + /* 1160 + * The whole range to the OE end or folio boundary should also 1161 + * be uptodate. 1162 + */ 1163 + ASSERT(btrfs_folio_test_uptodate(fs_info, folio, cur, range_len)); 1164 + ret = true; 1165 + *fileoff = cur + range_len; 1166 + goto out; 1167 + } 1168 + 1169 + /* 1170 + * 4) The first block is not uptodate. 1171 + * 1172 + * This means the folio is invalidated after the writeback was finished, 1173 + * but by some other operations (e.g. block aligned buffered write) the 1174 + * folio is inserted into filemap. 1175 + * Very much the same as case 1). 
1176 + */ 1177 + ret = false; 1178 + out: 1179 + folio_put(folio); 1180 + return ret; 1181 + } 1182 + 1183 + static bool can_skip_ordered_extent(struct btrfs_inode *inode, 1184 + struct btrfs_ordered_extent *ordered, 1185 + u64 start, u64 end) 1186 + { 1187 + const u64 range_end = min(end, ordered->file_offset + ordered->num_bytes - 1); 1188 + u64 cur = max(start, ordered->file_offset); 1189 + 1190 + while (cur < range_end) { 1191 + bool can_skip; 1192 + 1193 + can_skip = can_skip_one_ordered_range(inode, ordered, &cur); 1194 + if (!can_skip) 1195 + return false; 1196 + } 1197 + return true; 1198 + } 1199 + 1200 + /* 1201 + * Locking helper to make sure we get a stable view of extent maps for the 1202 + * involved range. 1203 + * 1204 + * This is for folio read paths (read and readahead), thus the involved range 1205 + * should have all the folios locked. 1206 + */ 1207 + static void lock_extents_for_read(struct btrfs_inode *inode, u64 start, u64 end, 1208 + struct extent_state **cached_state) 1209 + { 1210 + u64 cur_pos; 1211 + 1212 + /* Caller must provide a valid @cached_state. */ 1213 + ASSERT(cached_state); 1214 + 1215 + /* The range must at least be page aligned, as all read paths are folio based. */ 1216 + ASSERT(IS_ALIGNED(start, PAGE_SIZE)); 1217 + ASSERT(IS_ALIGNED(end + 1, PAGE_SIZE)); 1218 + 1219 + again: 1220 + lock_extent(&inode->io_tree, start, end, cached_state); 1221 + cur_pos = start; 1222 + while (cur_pos < end) { 1223 + struct btrfs_ordered_extent *ordered; 1224 + 1225 + ordered = btrfs_lookup_ordered_range(inode, cur_pos, 1226 + end - cur_pos + 1); 1227 + /* 1228 + * No ordered extents in the range, and we hold the extent lock, 1229 + * no one can modify the extent maps in the range, we're safe to return. 1230 + */ 1231 + if (!ordered) 1232 + break; 1233 + 1234 + /* Check if we can skip waiting for the whole OE. 
*/ 1235 + if (can_skip_ordered_extent(inode, ordered, start, end)) { 1236 + cur_pos = min(ordered->file_offset + ordered->num_bytes, 1237 + end + 1); 1238 + btrfs_put_ordered_extent(ordered); 1239 + continue; 1240 + } 1241 + 1242 + /* Now wait for the OE to finish. */ 1243 + unlock_extent(&inode->io_tree, start, end, cached_state); 1244 + btrfs_start_ordered_extent_nowriteback(ordered, start, end + 1 - start); 1245 + btrfs_put_ordered_extent(ordered); 1246 + /* We have unlocked the whole range, restart from the beginning. */ 1247 + goto again; 1248 + } 1069 1249 } 1070 1250 1071 1251 int btrfs_read_folio(struct file *file, struct folio *folio) ··· 1253 1083 struct extent_map *em_cached = NULL; 1254 1084 int ret; 1255 1085 1256 - btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state); 1086 + lock_extents_for_read(inode, start, end, &cached_state); 1257 1087 ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL); 1258 1088 unlock_extent(&inode->io_tree, start, end, &cached_state); 1259 1089 ··· 1275 1105 unsigned int start_bit; 1276 1106 unsigned int nbits; 1277 1107 1278 - ASSERT(start >= folio_start && start + len <= folio_start + PAGE_SIZE); 1108 + ASSERT(start >= folio_start && start + len <= folio_start + folio_size(folio)); 1279 1109 start_bit = (start - folio_start) >> fs_info->sectorsize_bits; 1280 1110 nbits = len >> fs_info->sectorsize_bits; 1281 1111 ASSERT(bitmap_test_range_all_zero(delalloc_bitmap, start_bit, nbits)); ··· 1288 1118 { 1289 1119 struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); 1290 1120 const u64 folio_start = folio_pos(folio); 1291 - const unsigned int bitmap_size = fs_info->sectors_per_page; 1121 + const unsigned int bitmap_size = btrfs_blocks_per_folio(fs_info, folio); 1292 1122 unsigned int start_bit; 1293 1123 unsigned int first_zero; 1294 1124 unsigned int first_set; 1295 1125 1296 - ASSERT(start >= folio_start && start < folio_start + PAGE_SIZE); 1126 + ASSERT(start >= folio_start && start < folio_start + 
folio_size(folio)); 1297 1127 1298 1128 start_bit = (start - folio_start) >> fs_info->sectorsize_bits; 1299 1129 first_set = find_next_bit(delalloc_bitmap, bitmap_size, start_bit); ··· 1327 1157 { 1328 1158 struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode); 1329 1159 struct writeback_control *wbc = bio_ctrl->wbc; 1330 - const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping); 1160 + const bool is_subpage = btrfs_is_subpage(fs_info, folio); 1331 1161 const u64 page_start = folio_pos(folio); 1332 1162 const u64 page_end = page_start + folio_size(folio) - 1; 1163 + const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 1333 1164 unsigned long delalloc_bitmap = 0; 1334 1165 /* 1335 1166 * Save the last found delalloc end. As the delalloc end can go beyond ··· 1355 1184 int bit; 1356 1185 1357 1186 /* Save the dirty bitmap as our submission bitmap will be a subset of it. */ 1358 - if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) { 1359 - ASSERT(fs_info->sectors_per_page > 1); 1187 + if (btrfs_is_subpage(fs_info, folio)) { 1188 + ASSERT(blocks_per_folio > 1); 1360 1189 btrfs_get_subpage_dirty_bitmap(fs_info, folio, &bio_ctrl->submit_bitmap); 1361 1190 } else { 1362 1191 bio_ctrl->submit_bitmap = 1; 1363 1192 } 1364 1193 1365 - for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) { 1194 + for_each_set_bit(bit, &bio_ctrl->submit_bitmap, blocks_per_folio) { 1366 1195 u64 start = page_start + (bit << fs_info->sectorsize_bits); 1367 1196 1368 1197 btrfs_folio_set_lock(fs_info, folio, start, fs_info->sectorsize); ··· 1435 1264 btrfs_root_id(inode->root), 1436 1265 btrfs_ino(inode), 1437 1266 folio_pos(folio), 1438 - fs_info->sectors_per_page, 1267 + blocks_per_folio, 1439 1268 &bio_ctrl->submit_bitmap, 1440 1269 found_start, found_len, ret); 1441 1270 } else { ··· 1480 1309 unsigned int bitmap_size = min( 1481 1310 (last_finished_delalloc_end - page_start) >> 1482 1311 fs_info->sectorsize_bits, 
1483 - fs_info->sectors_per_page); 1312 + blocks_per_folio); 1484 1313 1485 1314 for_each_set_bit(bit, &bio_ctrl->submit_bitmap, bitmap_size) 1486 1315 btrfs_mark_ordered_io_finished(inode, folio, ··· 1495 1324 delalloc_end = page_end; 1496 1325 /* 1497 1326 * delalloc_end is already one less than the total length, so 1498 - * we don't subtract one from PAGE_SIZE 1327 + * we don't subtract one from PAGE_SIZE. 1499 1328 */ 1500 1329 delalloc_to_write += 1501 1330 DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE); ··· 1504 1333 * If all ranges are submitted asynchronously, we just need to account 1505 1334 * for them here. 1506 1335 */ 1507 - if (bitmap_empty(&bio_ctrl->submit_bitmap, fs_info->sectors_per_page)) { 1336 + if (bitmap_empty(&bio_ctrl->submit_bitmap, blocks_per_folio)) { 1508 1337 wbc->nr_to_write -= delalloc_to_write; 1509 1338 return 1; 1510 1339 } ··· 1605 1434 bool submitted_io = false; 1606 1435 bool error = false; 1607 1436 const u64 folio_start = folio_pos(folio); 1437 + const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 1608 1438 u64 cur; 1609 1439 int bit; 1610 1440 int ret = 0; ··· 1614 1442 start + len <= folio_start + folio_size(folio)); 1615 1443 1616 1444 ret = btrfs_writepage_cow_fixup(folio); 1617 - if (ret) { 1445 + if (ret == -EAGAIN) { 1618 1446 /* Fixup worker will requeue */ 1619 1447 folio_redirty_for_writepage(bio_ctrl->wbc, folio); 1620 1448 folio_unlock(folio); 1621 1449 return 1; 1622 1450 } 1451 + if (ret < 0) 1452 + return ret; 1623 1453 1624 1454 for (cur = start; cur < start + len; cur += fs_info->sectorsize) 1625 1455 set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap); 1626 1456 bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap, 1627 - fs_info->sectors_per_page); 1457 + blocks_per_folio); 1628 1458 1629 1459 bio_ctrl->end_io_func = end_bbio_data_write; 1630 1460 1631 - for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) { 
1461 + for_each_set_bit(bit, &bio_ctrl->submit_bitmap, blocks_per_folio) { 1632 1462 cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits); 1633 1463 1634 1464 if (cur >= i_size) { ··· 1704 1530 size_t pg_offset; 1705 1531 loff_t i_size = i_size_read(&inode->vfs_inode); 1706 1532 unsigned long end_index = i_size >> PAGE_SHIFT; 1533 + const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 1707 1534 1708 1535 trace_extent_writepage(folio, &inode->vfs_inode, bio_ctrl->wbc); 1709 1536 ··· 1726 1551 * The proper bitmap can only be initialized until writepage_delalloc(). 1727 1552 */ 1728 1553 bio_ctrl->submit_bitmap = (unsigned long)-1; 1554 + 1555 + /* 1556 + * If the page is dirty but without private set, it's marked dirty 1557 + * without informing the fs. 1558 + * Nowadays that is a bug, since the introduction of 1559 + * pin_user_pages*(). 1560 + * 1561 + * So here we check if the page has private set to rule out such 1562 + * case. 1563 + * But we also have a long history of relying on the COW fixup, 1564 + * so here we only enable this check for experimental builds until 1565 + * we're sure it's safe. 
1566 + */ 1567 + if (IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL) && 1568 + unlikely(!folio_test_private(folio))) { 1569 + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); 1570 + btrfs_err_rl(fs_info, 1571 + "root %lld ino %llu folio %llu is marked dirty without notifying the fs", 1572 + inode->root->root_key.objectid, 1573 + btrfs_ino(inode), folio_pos(folio)); 1574 + ret = -EUCLEAN; 1575 + goto done; 1576 + } 1577 + 1729 1578 ret = set_folio_extent_mapped(folio); 1730 1579 if (ret < 0) 1731 1580 goto done; ··· 1761 1562 goto done; 1762 1563 1763 1564 ret = extent_writepage_io(inode, folio, folio_pos(folio), 1764 - PAGE_SIZE, bio_ctrl, i_size); 1565 + folio_size(folio), bio_ctrl, i_size); 1765 1566 if (ret == 1) 1766 1567 return 0; 1767 1568 if (ret < 0) 1768 1569 btrfs_err_rl(fs_info, 1769 1570 "failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d", 1770 1571 btrfs_root_id(inode->root), btrfs_ino(inode), 1771 - folio_pos(folio), fs_info->sectors_per_page, 1572 + folio_pos(folio), blocks_per_folio, 1772 1573 &bio_ctrl->submit_bitmap, ret); 1773 1574 1774 1575 bio_ctrl->wbc->nr_to_write--; ··· 1924 1725 static void end_bbio_meta_write(struct btrfs_bio *bbio) 1925 1726 { 1926 1727 struct extent_buffer *eb = bbio->private; 1927 - struct btrfs_fs_info *fs_info = eb->fs_info; 1928 1728 struct folio_iter fi; 1929 - u32 bio_offset = 0; 1930 1729 1931 1730 if (bbio->bio.bi_status != BLK_STS_OK) 1932 1731 set_btree_ioerr(eb); 1933 1732 1934 1733 bio_for_each_folio_all(fi, &bbio->bio) { 1935 - u64 start = eb->start + bio_offset; 1936 - struct folio *folio = fi.folio; 1937 - u32 len = fi.length; 1938 - 1939 - btrfs_folio_clear_writeback(fs_info, folio, start, len); 1940 - bio_offset += len; 1734 + btrfs_meta_folio_clear_writeback(fi.folio, eb); 1941 1735 } 1942 1736 1943 1737 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); ··· 1984 1792 wbc_init_bio(wbc, &bbio->bio); 1985 1793 bbio->inode = BTRFS_I(eb->fs_info->btree_inode); 1986 1794 bbio->file_offset = 
eb->start; 1987 - if (fs_info->nodesize < PAGE_SIZE) { 1988 - struct folio *folio = eb->folios[0]; 1989 - bool ret; 1795 + for (int i = 0; i < num_extent_folios(eb); i++) { 1796 + struct folio *folio = eb->folios[i]; 1797 + u64 range_start = max_t(u64, eb->start, folio_pos(folio)); 1798 + u32 range_len = min_t(u64, folio_pos(folio) + folio_size(folio), 1799 + eb->start + eb->len) - range_start; 1990 1800 1991 1801 folio_lock(folio); 1992 - btrfs_subpage_set_writeback(fs_info, folio, eb->start, eb->len); 1993 - if (btrfs_subpage_clear_and_test_dirty(fs_info, folio, eb->start, 1994 - eb->len)) { 1995 - folio_clear_dirty_for_io(folio); 1996 - wbc->nr_to_write--; 1997 - } 1998 - ret = bio_add_folio(&bbio->bio, folio, eb->len, 1999 - eb->start - folio_pos(folio)); 2000 - ASSERT(ret); 2001 - wbc_account_cgroup_owner(wbc, folio, eb->len); 2002 - folio_unlock(folio); 2003 - } else { 2004 - int num_folios = num_extent_folios(eb); 2005 - 2006 - for (int i = 0; i < num_folios; i++) { 2007 - struct folio *folio = eb->folios[i]; 2008 - bool ret; 2009 - 2010 - folio_lock(folio); 2011 - folio_clear_dirty_for_io(folio); 2012 - folio_start_writeback(folio); 2013 - ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0); 2014 - ASSERT(ret); 2015 - wbc_account_cgroup_owner(wbc, folio, eb->folio_size); 1802 + btrfs_meta_folio_clear_dirty(folio, eb); 1803 + btrfs_meta_folio_set_writeback(folio, eb); 1804 + if (!folio_test_dirty(folio)) 2016 1805 wbc->nr_to_write -= folio_nr_pages(folio); 2017 - folio_unlock(folio); 2018 - } 1806 + bio_add_folio_nofail(&bbio->bio, folio, range_len, 1807 + offset_in_folio(folio, range_start)); 1808 + wbc_account_cgroup_owner(wbc, folio, range_len); 1809 + folio_unlock(folio); 2019 1810 } 2020 1811 btrfs_submit_bbio(bbio, 0); 2021 1812 } ··· 2024 1849 u64 folio_start = folio_pos(folio); 2025 1850 int bit_start = 0; 2026 1851 int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits; 1852 + const unsigned int blocks_per_folio = 
btrfs_blocks_per_folio(fs_info, folio); 2027 1853 2028 1854 /* Lock and write each dirty extent buffers in the range */ 2029 - while (bit_start < fs_info->sectors_per_page) { 1855 + while (bit_start < blocks_per_folio) { 2030 1856 struct btrfs_subpage *subpage = folio_get_private(folio); 2031 1857 struct extent_buffer *eb; 2032 1858 unsigned long flags; ··· 2043 1867 break; 2044 1868 } 2045 1869 spin_lock_irqsave(&subpage->lock, flags); 2046 - if (!test_bit(bit_start + btrfs_bitmap_nr_dirty * fs_info->sectors_per_page, 1870 + if (!test_bit(bit_start + btrfs_bitmap_nr_dirty * blocks_per_folio, 2047 1871 subpage->bitmaps)) { 2048 1872 spin_unlock_irqrestore(&subpage->lock, flags); 2049 1873 spin_unlock(&folio->mapping->i_private_lock); ··· 2109 1933 if (!folio_test_private(folio)) 2110 1934 return 0; 2111 1935 2112 - if (folio_to_fs_info(folio)->nodesize < PAGE_SIZE) 1936 + if (btrfs_meta_is_subpage(folio_to_fs_info(folio))) 2113 1937 return submit_eb_subpage(folio, wbc); 2114 1938 2115 1939 spin_lock(&mapping->i_private_lock); ··· 2368 2192 done_index = folio_next_index(folio); 2369 2193 /* 2370 2194 * At this point we hold neither the i_pages lock nor 2371 - * the page lock: the page may be truncated or 2372 - * invalidated (changing page->mapping to NULL), 2373 - * or even swizzled back from swapper_space to 2374 - * tmpfs file mapping 2195 + * the folio lock: the folio may be truncated or 2196 + * invalidated (changing folio->mapping to NULL). 2375 2197 */ 2376 2198 if (!folio_trylock(folio)) { 2377 2199 submit_write_bio(bio_ctrl, 0); ··· 2407 2233 * regular submission. 
2408 2234 */ 2409 2235 if (wbc->sync_mode != WB_SYNC_NONE || 2410 - btrfs_is_subpage(inode_to_fs_info(inode), mapping)) { 2236 + btrfs_is_subpage(inode_to_fs_info(inode), folio)) { 2411 2237 if (folio_test_writeback(folio)) 2412 2238 submit_write_bio(bio_ctrl, 0); 2413 2239 folio_wait_writeback(folio); ··· 2488 2314 ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize)); 2489 2315 2490 2316 while (cur <= end) { 2491 - u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end); 2492 - u32 cur_len = cur_end + 1 - cur; 2317 + u64 cur_end; 2318 + u32 cur_len; 2493 2319 struct folio *folio; 2494 2320 2495 2321 folio = filemap_get_folio(mapping, cur >> PAGE_SHIFT); ··· 2499 2325 * code is just in case, but shouldn't actually be run. 2500 2326 */ 2501 2327 if (IS_ERR(folio)) { 2328 + cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end); 2329 + cur_len = cur_end + 1 - cur; 2502 2330 btrfs_mark_ordered_io_finished(BTRFS_I(inode), NULL, 2503 2331 cur, cur_len, false); 2504 2332 mapping_set_error(mapping, PTR_ERR(folio)); 2505 - cur = cur_end + 1; 2333 + cur = cur_end; 2506 2334 continue; 2507 2335 } 2336 + 2337 + cur_end = min_t(u64, folio_pos(folio) + folio_size(folio) - 1, end); 2338 + cur_len = cur_end + 1 - cur; 2508 2339 2509 2340 ASSERT(folio_test_locked(folio)); 2510 2341 if (pages_dirty && folio != locked_folio) ··· 2569 2390 struct extent_map *em_cached = NULL; 2570 2391 u64 prev_em_start = (u64)-1; 2571 2392 2572 - btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state); 2393 + lock_extents_for_read(inode, start, end, &cached_state); 2573 2394 2574 2395 while ((folio = readahead_folio(rac)) != NULL) 2575 2396 btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start); ··· 2622 2443 struct folio *folio) 2623 2444 { 2624 2445 u64 start = folio_pos(folio); 2625 - u64 end = start + PAGE_SIZE - 1; 2446 + u64 end = start + folio_size(folio) - 1; 2626 2447 bool ret; 2627 2448 2628 2449 if (test_range_bit_exists(tree, 
start, end, EXTENT_LOCKED)) { ··· 2660 2481 bool try_release_extent_mapping(struct folio *folio, gfp_t mask) 2661 2482 { 2662 2483 u64 start = folio_pos(folio); 2663 - u64 end = start + PAGE_SIZE - 1; 2484 + u64 end = start + folio_size(folio) - 1; 2664 2485 struct btrfs_inode *inode = folio_to_inode(folio); 2665 2486 struct extent_io_tree *io_tree = &inode->io_tree; 2666 2487 ··· 2771 2592 return; 2772 2593 } 2773 2594 2774 - if (fs_info->nodesize >= PAGE_SIZE) { 2595 + if (!btrfs_meta_is_subpage(fs_info)) { 2775 2596 /* 2776 2597 * We do this since we'll remove the pages after we've 2777 2598 * removed the eb from the radix tree, so we could race ··· 2797 2618 * attached to one dummy eb, no sharing. 2798 2619 */ 2799 2620 if (!mapped) { 2800 - btrfs_detach_subpage(fs_info, folio); 2621 + btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA); 2801 2622 return; 2802 2623 } 2803 2624 ··· 2808 2629 * page range and no unfinished IO. 2809 2630 */ 2810 2631 if (!folio_range_has_eb(folio)) 2811 - btrfs_detach_subpage(fs_info, folio); 2632 + btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA); 2812 2633 2813 2634 spin_unlock(&folio->mapping->i_private_lock); 2814 2635 } ··· 2841 2662 kmem_cache_free(extent_buffer_cache, eb); 2842 2663 } 2843 2664 2844 - static struct extent_buffer * 2845 - __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, 2846 - unsigned long len) 2665 + static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *fs_info, 2666 + u64 start) 2847 2667 { 2848 2668 struct extent_buffer *eb = NULL; 2849 2669 2850 2670 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL); 2851 2671 eb->start = start; 2852 - eb->len = len; 2672 + eb->len = fs_info->nodesize; 2853 2673 eb->fs_info = fs_info; 2854 2674 init_rwsem(&eb->lock); 2855 2675 ··· 2857 2679 spin_lock_init(&eb->refs_lock); 2858 2680 atomic_set(&eb->refs, 1); 2859 2681 2860 - ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE); 2682 + ASSERT(eb->len <= 
BTRFS_MAX_METADATA_BLOCKSIZE); 2861 2683 2862 2684 return eb; 2863 2685 } ··· 2865 2687 struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src) 2866 2688 { 2867 2689 struct extent_buffer *new; 2868 - int num_folios = num_extent_folios(src); 2869 2690 int ret; 2870 2691 2871 - new = __alloc_extent_buffer(src->fs_info, src->start, src->len); 2692 + new = __alloc_extent_buffer(src->fs_info, src->start); 2872 2693 if (new == NULL) 2873 2694 return NULL; 2874 2695 ··· 2884 2707 return NULL; 2885 2708 } 2886 2709 2887 - for (int i = 0; i < num_folios; i++) { 2710 + for (int i = 0; i < num_extent_folios(src); i++) { 2888 2711 struct folio *folio = new->folios[i]; 2889 2712 2890 2713 ret = attach_extent_buffer_folio(new, folio, NULL); ··· 2900 2723 return new; 2901 2724 } 2902 2725 2903 - struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, 2904 - u64 start, unsigned long len) 2726 + struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, 2727 + u64 start) 2905 2728 { 2906 2729 struct extent_buffer *eb; 2907 - int num_folios = 0; 2908 2730 int ret; 2909 2731 2910 - eb = __alloc_extent_buffer(fs_info, start, len); 2732 + eb = __alloc_extent_buffer(fs_info, start); 2911 2733 if (!eb) 2912 2734 return NULL; 2913 2735 2914 2736 ret = alloc_eb_folio_array(eb, false); 2915 2737 if (ret) 2916 - goto err; 2738 + goto out; 2917 2739 2918 - num_folios = num_extent_folios(eb); 2919 - for (int i = 0; i < num_folios; i++) { 2740 + for (int i = 0; i < num_extent_folios(eb); i++) { 2920 2741 ret = attach_extent_buffer_folio(eb, eb->folios[i], NULL); 2921 2742 if (ret < 0) 2922 - goto err; 2743 + goto out_detach; 2923 2744 } 2924 2745 2925 2746 set_extent_buffer_uptodate(eb); ··· 2925 2750 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); 2926 2751 2927 2752 return eb; 2928 - err: 2929 - for (int i = 0; i < num_folios; i++) { 2753 + 2754 + out_detach: 2755 + for (int i = 0; i < num_extent_folios(eb); i++) { 2930 2756 
if (eb->folios[i]) { 2931 2757 detach_extent_buffer_folio(eb, eb->folios[i]); 2932 2758 folio_put(eb->folios[i]); 2933 2759 } 2934 2760 } 2761 + out: 2935 2762 kmem_cache_free(extent_buffer_cache, eb); 2936 2763 return NULL; 2937 - } 2938 - 2939 - struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, 2940 - u64 start) 2941 - { 2942 - return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize); 2943 2764 } 2944 2765 2945 2766 static void check_buffer_tree_ref(struct extent_buffer *eb) ··· 2976 2805 2977 2806 static void mark_extent_buffer_accessed(struct extent_buffer *eb) 2978 2807 { 2979 - int num_folios= num_extent_folios(eb); 2980 - 2981 2808 check_buffer_tree_ref(eb); 2982 2809 2983 - for (int i = 0; i < num_folios; i++) 2810 + for (int i = 0; i < num_extent_folios(eb); i++) 2984 2811 folio_mark_accessed(eb->folios[i]); 2985 2812 } 2986 2813 ··· 3011 2842 return eb; 3012 2843 } 3013 2844 3014 - #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3015 2845 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, 3016 2846 u64 start) 3017 2847 { 2848 + #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3018 2849 struct extent_buffer *eb, *exists = NULL; 3019 2850 int ret; 3020 2851 ··· 3050 2881 free_eb: 3051 2882 btrfs_release_extent_buffer(eb); 3052 2883 return exists; 3053 - } 2884 + #else 2885 + /* Stub to avoid linker error when compiled with optimizations turned off. */ 2886 + return NULL; 3054 2887 #endif 2888 + } 3055 2889 3056 2890 static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info, 3057 2891 struct folio *folio) ··· 3068 2896 * don't try to insert two ebs for the same bytenr. So here we always 3069 2897 * return NULL and just continue. 
3070 2898 */ 3071 - if (fs_info->nodesize < PAGE_SIZE) 2899 + if (btrfs_meta_is_subpage(fs_info)) 3072 2900 return NULL; 3073 2901 3074 2902 /* Page not yet attached to an extent buffer */ ··· 3171 2999 3172 3000 finish: 3173 3001 spin_lock(&mapping->i_private_lock); 3174 - if (existing_folio && fs_info->nodesize < PAGE_SIZE) { 3002 + if (existing_folio && btrfs_meta_is_subpage(fs_info)) { 3175 3003 /* We're going to reuse the existing page, can drop our folio now. */ 3176 3004 __free_page(folio_page(eb->folios[i], 0)); 3177 3005 eb->folios[i] = existing_folio; ··· 3213 3041 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, 3214 3042 u64 start, u64 owner_root, int level) 3215 3043 { 3216 - unsigned long len = fs_info->nodesize; 3217 - int num_folios; 3218 3044 int attached = 0; 3219 3045 struct extent_buffer *eb; 3220 3046 struct extent_buffer *existing_eb = NULL; ··· 3240 3070 if (eb) 3241 3071 return eb; 3242 3072 3243 - eb = __alloc_extent_buffer(fs_info, start, len); 3073 + eb = __alloc_extent_buffer(fs_info, start); 3244 3074 if (!eb) 3245 3075 return ERR_PTR(-ENOMEM); 3246 3076 ··· 3260 3090 * The memory will be freed by attach_extent_buffer_page() or freed 3261 3091 * manually if we exit earlier. 3262 3092 */ 3263 - if (fs_info->nodesize < PAGE_SIZE) { 3264 - prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA); 3093 + if (btrfs_meta_is_subpage(fs_info)) { 3094 + prealloc = btrfs_alloc_subpage(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA); 3265 3095 if (IS_ERR(prealloc)) { 3266 3096 ret = PTR_ERR(prealloc); 3267 3097 goto out; ··· 3276 3106 goto out; 3277 3107 } 3278 3108 3279 - num_folios = num_extent_folios(eb); 3280 3109 /* Attach all pages to the filemap. */ 3281 - for (int i = 0; i < num_folios; i++) { 3110 + for (int i = 0; i < num_extent_folios(eb); i++) { 3282 3111 struct folio *folio; 3283 3112 3284 3113 ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb); ··· 3317 3148 * and free the allocated page. 
3318 3149 */ 3319 3150 folio = eb->folios[i]; 3320 - WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len)); 3151 + WARN_ON(btrfs_meta_folio_test_dirty(folio, eb)); 3321 3152 3322 3153 /* 3323 3154 * Check if the current page is physically contiguous with previous eb ··· 3328 3159 if (i && folio_page(eb->folios[i - 1], 0) + 1 != folio_page(folio, 0)) 3329 3160 page_contig = false; 3330 3161 3331 - if (!btrfs_folio_test_uptodate(fs_info, folio, eb->start, eb->len)) 3162 + if (!btrfs_meta_folio_test_uptodate(folio, eb)) 3332 3163 uptodate = 0; 3333 3164 3334 3165 /* ··· 3371 3202 * btree_release_folio will correctly detect that a page belongs to a 3372 3203 * live buffer and won't free them prematurely. 3373 3204 */ 3374 - for (int i = 0; i < num_folios; i++) 3205 + for (int i = 0; i < num_extent_folios(eb); i++) 3375 3206 folio_unlock(eb->folios[i]); 3376 3207 return eb; 3377 3208 ··· 3402 3233 } 3403 3234 /* 3404 3235 * Now all pages of that extent buffer is unmapped, set UNMAPPED flag, 3405 - * so it can be cleaned up without utilizing page->mapping. 3236 + * so it can be cleaned up without utilizing folio->mapping. 
3406 3237 */ 3407 3238 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); 3408 3239 ··· 3502 3333 release_extent_buffer(eb); 3503 3334 } 3504 3335 3505 - static void btree_clear_folio_dirty(struct folio *folio) 3336 + static void btree_clear_folio_dirty_tag(struct folio *folio) 3506 3337 { 3507 - ASSERT(folio_test_dirty(folio)); 3338 + ASSERT(!folio_test_dirty(folio)); 3508 3339 ASSERT(folio_test_locked(folio)); 3509 - folio_clear_dirty_for_io(folio); 3510 3340 xa_lock_irq(&folio->mapping->i_pages); 3511 3341 if (!folio_test_dirty(folio)) 3512 3342 __xa_clear_mark(&folio->mapping->i_pages, ··· 3513 3345 xa_unlock_irq(&folio->mapping->i_pages); 3514 3346 } 3515 3347 3516 - static void clear_subpage_extent_buffer_dirty(const struct extent_buffer *eb) 3517 - { 3518 - struct btrfs_fs_info *fs_info = eb->fs_info; 3519 - struct folio *folio = eb->folios[0]; 3520 - bool last; 3521 - 3522 - /* btree_clear_folio_dirty() needs page locked. */ 3523 - folio_lock(folio); 3524 - last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, eb->start, eb->len); 3525 - if (last) 3526 - btree_clear_folio_dirty(folio); 3527 - folio_unlock(folio); 3528 - WARN_ON(atomic_read(&eb->refs) == 0); 3529 - } 3530 - 3531 3348 void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans, 3532 3349 struct extent_buffer *eb) 3533 3350 { 3534 3351 struct btrfs_fs_info *fs_info = eb->fs_info; 3535 - int num_folios; 3536 3352 3537 3353 btrfs_assert_tree_write_locked(eb); 3538 3354 ··· 3543 3391 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, -eb->len, 3544 3392 fs_info->dirty_metadata_batch); 3545 3393 3546 - if (eb->fs_info->nodesize < PAGE_SIZE) 3547 - return clear_subpage_extent_buffer_dirty(eb); 3548 - 3549 - num_folios = num_extent_folios(eb); 3550 - for (int i = 0; i < num_folios; i++) { 3394 + for (int i = 0; i < num_extent_folios(eb); i++) { 3551 3395 struct folio *folio = eb->folios[i]; 3396 + bool last; 3552 3397 3553 3398 if (!folio_test_dirty(folio)) 3554 3399 continue; 3555 3400 
folio_lock(folio); 3556 - btree_clear_folio_dirty(folio); 3401 + last = btrfs_meta_folio_clear_and_test_dirty(folio, eb); 3402 + if (last) 3403 + btree_clear_folio_dirty_tag(folio); 3557 3404 folio_unlock(folio); 3558 3405 } 3559 3406 WARN_ON(atomic_read(&eb->refs) == 0); ··· 3560 3409 3561 3410 void set_extent_buffer_dirty(struct extent_buffer *eb) 3562 3411 { 3563 - int num_folios; 3564 3412 bool was_dirty; 3565 3413 3566 3414 check_buffer_tree_ref(eb); 3567 3415 3568 3416 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 3569 3417 3570 - num_folios = num_extent_folios(eb); 3571 3418 WARN_ON(atomic_read(&eb->refs) == 0); 3572 3419 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); 3573 3420 WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)); 3574 3421 3575 3422 if (!was_dirty) { 3576 - bool subpage = eb->fs_info->nodesize < PAGE_SIZE; 3423 + bool subpage = btrfs_meta_is_subpage(eb->fs_info); 3577 3424 3578 3425 /* 3579 3426 * For subpage case, we can have other extent buffers in the 3580 - * same page, and in clear_subpage_extent_buffer_dirty() we 3427 + * same page, and in clear_extent_buffer_dirty() we 3581 3428 * have to clear page dirty without subpage lock held. 3582 3429 * This can cause race where our page gets dirty cleared after 3583 3430 * we just set it. 3584 3431 * 3585 - * Thankfully, clear_subpage_extent_buffer_dirty() has locked 3432 + * Thankfully, clear_extent_buffer_dirty() has locked 3586 3433 * its page for other reasons, we can use page lock to prevent 3587 3434 * the above race. 
3588 3435 */ 3589 3436 if (subpage) 3590 3437 folio_lock(eb->folios[0]); 3591 - for (int i = 0; i < num_folios; i++) 3592 - btrfs_folio_set_dirty(eb->fs_info, eb->folios[i], 3593 - eb->start, eb->len); 3438 + for (int i = 0; i < num_extent_folios(eb); i++) 3439 + btrfs_meta_folio_set_dirty(eb->folios[i], eb); 3594 3440 if (subpage) 3595 3441 folio_unlock(eb->folios[0]); 3596 3442 percpu_counter_add_batch(&eb->fs_info->dirty_metadata_bytes, ··· 3595 3447 eb->fs_info->dirty_metadata_batch); 3596 3448 } 3597 3449 #ifdef CONFIG_BTRFS_DEBUG 3598 - for (int i = 0; i < num_folios; i++) 3450 + for (int i = 0; i < num_extent_folios(eb); i++) 3599 3451 ASSERT(folio_test_dirty(eb->folios[i])); 3600 3452 #endif 3601 3453 } 3602 3454 3603 3455 void clear_extent_buffer_uptodate(struct extent_buffer *eb) 3604 3456 { 3605 - struct btrfs_fs_info *fs_info = eb->fs_info; 3606 - int num_folios = num_extent_folios(eb); 3607 3457 3608 3458 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3609 - for (int i = 0; i < num_folios; i++) { 3459 + for (int i = 0; i < num_extent_folios(eb); i++) { 3610 3460 struct folio *folio = eb->folios[i]; 3611 3461 3612 3462 if (!folio) 3613 3463 continue; 3614 3464 3615 - /* 3616 - * This is special handling for metadata subpage, as regular 3617 - * btrfs_is_subpage() can not handle cloned/dummy metadata. 
3618 - */ 3619 - if (fs_info->nodesize >= PAGE_SIZE) 3620 - folio_clear_uptodate(folio); 3621 - else 3622 - btrfs_subpage_clear_uptodate(fs_info, folio, 3623 - eb->start, eb->len); 3465 + btrfs_meta_folio_clear_uptodate(folio, eb); 3624 3466 } 3625 3467 } 3626 3468 3627 3469 void set_extent_buffer_uptodate(struct extent_buffer *eb) 3628 3470 { 3629 - struct btrfs_fs_info *fs_info = eb->fs_info; 3630 - int num_folios = num_extent_folios(eb); 3631 3471 3632 3472 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3633 - for (int i = 0; i < num_folios; i++) { 3634 - struct folio *folio = eb->folios[i]; 3635 - 3636 - /* 3637 - * This is special handling for metadata subpage, as regular 3638 - * btrfs_is_subpage() can not handle cloned/dummy metadata. 3639 - */ 3640 - if (fs_info->nodesize >= PAGE_SIZE) 3641 - folio_mark_uptodate(folio); 3642 - else 3643 - btrfs_subpage_set_uptodate(fs_info, folio, 3644 - eb->start, eb->len); 3645 - } 3473 + for (int i = 0; i < num_extent_folios(eb); i++) 3474 + btrfs_meta_folio_set_uptodate(eb->folios[i], eb); 3646 3475 } 3647 3476 3648 3477 static void clear_extent_buffer_reading(struct extent_buffer *eb) ··· 3632 3507 static void end_bbio_meta_read(struct btrfs_bio *bbio) 3633 3508 { 3634 3509 struct extent_buffer *eb = bbio->private; 3635 - struct btrfs_fs_info *fs_info = eb->fs_info; 3636 3510 bool uptodate = !bbio->bio.bi_status; 3637 - struct folio_iter fi; 3638 - u32 bio_offset = 0; 3639 3511 3640 3512 /* 3641 3513 * If the extent buffer is marked UPTODATE before the read operation ··· 3654 3532 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); 3655 3533 } 3656 3534 3657 - bio_for_each_folio_all(fi, &bbio->bio) { 3658 - struct folio *folio = fi.folio; 3659 - u64 start = eb->start + bio_offset; 3660 - u32 len = fi.length; 3661 - 3662 - if (uptodate) 3663 - btrfs_folio_set_uptodate(fs_info, folio, start, len); 3664 - else 3665 - btrfs_folio_clear_uptodate(fs_info, folio, start, len); 3666 - 3667 - bio_offset += len; 3668 - } 3669 - 3670 
3535 clear_extent_buffer_reading(eb); 3671 3536 free_extent_buffer(eb); 3672 3537 ··· 3664 3555 const struct btrfs_tree_parent_check *check) 3665 3556 { 3666 3557 struct btrfs_bio *bbio; 3667 - bool ret; 3668 3558 3669 3559 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3670 3560 return 0; ··· 3703 3595 bbio->inode = BTRFS_I(eb->fs_info->btree_inode); 3704 3596 bbio->file_offset = eb->start; 3705 3597 memcpy(&bbio->parent_check, check, sizeof(*check)); 3706 - if (eb->fs_info->nodesize < PAGE_SIZE) { 3707 - ret = bio_add_folio(&bbio->bio, eb->folios[0], eb->len, 3708 - eb->start - folio_pos(eb->folios[0])); 3709 - ASSERT(ret); 3710 - } else { 3711 - int num_folios = num_extent_folios(eb); 3598 + for (int i = 0; i < num_extent_folios(eb); i++) { 3599 + struct folio *folio = eb->folios[i]; 3600 + u64 range_start = max_t(u64, eb->start, folio_pos(folio)); 3601 + u32 range_len = min_t(u64, folio_pos(folio) + folio_size(folio), 3602 + eb->start + eb->len) - range_start; 3712 3603 3713 - for (int i = 0; i < num_folios; i++) { 3714 - struct folio *folio = eb->folios[i]; 3715 - 3716 - ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0); 3717 - ASSERT(ret); 3718 - } 3604 + bio_add_folio_nofail(&bbio->bio, folio, range_len, 3605 + offset_in_folio(folio, range_start)); 3719 3606 } 3720 3607 btrfs_submit_bbio(bbio, mirror_num); 3721 3608 return 0; ··· 3899 3796 if (test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) 3900 3797 return; 3901 3798 3902 - if (fs_info->nodesize < PAGE_SIZE) { 3799 + if (btrfs_meta_is_subpage(fs_info)) { 3903 3800 folio = eb->folios[0]; 3904 3801 ASSERT(i == 0); 3905 3802 if (WARN_ON(!btrfs_subpage_test_uptodate(fs_info, folio, ··· 4385 4282 { 4386 4283 struct extent_buffer *eb; 4387 4284 4388 - if (folio_to_fs_info(folio)->nodesize < PAGE_SIZE) 4285 + if (btrfs_meta_is_subpage(folio_to_fs_info(folio))) 4389 4286 return try_release_subpage_extent_buffer(folio); 4390 4287 4391 4288 /*
+5 -4
fs/btrfs/extent_io.h
··· 252 252 253 253 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, 254 254 u64 start, u64 owner_root, int level); 255 - struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, 256 - u64 start, unsigned long len); 257 255 struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, 258 256 u64 start); 259 257 struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src); ··· 274 276 u64 bytenr, u64 owner_root, u64 gen, int level); 275 277 void btrfs_readahead_node_child(struct extent_buffer *node, int slot); 276 278 277 - static inline int num_extent_pages(const struct extent_buffer *eb) 279 + /* Note: this can be used in for loops without caching the value in a variable. */ 280 + static inline int __pure num_extent_pages(const struct extent_buffer *eb) 278 281 { 279 282 /* 280 283 * For sectorsize == PAGE_SIZE case, since nodesize is always aligned to ··· 293 294 * As we can have either one large folio covering the whole eb 294 295 * (either nodesize <= PAGE_SIZE, or high order folio), or multiple 295 296 * single-paged folios. 297 + * 298 + * Note: this can be used in for loops without caching the value in a variable. 296 299 */ 297 - static inline int num_extent_folios(const struct extent_buffer *eb) 300 + static inline int __pure num_extent_folios(const struct extent_buffer *eb) 298 301 { 299 302 if (folio_order(eb->folios[0])) 300 303 return 1;
+13 -17
fs/btrfs/file-item.c
··· 163 163 int ret = 0; 164 164 struct btrfs_file_extent_item *item; 165 165 struct btrfs_key file_key; 166 - struct btrfs_path *path; 166 + BTRFS_PATH_AUTO_FREE(path); 167 167 struct extent_buffer *leaf; 168 168 169 169 path = btrfs_alloc_path(); 170 170 if (!path) 171 171 return -ENOMEM; 172 + 172 173 file_key.objectid = objectid; 173 - file_key.offset = pos; 174 174 file_key.type = BTRFS_EXTENT_DATA_KEY; 175 + file_key.offset = pos; 175 176 176 177 ret = btrfs_insert_empty_item(trans, root, path, &file_key, 177 178 sizeof(*item)); 178 179 if (ret < 0) 179 - goto out; 180 + return ret; 180 181 leaf = path->nodes[0]; 181 182 item = btrfs_item_ptr(leaf, path->slots[0], 182 183 struct btrfs_file_extent_item); ··· 191 190 btrfs_set_file_extent_compression(leaf, item, 0); 192 191 btrfs_set_file_extent_encryption(leaf, item, 0); 193 192 btrfs_set_file_extent_other_encoding(leaf, item, 0); 194 - out: 195 - btrfs_free_path(path); 193 + 196 194 return ret; 197 195 } 198 196 ··· 212 212 int csums_in_item; 213 213 214 214 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 215 - file_key.offset = bytenr; 216 215 file_key.type = BTRFS_EXTENT_CSUM_KEY; 216 + file_key.offset = bytenr; 217 217 ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 218 218 if (ret < 0) 219 219 goto fail; ··· 259 259 int cow = mod != 0; 260 260 261 261 file_key.objectid = objectid; 262 - file_key.offset = offset; 263 262 file_key.type = BTRFS_EXTENT_DATA_KEY; 263 + file_key.offset = offset; 264 264 265 265 return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); 266 266 } ··· 341 341 struct btrfs_inode *inode = bbio->inode; 342 342 struct btrfs_fs_info *fs_info = inode->root->fs_info; 343 343 struct bio *bio = &bbio->bio; 344 - struct btrfs_path *path; 344 + BTRFS_PATH_AUTO_FREE(path); 345 345 const u32 sectorsize = fs_info->sectorsize; 346 346 const u32 csum_size = fs_info->csum_size; 347 347 u32 orig_len = bio->bi_iter.bi_size; ··· 373 373 374 374 if (nblocks * csum_size > 
BTRFS_BIO_INLINE_CSUM_SIZE) { 375 375 bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS); 376 - if (!bbio->csum) { 377 - btrfs_free_path(path); 376 + if (!bbio->csum) 378 377 return BLK_STS_RESOURCE; 379 - } 380 378 } else { 381 379 bbio->csum = bbio->csum_inline; 382 380 } ··· 442 444 bio_offset += count * sectorsize; 443 445 } 444 446 445 - btrfs_free_path(path); 446 447 return ret; 447 448 } 448 449 ··· 481 484 path->nowait = nowait; 482 485 483 486 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 484 - key.offset = start; 485 487 key.type = BTRFS_EXTENT_CSUM_KEY; 488 + key.offset = start; 486 489 487 490 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 488 491 if (ret < 0) ··· 871 874 struct btrfs_root *root, u64 bytenr, u64 len) 872 875 { 873 876 struct btrfs_fs_info *fs_info = trans->fs_info; 874 - struct btrfs_path *path; 877 + BTRFS_PATH_AUTO_FREE(path); 875 878 struct btrfs_key key; 876 879 u64 end_byte = bytenr + len; 877 880 u64 csum_end; ··· 889 892 890 893 while (1) { 891 894 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 892 - key.offset = end_byte - 1; 893 895 key.type = BTRFS_EXTENT_CSUM_KEY; 896 + key.offset = end_byte - 1; 894 897 895 898 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 896 899 if (ret > 0) { ··· 1007 1010 } 1008 1011 btrfs_release_path(path); 1009 1012 } 1010 - btrfs_free_path(path); 1011 1013 return ret; 1012 1014 } 1013 1015 ··· 1070 1074 found_next = 0; 1071 1075 bytenr = sums->logical + total_bytes; 1072 1076 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 1073 - file_key.offset = bytenr; 1074 1077 file_key.type = BTRFS_EXTENT_CSUM_KEY; 1078 + file_key.offset = bytenr; 1075 1079 1076 1080 item = btrfs_lookup_csum(trans, root, path, bytenr, 1); 1077 1081 if (!IS_ERR(item)) {
+2
fs/btrfs/file-item.h
··· 3 3 #ifndef BTRFS_FILE_ITEM_H 4 4 #define BTRFS_FILE_ITEM_H 5 5 6 + #include <linux/blk_types.h> 6 7 #include <linux/list.h> 7 8 #include <uapi/linux/btrfs_tree.h> 9 + #include "ctree.h" 8 10 #include "accessors.h" 9 11 10 12 struct extent_map;
+14 -14
fs/btrfs/file.c
··· 804 804 { 805 805 u64 clamp_start = max_t(u64, pos, folio_pos(folio)); 806 806 u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio)); 807 + const u32 blocksize = inode_to_fs_info(inode)->sectorsize; 807 808 int ret = 0; 808 809 809 810 if (folio_test_uptodate(folio)) 810 811 return 0; 811 812 812 813 if (!force_uptodate && 813 - IS_ALIGNED(clamp_start, PAGE_SIZE) && 814 - IS_ALIGNED(clamp_end, PAGE_SIZE)) 814 + IS_ALIGNED(clamp_start, blocksize) && 815 + IS_ALIGNED(clamp_end, blocksize)) 815 816 return 0; 816 817 817 818 ret = btrfs_read_folio(NULL, folio); ··· 875 874 ret = PTR_ERR(folio); 876 875 return ret; 877 876 } 878 - folio_wait_writeback(folio); 879 877 /* Only support page sized folio yet. */ 880 878 ASSERT(folio_order(folio) == 0); 881 879 ret = set_folio_extent_mapped(folio); ··· 1014 1014 btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, 1015 1015 &cached_state); 1016 1016 } 1017 - ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, 1018 - NULL, nowait); 1017 + ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, nowait); 1019 1018 if (ret <= 0) 1020 1019 btrfs_drew_write_unlock(&root->snapshot_lock); 1021 1020 else ··· 1782 1783 struct extent_changeset *data_reserved = NULL; 1783 1784 unsigned long zero_start; 1784 1785 loff_t size; 1786 + size_t fsize = folio_size(folio); 1785 1787 vm_fault_t ret; 1786 1788 int ret2; 1787 1789 int reserved = 0; ··· 1793 1793 1794 1794 ASSERT(folio_order(folio) == 0); 1795 1795 1796 - reserved_space = PAGE_SIZE; 1796 + reserved_space = fsize; 1797 1797 1798 1798 sb_start_pagefault(inode->i_sb); 1799 1799 page_start = folio_pos(folio); ··· 1847 1847 * We can't set the delalloc bits if there are pending ordered 1848 1848 * extents. Drop our locks and wait for them to finish. 
1849 1849 */ 1850 - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, PAGE_SIZE); 1850 + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, fsize); 1851 1851 if (ordered) { 1852 1852 unlock_extent(io_tree, page_start, page_end, &cached_state); 1853 1853 folio_unlock(folio); ··· 1859 1859 1860 1860 if (folio->index == ((size - 1) >> PAGE_SHIFT)) { 1861 1861 reserved_space = round_up(size - page_start, fs_info->sectorsize); 1862 - if (reserved_space < PAGE_SIZE) { 1862 + if (reserved_space < fsize) { 1863 1863 end = page_start + reserved_space - 1; 1864 1864 btrfs_delalloc_release_space(BTRFS_I(inode), 1865 1865 data_reserved, page_start, 1866 - PAGE_SIZE - reserved_space, true); 1866 + fsize - reserved_space, true); 1867 1867 } 1868 1868 } 1869 1869 ··· 1890 1890 if (page_start + folio_size(folio) > size) 1891 1891 zero_start = offset_in_folio(folio, size); 1892 1892 else 1893 - zero_start = PAGE_SIZE; 1893 + zero_start = fsize; 1894 1894 1895 - if (zero_start != PAGE_SIZE) 1895 + if (zero_start != fsize) 1896 1896 folio_zero_range(folio, zero_start, folio_size(folio) - zero_start); 1897 1897 1898 - btrfs_folio_clear_checked(fs_info, folio, page_start, PAGE_SIZE); 1898 + btrfs_folio_clear_checked(fs_info, folio, page_start, fsize); 1899 1899 btrfs_folio_set_dirty(fs_info, folio, page_start, end + 1 - page_start); 1900 1900 btrfs_folio_set_uptodate(fs_info, folio, page_start, end + 1 - page_start); 1901 1901 ··· 1904 1904 unlock_extent(io_tree, page_start, page_end, &cached_state); 1905 1905 up_read(&BTRFS_I(inode)->i_mmap_lock); 1906 1906 1907 - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); 1907 + btrfs_delalloc_release_extents(BTRFS_I(inode), fsize); 1908 1908 sb_end_pagefault(inode->i_sb); 1909 1909 extent_changeset_free(data_reserved); 1910 1910 return VM_FAULT_LOCKED; ··· 1913 1913 folio_unlock(folio); 1914 1914 up_read(&BTRFS_I(inode)->i_mmap_lock); 1915 1915 out: 1916 - btrfs_delalloc_release_extents(BTRFS_I(inode), 
PAGE_SIZE); 1916 + btrfs_delalloc_release_extents(BTRFS_I(inode), fsize); 1917 1917 btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start, 1918 1918 reserved_space, (ret != 0)); 1919 1919 out_noreserve:
+2
fs/btrfs/file.h
··· 9 9 struct extent_state; 10 10 struct kiocb; 11 11 struct iov_iter; 12 + struct inode; 13 + struct folio; 12 14 struct page; 13 15 struct btrfs_ioctl_encoded_io_args; 14 16 struct btrfs_drop_extents_args;
+28 -29
fs/btrfs/free-space-cache.c
··· 88 88 struct btrfs_disk_key disk_key; 89 89 struct btrfs_free_space_header *header; 90 90 struct extent_buffer *leaf; 91 - struct inode *inode = NULL; 91 + struct btrfs_inode *inode; 92 92 unsigned nofs_flag; 93 93 int ret; 94 94 95 95 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 96 - key.offset = offset; 97 96 key.type = 0; 97 + key.offset = offset; 98 98 99 99 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 100 100 if (ret < 0) ··· 120 120 btrfs_release_path(path); 121 121 memalloc_nofs_restore(nofs_flag); 122 122 if (IS_ERR(inode)) 123 - return inode; 123 + return ERR_CAST(inode); 124 124 125 - mapping_set_gfp_mask(inode->i_mapping, 126 - mapping_gfp_constraint(inode->i_mapping, 125 + mapping_set_gfp_mask(inode->vfs_inode.i_mapping, 126 + mapping_gfp_constraint(inode->vfs_inode.i_mapping, 127 127 ~(__GFP_FS | __GFP_HIGHMEM))); 128 128 129 - return inode; 129 + return &inode->vfs_inode; 130 130 } 131 131 132 132 struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group, ··· 201 201 btrfs_release_path(path); 202 202 203 203 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 204 - key.offset = offset; 205 204 key.type = 0; 205 + key.offset = offset; 206 206 ret = btrfs_insert_empty_item(trans, root, path, &key, 207 207 sizeof(struct btrfs_free_space_header)); 208 208 if (ret < 0) { ··· 244 244 struct inode *inode, 245 245 struct btrfs_block_group *block_group) 246 246 { 247 - struct btrfs_path *path; 247 + BTRFS_PATH_AUTO_FREE(path); 248 248 struct btrfs_key key; 249 249 int ret = 0; 250 250 ··· 257 257 if (IS_ERR(inode)) { 258 258 if (PTR_ERR(inode) != -ENOENT) 259 259 ret = PTR_ERR(inode); 260 - goto out; 260 + return ret; 261 261 } 262 262 ret = btrfs_orphan_add(trans, BTRFS_I(inode)); 263 263 if (ret) { 264 264 btrfs_add_delayed_iput(BTRFS_I(inode)); 265 - goto out; 265 + return ret; 266 266 } 267 267 clear_nlink(inode); 268 268 /* One for the block groups ref */ ··· 285 285 if (ret) { 286 286 if (ret > 0) 287 287 ret = 0; 288 - goto out; 288 + 
return ret; 289 289 } 290 - ret = btrfs_del_item(trans, trans->fs_info->tree_root, path); 291 - out: 292 - btrfs_free_path(path); 293 - return ret; 290 + return btrfs_del_item(trans, trans->fs_info->tree_root, path); 294 291 } 295 292 296 293 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, ··· 444 447 445 448 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate) 446 449 { 447 - struct page *page; 450 + struct folio *folio; 448 451 struct inode *inode = io_ctl->inode; 449 452 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 450 453 int i; ··· 452 455 for (i = 0; i < io_ctl->num_pages; i++) { 453 456 int ret; 454 457 455 - page = find_or_create_page(inode->i_mapping, i, mask); 456 - if (!page) { 458 + folio = __filemap_get_folio(inode->i_mapping, i, 459 + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, 460 + mask); 461 + if (IS_ERR(folio)) { 457 462 io_ctl_drop_pages(io_ctl); 458 463 return -ENOMEM; 459 464 } 460 465 461 - ret = set_folio_extent_mapped(page_folio(page)); 466 + ret = set_folio_extent_mapped(folio); 462 467 if (ret < 0) { 463 - unlock_page(page); 464 - put_page(page); 468 + folio_unlock(folio); 469 + folio_put(folio); 465 470 io_ctl_drop_pages(io_ctl); 466 471 return ret; 467 472 } 468 473 469 - io_ctl->pages[i] = page; 470 - if (uptodate && !PageUptodate(page)) { 471 - btrfs_read_folio(NULL, page_folio(page)); 472 - lock_page(page); 473 - if (page->mapping != inode->i_mapping) { 474 + io_ctl->pages[i] = &folio->page; 475 + if (uptodate && !folio_test_uptodate(folio)) { 476 + btrfs_read_folio(NULL, folio); 477 + folio_lock(folio); 478 + if (folio->mapping != inode->i_mapping) { 474 479 btrfs_err(BTRFS_I(inode)->root->fs_info, 475 480 "free space cache page truncated"); 476 481 io_ctl_drop_pages(io_ctl); 477 482 return -EIO; 478 483 } 479 - if (!PageUptodate(page)) { 484 + if (!folio_test_uptodate(folio)) { 480 485 btrfs_err(BTRFS_I(inode)->root->fs_info, 481 486 "error reading free space cache"); 482 487 
io_ctl_drop_pages(io_ctl); ··· 752 753 return 0; 753 754 754 755 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 755 - key.offset = offset; 756 756 key.type = 0; 757 + key.offset = offset; 757 758 758 759 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 759 760 if (ret < 0) ··· 1155 1156 int ret; 1156 1157 1157 1158 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1158 - key.offset = offset; 1159 1159 key.type = 0; 1160 + key.offset = offset; 1160 1161 1161 1162 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1162 1163 if (ret < 0) {
+17 -28
fs/btrfs/free-space-tree.c
··· 1062 1062 struct btrfs_block_group *block_group) 1063 1063 { 1064 1064 struct btrfs_root *extent_root; 1065 - struct btrfs_path *path, *path2; 1065 + BTRFS_PATH_AUTO_FREE(path); 1066 + BTRFS_PATH_AUTO_FREE(path2); 1066 1067 struct btrfs_key key; 1067 1068 u64 start, end; 1068 1069 int ret; ··· 1071 1070 path = btrfs_alloc_path(); 1072 1071 if (!path) 1073 1072 return -ENOMEM; 1074 - path->reada = READA_FORWARD; 1075 1073 1076 1074 path2 = btrfs_alloc_path(); 1077 - if (!path2) { 1078 - btrfs_free_path(path); 1075 + if (!path2) 1079 1076 return -ENOMEM; 1080 - } 1077 + 1078 + path->reada = READA_FORWARD; 1081 1079 1082 1080 ret = add_new_free_space_info(trans, block_group, path2); 1083 1081 if (ret) 1084 - goto out; 1082 + return ret; 1085 1083 1086 1084 mutex_lock(&block_group->free_space_lock); 1087 1085 ··· 1146 1146 ret = 0; 1147 1147 out_locked: 1148 1148 mutex_unlock(&block_group->free_space_lock); 1149 - out: 1150 - btrfs_free_path(path2); 1151 - btrfs_free_path(path); 1149 + 1152 1150 return ret; 1153 1151 } 1154 1152 ··· 1215 1217 static int clear_free_space_tree(struct btrfs_trans_handle *trans, 1216 1218 struct btrfs_root *root) 1217 1219 { 1218 - struct btrfs_path *path; 1220 + BTRFS_PATH_AUTO_FREE(path); 1219 1221 struct btrfs_key key; 1220 1222 int nr; 1221 1223 int ret; ··· 1231 1233 while (1) { 1232 1234 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1233 1235 if (ret < 0) 1234 - goto out; 1236 + return ret; 1235 1237 1236 1238 nr = btrfs_header_nritems(path->nodes[0]); 1237 1239 if (!nr) ··· 1240 1242 path->slots[0] = 0; 1241 1243 ret = btrfs_del_items(trans, root, path, 0, nr); 1242 1244 if (ret) 1243 - goto out; 1245 + return ret; 1244 1246 1245 1247 btrfs_release_path(path); 1246 1248 } 1247 1249 1248 - ret = 0; 1249 - out: 1250 - btrfs_free_path(path); 1251 - return ret; 1250 + return 0; 1252 1251 } 1253 1252 1254 1253 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info) ··· 1633 1638 { 1634 1639 struct btrfs_block_group 
*block_group; 1635 1640 struct btrfs_free_space_info *info; 1636 - struct btrfs_path *path; 1641 + BTRFS_PATH_AUTO_FREE(path); 1637 1642 u32 extent_count, flags; 1638 - int ret; 1639 1643 1640 1644 block_group = caching_ctl->block_group; 1641 1645 ··· 1651 1657 path->reada = READA_FORWARD; 1652 1658 1653 1659 info = search_free_space_info(NULL, block_group, path, 0); 1654 - if (IS_ERR(info)) { 1655 - ret = PTR_ERR(info); 1656 - goto out; 1657 - } 1660 + if (IS_ERR(info)) 1661 + return PTR_ERR(info); 1662 + 1658 1663 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 1659 1664 flags = btrfs_free_space_flags(path->nodes[0], info); 1660 1665 ··· 1663 1670 * there. 1664 1671 */ 1665 1672 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) 1666 - ret = load_free_space_bitmaps(caching_ctl, path, extent_count); 1673 + return load_free_space_bitmaps(caching_ctl, path, extent_count); 1667 1674 else 1668 - ret = load_free_space_extents(caching_ctl, path, extent_count); 1669 - 1670 - out: 1671 - btrfs_free_path(path); 1672 - return ret; 1675 + return load_free_space_extents(caching_ctl, path, extent_count); 1673 1676 }
-1
fs/btrfs/fs.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include "messages.h" 4 - #include "ctree.h" 5 4 #include "fs.h" 6 5 #include "accessors.h" 7 6 #include "volumes.h"
+23 -3
fs/btrfs/fs.h
··· 47 47 struct btrfs_stripe_hash_table; 48 48 struct btrfs_space_info; 49 49 50 + /* 51 + * Minimum data and metadata block size. 52 + * 53 + * Normally it's 4K, but for testing subpage block size on 4K page systems, we 54 + * allow DEBUG builds to accept 2K page size. 55 + */ 56 + #ifdef CONFIG_BTRFS_DEBUG 57 + #define BTRFS_MIN_BLOCKSIZE (SZ_2K) 58 + #else 59 + #define BTRFS_MIN_BLOCKSIZE (SZ_4K) 60 + #endif 61 + 50 62 #define BTRFS_MAX_EXTENT_SIZE SZ_128M 51 63 52 64 #define BTRFS_OLDEST_GENERATION 0ULL ··· 116 104 117 105 /* Indicates there was an error cleaning up a log tree. */ 118 106 BTRFS_FS_STATE_LOG_CLEANUP_ERROR, 107 + 108 + /* No more delayed iput can be queued. */ 109 + BTRFS_FS_STATE_NO_DELAYED_IPUT, 119 110 120 111 BTRFS_FS_STATE_COUNT 121 112 }; ··· 500 485 u64 last_trans_log_full_commit; 501 486 unsigned long long mount_opt; 502 487 503 - unsigned long compress_type:4; 504 - unsigned int compress_level; 488 + int compress_type; 489 + int compress_level; 505 490 u32 commit_interval; 506 491 /* 507 492 * It is a suggestive number, the read side is safe even it gets a ··· 724 709 * running. 725 710 */ 726 711 refcount_t scrub_workers_refcnt; 727 - u32 sectors_per_page; 728 712 struct workqueue_struct *scrub_workers; 729 713 730 714 struct btrfs_discard_ctl discard_ctl; ··· 993 979 #endif 994 980 995 981 return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size); 982 + } 983 + 984 + static inline unsigned int btrfs_blocks_per_folio(const struct btrfs_fs_info *fs_info, 985 + const struct folio *folio) 986 + { 987 + return folio_size(folio) >> fs_info->sectorsize_bits; 996 988 } 997 989 998 990 bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
+3 -3
fs/btrfs/inode-item.c
··· 191 191 int del_len = name->len + sizeof(*ref); 192 192 193 193 key.objectid = inode_objectid; 194 - key.offset = ref_objectid; 195 194 key.type = BTRFS_INODE_REF_KEY; 195 + key.offset = ref_objectid; 196 196 197 197 path = btrfs_alloc_path(); 198 198 if (!path) ··· 317 317 int ins_len = name->len + sizeof(*ref); 318 318 319 319 key.objectid = inode_objectid; 320 - key.offset = ref_objectid; 321 320 key.type = BTRFS_INODE_REF_KEY; 321 + key.offset = ref_objectid; 322 322 323 323 path = btrfs_alloc_path(); 324 324 if (!path) ··· 493 493 path->reada = READA_BACK; 494 494 495 495 key.objectid = control->ino; 496 - key.offset = (u64)-1; 497 496 key.type = (u8)-1; 497 + key.offset = (u64)-1; 498 498 499 499 search_again: 500 500 /*
+315 -272
fs/btrfs/inode.c
··· 489 489 size_t datasize; 490 490 491 491 key.objectid = btrfs_ino(inode); 492 - key.offset = 0; 493 492 key.type = BTRFS_EXTENT_DATA_KEY; 493 + key.offset = 0; 494 494 495 495 datasize = btrfs_file_extent_calc_inline_size(cur_size); 496 496 ret = btrfs_insert_empty_item(trans, root, path, &key, ··· 566 566 if (offset != 0) 567 567 return false; 568 568 569 - /* 570 - * Due to the page size limit, for subpage we can only trigger the 571 - * writeback for the dirty sectors of page, that means data writeback 572 - * is doing more writeback than what we want. 573 - * 574 - * This is especially unexpected for some call sites like fallocate, 575 - * where we only increase i_size after everything is done. 576 - * This means we can trigger inline extent even if we didn't want to. 577 - * So here we skip inline extent creation completely. 578 - */ 579 - if (fs_info->sectorsize != PAGE_SIZE) 580 - return false; 581 - 582 569 /* Inline extents are limited to sectorsize. */ 583 570 if (size > fs_info->sectorsize) 571 + return false; 572 + 573 + /* We do not allow a non-compressed extent to be as large as block size. */ 574 + if (data_len >= fs_info->sectorsize) 584 575 return false; 585 576 586 577 /* We cannot exceed the maximum inline data size. */ ··· 663 672 * And at reserve time, it's always aligned to page size, so 664 673 * just free one page here. 
665 674 */ 666 - btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE, NULL); 675 + btrfs_qgroup_free_data(inode, NULL, 0, fs_info->sectorsize, NULL); 667 676 btrfs_free_path(path); 668 677 btrfs_end_transaction(trans); 669 678 return ret; ··· 823 832 btrfs_add_inode_defrag(inode, small_write); 824 833 } 825 834 826 - static int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) 835 + static int extent_range_clear_dirty_for_io(struct btrfs_inode *inode, u64 start, u64 end) 827 836 { 828 837 unsigned long end_index = end >> PAGE_SHIFT; 829 838 struct folio *folio; ··· 831 840 832 841 for (unsigned long index = start >> PAGE_SHIFT; 833 842 index <= end_index; index++) { 834 - folio = filemap_get_folio(inode->i_mapping, index); 843 + folio = filemap_get_folio(inode->vfs_inode.i_mapping, index); 835 844 if (IS_ERR(folio)) { 836 845 if (!ret) 837 846 ret = PTR_ERR(folio); 838 847 continue; 839 848 } 840 - btrfs_folio_clamp_clear_dirty(inode_to_fs_info(inode), folio, start, 849 + btrfs_folio_clamp_clear_dirty(inode->root->fs_info, folio, start, 841 850 end + 1 - start); 842 851 folio_put(folio); 843 852 } ··· 877 886 unsigned int poff; 878 887 int i; 879 888 int compress_type = fs_info->compress_type; 889 + int compress_level = fs_info->compress_level; 880 890 881 891 inode_should_defrag(inode, start, end, end - start + 1, SZ_16K); 882 892 ··· 886 894 * Otherwise applications with the file mmap'd can wander in and change 887 895 * the page contents while we are compressing them. 888 896 */ 889 - ret = extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end); 897 + ret = extent_range_clear_dirty_for_io(inode, start, end); 890 898 891 899 /* 892 900 * All the folios should have been locked thus no failure. 
··· 960 968 goto cleanup_and_bail_uncompressed; 961 969 } 962 970 963 - if (inode->defrag_compress) 971 + if (inode->defrag_compress) { 964 972 compress_type = inode->defrag_compress; 965 - else if (inode->prop_compress) 973 + compress_level = inode->defrag_compress_level; 974 + } else if (inode->prop_compress) { 966 975 compress_type = inode->prop_compress; 976 + } 967 977 968 978 /* Compression level is applied here. */ 969 - ret = btrfs_compress_folios(compress_type | (fs_info->compress_level << 4), 979 + ret = btrfs_compress_folios(compress_type, compress_level, 970 980 mapping, start, folios, &nr_folios, &total_in, 971 981 &total_compressed); 972 982 if (ret) ··· 1084 1090 &wbc, false); 1085 1091 wbc_detach_inode(&wbc); 1086 1092 if (ret < 0) { 1087 - btrfs_cleanup_ordered_extents(inode, start, end - start + 1); 1088 1093 if (locked_folio) 1089 1094 btrfs_folio_end_lock(inode->root->fs_info, locked_folio, 1090 1095 start, async_extent->ram_size); ··· 1265 1272 * - Else all pages except for @locked_folio are unlocked. 1266 1273 * 1267 1274 * When a failure happens in the second or later iteration of the 1268 - * while-loop, the ordered extents created in previous iterations are kept 1269 - * intact. So, the caller must clean them up by calling 1270 - * btrfs_cleanup_ordered_extents(). See btrfs_run_delalloc_range() for 1271 - * example. 1275 + * while-loop, the ordered extents created in previous iterations are cleaned up. 1272 1276 */ 1273 1277 static noinline int cow_file_range(struct btrfs_inode *inode, 1274 1278 struct folio *locked_folio, u64 start, ··· 1482 1492 1483 1493 /* 1484 1494 * For the range (1). We have already instantiated the ordered extents 1485 - * for this region. They are cleaned up by 1486 - * btrfs_cleanup_ordered_extents() in e.g, 1487 - * btrfs_run_delalloc_range(). 1495 + * for this region, thus we need to cleanup those ordered extents. 
1488 1496 * EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV 1489 - * are also handled by the cleanup function. 1497 + * are also handled by the ordered extents cleanup. 1490 1498 * 1491 1499 * So here we only clear EXTENT_LOCKED and EXTENT_DELALLOC flag, and 1492 1500 * finish the writeback of the involved folios, which will be never submitted. ··· 1495 1507 1496 1508 if (!locked_folio) 1497 1509 mapping_set_error(inode->vfs_inode.i_mapping, ret); 1510 + 1511 + btrfs_cleanup_ordered_extents(inode, orig_start, start - orig_start); 1498 1512 extent_clear_unlock_delalloc(inode, orig_start, start - 1, 1499 1513 locked_folio, NULL, clear_bits, page_ops); 1500 1514 } ··· 1966 1976 mapping_set_error(mapping, error); 1967 1977 } 1968 1978 1979 + static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio, 1980 + struct extent_state **cached, 1981 + struct can_nocow_file_extent_args *nocow_args, 1982 + u64 file_pos, bool is_prealloc) 1983 + { 1984 + struct btrfs_ordered_extent *ordered; 1985 + u64 len = nocow_args->file_extent.num_bytes; 1986 + u64 end = file_pos + len - 1; 1987 + int ret = 0; 1988 + 1989 + lock_extent(&inode->io_tree, file_pos, end, cached); 1990 + 1991 + if (is_prealloc) { 1992 + struct extent_map *em; 1993 + 1994 + em = btrfs_create_io_em(inode, file_pos, &nocow_args->file_extent, 1995 + BTRFS_ORDERED_PREALLOC); 1996 + if (IS_ERR(em)) { 1997 + unlock_extent(&inode->io_tree, file_pos, end, cached); 1998 + return PTR_ERR(em); 1999 + } 2000 + free_extent_map(em); 2001 + } 2002 + 2003 + ordered = btrfs_alloc_ordered_extent(inode, file_pos, &nocow_args->file_extent, 2004 + is_prealloc 2005 + ? 
(1 << BTRFS_ORDERED_PREALLOC) 2006 + : (1 << BTRFS_ORDERED_NOCOW)); 2007 + if (IS_ERR(ordered)) { 2008 + if (is_prealloc) 2009 + btrfs_drop_extent_map_range(inode, file_pos, end, false); 2010 + unlock_extent(&inode->io_tree, file_pos, end, cached); 2011 + return PTR_ERR(ordered); 2012 + } 2013 + 2014 + if (btrfs_is_data_reloc_root(inode->root)) 2015 + /* 2016 + * Errors are handled later, as we must prevent 2017 + * extent_clear_unlock_delalloc() in error handler from freeing 2018 + * metadata of the created ordered extent. 2019 + */ 2020 + ret = btrfs_reloc_clone_csums(ordered); 2021 + btrfs_put_ordered_extent(ordered); 2022 + 2023 + extent_clear_unlock_delalloc(inode, file_pos, end, locked_folio, cached, 2024 + EXTENT_LOCKED | EXTENT_DELALLOC | 2025 + EXTENT_CLEAR_DATA_RESV, 2026 + PAGE_UNLOCK | PAGE_SET_ORDERED); 2027 + /* 2028 + * On error, we need to cleanup the ordered extents we created. 2029 + * 2030 + * We do not clear the folio Dirty flags because they are set and 2031 + * cleaered by the caller. 2032 + */ 2033 + if (ret < 0) 2034 + btrfs_cleanup_ordered_extents(inode, file_pos, end); 2035 + return ret; 2036 + } 2037 + 1969 2038 /* 1970 2039 * when nowcow writeback call back. This checks for snapshots or COW copies 1971 2040 * of the extents that exist in the file, and COWs the file as required. 
··· 2069 2020 2070 2021 while (cur_offset <= end) { 2071 2022 struct btrfs_block_group *nocow_bg = NULL; 2072 - struct btrfs_ordered_extent *ordered; 2073 2023 struct btrfs_key found_key; 2074 2024 struct btrfs_file_extent_item *fi; 2075 2025 struct extent_buffer *leaf; 2076 2026 struct extent_state *cached_state = NULL; 2077 2027 u64 extent_end; 2078 - u64 nocow_end; 2079 2028 int extent_type; 2080 - bool is_prealloc; 2081 2029 2082 2030 ret = btrfs_lookup_file_extent(NULL, root, path, ino, 2083 2031 cur_offset, 0); ··· 2200 2154 if (cow_start != (u64)-1) { 2201 2155 ret = fallback_to_cow(inode, locked_folio, cow_start, 2202 2156 found_key.offset - 1); 2203 - cow_start = (u64)-1; 2204 2157 if (ret) { 2205 2158 cow_end = found_key.offset - 1; 2206 2159 btrfs_dec_nocow_writers(nocow_bg); 2207 2160 goto error; 2208 2161 } 2162 + cow_start = (u64)-1; 2209 2163 } 2210 2164 2211 - nocow_end = cur_offset + nocow_args.file_extent.num_bytes - 1; 2212 - lock_extent(&inode->io_tree, cur_offset, nocow_end, &cached_state); 2213 - 2214 - is_prealloc = extent_type == BTRFS_FILE_EXTENT_PREALLOC; 2215 - if (is_prealloc) { 2216 - struct extent_map *em; 2217 - 2218 - em = btrfs_create_io_em(inode, cur_offset, 2219 - &nocow_args.file_extent, 2220 - BTRFS_ORDERED_PREALLOC); 2221 - if (IS_ERR(em)) { 2222 - unlock_extent(&inode->io_tree, cur_offset, 2223 - nocow_end, &cached_state); 2224 - btrfs_dec_nocow_writers(nocow_bg); 2225 - ret = PTR_ERR(em); 2226 - goto error; 2227 - } 2228 - free_extent_map(em); 2229 - } 2230 - 2231 - ordered = btrfs_alloc_ordered_extent(inode, cur_offset, 2232 - &nocow_args.file_extent, 2233 - is_prealloc 2234 - ? 
(1 << BTRFS_ORDERED_PREALLOC) 2235 - : (1 << BTRFS_ORDERED_NOCOW)); 2165 + ret = nocow_one_range(inode, locked_folio, &cached_state, 2166 + &nocow_args, cur_offset, 2167 + extent_type == BTRFS_FILE_EXTENT_PREALLOC); 2236 2168 btrfs_dec_nocow_writers(nocow_bg); 2237 - if (IS_ERR(ordered)) { 2238 - if (is_prealloc) { 2239 - btrfs_drop_extent_map_range(inode, cur_offset, 2240 - nocow_end, false); 2241 - } 2242 - unlock_extent(&inode->io_tree, cur_offset, 2243 - nocow_end, &cached_state); 2244 - ret = PTR_ERR(ordered); 2169 + if (ret < 0) 2245 2170 goto error; 2246 - } 2247 - 2248 - if (btrfs_is_data_reloc_root(root)) 2249 - /* 2250 - * Error handled later, as we must prevent 2251 - * extent_clear_unlock_delalloc() in error handler 2252 - * from freeing metadata of created ordered extent. 2253 - */ 2254 - ret = btrfs_reloc_clone_csums(ordered); 2255 - btrfs_put_ordered_extent(ordered); 2256 - 2257 - extent_clear_unlock_delalloc(inode, cur_offset, nocow_end, 2258 - locked_folio, &cached_state, 2259 - EXTENT_LOCKED | EXTENT_DELALLOC | 2260 - EXTENT_CLEAR_DATA_RESV, 2261 - PAGE_UNLOCK | PAGE_SET_ORDERED); 2262 - 2263 2171 cur_offset = extent_end; 2264 - 2265 - /* 2266 - * btrfs_reloc_clone_csums() error, now we're OK to call error 2267 - * handler, as metadata for created ordered extent will only 2268 - * be freed by btrfs_finish_ordered_io(). 2269 - */ 2270 - if (ret) 2271 - goto error; 2272 2172 } 2273 2173 btrfs_release_path(path); 2274 2174 ··· 2223 2231 2224 2232 if (cow_start != (u64)-1) { 2225 2233 ret = fallback_to_cow(inode, locked_folio, cow_start, end); 2226 - cow_start = (u64)-1; 2227 2234 if (ret) { 2228 2235 cow_end = end; 2229 2236 goto error; 2230 2237 } 2238 + cow_start = (u64)-1; 2231 2239 } 2232 2240 2233 2241 btrfs_free_path(path); ··· 2241 2249 * start cur_offset end 2242 2250 * |/////////////| | 2243 2251 * 2252 + * In this case, cow_start should be (u64)-1. 
2253 + * 2244 2254 * For range [start, cur_offset) the folios are already unlocked (except 2245 2255 * @locked_folio), EXTENT_DELALLOC already removed. 2246 - * Only need to clear the dirty flag as they will never be submitted. 2247 - * Ordered extent and extent maps are handled by 2248 - * btrfs_mark_ordered_io_finished() inside run_delalloc_range(). 2256 + * Need to clear the dirty flags and finish the ordered extents. 2249 2257 * 2250 - * 2) Failed with error from fallback_to_cow() 2251 - * start cur_offset cow_end end 2258 + * 2) Failed with error before calling fallback_to_cow() 2259 + * 2260 + * start cow_start end 2261 + * |/////////////| | 2262 + * 2263 + * In this case, only @cow_start is set, @cur_offset is between 2264 + * [cow_start, end) 2265 + * 2266 + * It's mostly the same as case 1), just replace @cur_offset with 2267 + * @cow_start. 2268 + * 2269 + * 3) Failed with error from fallback_to_cow() 2270 + * 2271 + * start cow_start cow_end end 2252 2272 * |/////////////|-----------| | 2253 2273 * 2254 - * For range [start, cur_offset) it's the same as case 1). 2255 - * But for range [cur_offset, cow_end), the folios have dirty flag 2256 - * cleared and unlocked, EXTENT_DEALLLOC cleared by cow_file_range(). 2274 + * In this case, both @cow_start and @cow_end is set. 2257 2275 * 2258 - * Thus we should not call extent_clear_unlock_delalloc() on range 2259 - * [cur_offset, cow_end), as the folios are already unlocked. 2276 + * For range [start, cow_start) it's the same as case 1). 2277 + * But for range [cow_start, cow_end), all the cleanup is handled by 2278 + * cow_file_range(), we should not touch anything in that range. 2260 2279 * 2261 - * So clear the folio dirty flags for [start, cur_offset) first. 2280 + * So for all above cases, if @cow_start is set, cleanup ordered extents 2281 + * for range [start, @cow_start), other wise cleanup range [start, @cur_offset). 
2262 2282 */ 2263 - if (cur_offset > start) 2283 + if (cow_start != (u64)-1) 2284 + cur_offset = cow_start; 2285 + 2286 + if (cur_offset > start) { 2287 + btrfs_cleanup_ordered_extents(inode, start, cur_offset - start); 2264 2288 cleanup_dirty_folios(inode, locked_folio, start, cur_offset - 1, ret); 2289 + } 2265 2290 2266 2291 /* 2267 2292 * If an error happened while a COW region is outstanding, cur_offset ··· 2343 2334 2344 2335 if (should_nocow(inode, start, end)) { 2345 2336 ret = run_delalloc_nocow(inode, locked_folio, start, end); 2346 - goto out; 2337 + return ret; 2347 2338 } 2348 2339 2349 2340 if (btrfs_inode_can_compress(inode) && ··· 2357 2348 else 2358 2349 ret = cow_file_range(inode, locked_folio, start, end, NULL, 2359 2350 false, false); 2360 - 2361 - out: 2362 - if (ret < 0) 2363 - btrfs_cleanup_ordered_extents(inode, start, end - start + 1); 2364 2351 return ret; 2365 2352 } 2366 2353 ··· 2883 2878 return 0; 2884 2879 2885 2880 /* 2881 + * For experimental build, we error out instead of EAGAIN. 2882 + * 2883 + * We should not hit such out-of-band dirty folios anymore. 2884 + */ 2885 + if (IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL)) { 2886 + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); 2887 + btrfs_err_rl(fs_info, 2888 + "root %lld ino %llu folio %llu is marked dirty without notifying the fs", 2889 + BTRFS_I(inode)->root->root_key.objectid, 2890 + btrfs_ino(BTRFS_I(inode)), 2891 + folio_pos(folio)); 2892 + return -EUCLEAN; 2893 + } 2894 + 2895 + /* 2886 2896 * folio_checked is set below when we create a fixup worker for this 2887 2897 * folio, don't try to create another one if we're already 2888 2898 * folio_test_checked. ··· 2916 2896 * We are already holding a reference to this inode from 2917 2897 * write_cache_pages. We need to hold it because the space reservation 2918 2898 * takes place outside of the folio lock, and we can't trust 2919 - * page->mapping outside of the folio lock. 2899 + * folio->mapping outside of the folio lock. 
2920 2900 */ 2921 2901 ihold(inode); 2922 2902 btrfs_folio_set_checked(fs_info, folio, folio_pos(folio), folio_size(folio)); ··· 2972 2952 2973 2953 if (!drop_args.extent_inserted) { 2974 2954 ins.objectid = btrfs_ino(inode); 2975 - ins.offset = file_pos; 2976 2955 ins.type = BTRFS_EXTENT_DATA_KEY; 2956 + ins.offset = file_pos; 2977 2957 2978 2958 ret = btrfs_insert_empty_item(trans, root, path, &ins, 2979 2959 sizeof(*stack_fi)); ··· 3008 2988 btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found); 3009 2989 3010 2990 ins.objectid = disk_bytenr; 3011 - ins.offset = disk_num_bytes; 3012 2991 ins.type = BTRFS_EXTENT_ITEM_KEY; 2992 + ins.offset = disk_num_bytes; 3013 2993 3014 2994 ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes); 3015 2995 if (ret) ··· 3427 3407 if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1)) 3428 3408 return; 3429 3409 3410 + WARN_ON_ONCE(test_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state)); 3430 3411 atomic_inc(&fs_info->nr_delayed_iputs); 3431 3412 /* 3432 3413 * Need to be irq safe here because we can be called from either an irq ··· 3548 3527 struct extent_buffer *leaf; 3549 3528 struct btrfs_key key, found_key; 3550 3529 struct btrfs_trans_handle *trans; 3551 - struct inode *inode; 3552 3530 u64 last_objectid = 0; 3553 3531 int ret = 0, nr_unlink = 0; 3554 3532 ··· 3566 3546 key.offset = (u64)-1; 3567 3547 3568 3548 while (1) { 3549 + struct btrfs_inode *inode; 3550 + 3569 3551 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3570 3552 if (ret < 0) 3571 3553 goto out; ··· 3691 3669 * deleted but wasn't. The inode number may have been reused, 3692 3670 * but either way, we can delete the orphan item. 
3693 3671 */ 3694 - if (!inode || inode->i_nlink) { 3672 + if (!inode || inode->vfs_inode.i_nlink) { 3695 3673 if (inode) { 3696 - ret = btrfs_drop_verity_items(BTRFS_I(inode)); 3697 - iput(inode); 3674 + ret = btrfs_drop_verity_items(inode); 3675 + iput(&inode->vfs_inode); 3698 3676 inode = NULL; 3699 3677 if (ret) 3700 3678 goto out; ··· 3717 3695 nr_unlink++; 3718 3696 3719 3697 /* this will do delete_inode and everything for us */ 3720 - iput(inode); 3698 + iput(&inode->vfs_inode); 3721 3699 } 3722 3700 /* release the path since we're done with it */ 3723 3701 btrfs_release_path(path); ··· 3867 3845 * 3868 3846 * On failure clean up the inode. 3869 3847 */ 3870 - static int btrfs_read_locked_inode(struct inode *inode, struct btrfs_path *path) 3848 + static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path *path) 3871 3849 { 3872 - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 3850 + struct btrfs_root *root = inode->root; 3851 + struct btrfs_fs_info *fs_info = root->fs_info; 3873 3852 struct extent_buffer *leaf; 3874 3853 struct btrfs_inode_item *inode_item; 3875 - struct btrfs_root *root = BTRFS_I(inode)->root; 3854 + struct inode *vfs_inode = &inode->vfs_inode; 3876 3855 struct btrfs_key location; 3877 3856 unsigned long ptr; 3878 3857 int maybe_acls; ··· 3882 3859 bool filled = false; 3883 3860 int first_xattr_slot; 3884 3861 3885 - ret = btrfs_init_file_extent_tree(BTRFS_I(inode)); 3862 + ret = btrfs_init_file_extent_tree(inode); 3886 3863 if (ret) 3887 3864 goto out; 3888 3865 ··· 3892 3869 3893 3870 ASSERT(path); 3894 3871 3895 - btrfs_get_inode_key(BTRFS_I(inode), &location); 3872 + btrfs_get_inode_key(inode, &location); 3896 3873 3897 3874 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3898 3875 if (ret) { ··· 3912 3889 3913 3890 inode_item = btrfs_item_ptr(leaf, path->slots[0], 3914 3891 struct btrfs_inode_item); 3915 - inode->i_mode = btrfs_inode_mode(leaf, inode_item); 3916 - set_nlink(inode, 
btrfs_inode_nlink(leaf, inode_item)); 3917 - i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); 3918 - i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); 3919 - btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item)); 3920 - btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0, 3921 - round_up(i_size_read(inode), fs_info->sectorsize)); 3892 + vfs_inode->i_mode = btrfs_inode_mode(leaf, inode_item); 3893 + set_nlink(vfs_inode, btrfs_inode_nlink(leaf, inode_item)); 3894 + i_uid_write(vfs_inode, btrfs_inode_uid(leaf, inode_item)); 3895 + i_gid_write(vfs_inode, btrfs_inode_gid(leaf, inode_item)); 3896 + btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); 3897 + btrfs_inode_set_file_extent_range(inode, 0, 3898 + round_up(i_size_read(vfs_inode), fs_info->sectorsize)); 3922 3899 3923 - inode_set_atime(inode, btrfs_timespec_sec(leaf, &inode_item->atime), 3900 + inode_set_atime(vfs_inode, btrfs_timespec_sec(leaf, &inode_item->atime), 3924 3901 btrfs_timespec_nsec(leaf, &inode_item->atime)); 3925 3902 3926 - inode_set_mtime(inode, btrfs_timespec_sec(leaf, &inode_item->mtime), 3903 + inode_set_mtime(vfs_inode, btrfs_timespec_sec(leaf, &inode_item->mtime), 3927 3904 btrfs_timespec_nsec(leaf, &inode_item->mtime)); 3928 3905 3929 - inode_set_ctime(inode, btrfs_timespec_sec(leaf, &inode_item->ctime), 3906 + inode_set_ctime(vfs_inode, btrfs_timespec_sec(leaf, &inode_item->ctime), 3930 3907 btrfs_timespec_nsec(leaf, &inode_item->ctime)); 3931 3908 3932 - BTRFS_I(inode)->i_otime_sec = btrfs_timespec_sec(leaf, &inode_item->otime); 3933 - BTRFS_I(inode)->i_otime_nsec = btrfs_timespec_nsec(leaf, &inode_item->otime); 3909 + inode->i_otime_sec = btrfs_timespec_sec(leaf, &inode_item->otime); 3910 + inode->i_otime_nsec = btrfs_timespec_nsec(leaf, &inode_item->otime); 3934 3911 3935 - inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); 3936 - BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); 3937 - BTRFS_I(inode)->last_trans = 
btrfs_inode_transid(leaf, inode_item); 3912 + inode_set_bytes(vfs_inode, btrfs_inode_nbytes(leaf, inode_item)); 3913 + inode->generation = btrfs_inode_generation(leaf, inode_item); 3914 + inode->last_trans = btrfs_inode_transid(leaf, inode_item); 3938 3915 3939 - inode_set_iversion_queried(inode, 3940 - btrfs_inode_sequence(leaf, inode_item)); 3941 - inode->i_generation = BTRFS_I(inode)->generation; 3942 - inode->i_rdev = 0; 3916 + inode_set_iversion_queried(vfs_inode, btrfs_inode_sequence(leaf, inode_item)); 3917 + vfs_inode->i_generation = inode->generation; 3918 + vfs_inode->i_rdev = 0; 3943 3919 rdev = btrfs_inode_rdev(leaf, inode_item); 3944 3920 3945 - if (S_ISDIR(inode->i_mode)) 3946 - BTRFS_I(inode)->index_cnt = (u64)-1; 3921 + if (S_ISDIR(vfs_inode->i_mode)) 3922 + inode->index_cnt = (u64)-1; 3947 3923 3948 3924 btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item), 3949 - &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags); 3925 + &inode->flags, &inode->ro_flags); 3926 + btrfs_update_inode_mapping_flags(inode); 3950 3927 3951 3928 cache_index: 3952 3929 /* ··· 3958 3935 * This is required for both inode re-read from disk and delayed inode 3959 3936 * in the delayed_nodes xarray. 3960 3937 */ 3961 - if (BTRFS_I(inode)->last_trans == btrfs_get_fs_generation(fs_info)) 3962 - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 3963 - &BTRFS_I(inode)->runtime_flags); 3938 + if (inode->last_trans == btrfs_get_fs_generation(fs_info)) 3939 + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); 3964 3940 3965 3941 /* 3966 3942 * We don't persist the id of the transaction where an unlink operation ··· 3988 3966 * transaction commits on fsync if our inode is a directory, or if our 3989 3967 * inode is not a directory, logging its parent unnecessarily. 3990 3968 */ 3991 - BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans; 3969 + inode->last_unlink_trans = inode->last_trans; 3992 3970 3993 3971 /* 3994 3972 * Same logic as for last_unlink_trans. 
We don't persist the generation ··· 3996 3974 * operation, so after eviction and reloading the inode we must be 3997 3975 * pessimistic and assume the last transaction that modified the inode. 3998 3976 */ 3999 - BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans; 3977 + inode->last_reflink_trans = inode->last_trans; 4000 3978 4001 3979 path->slots[0]++; 4002 - if (inode->i_nlink != 1 || 3980 + if (vfs_inode->i_nlink != 1 || 4003 3981 path->slots[0] >= btrfs_header_nritems(leaf)) 4004 3982 goto cache_acl; 4005 3983 4006 3984 btrfs_item_key_to_cpu(leaf, &location, path->slots[0]); 4007 - if (location.objectid != btrfs_ino(BTRFS_I(inode))) 3985 + if (location.objectid != btrfs_ino(inode)) 4008 3986 goto cache_acl; 4009 3987 4010 3988 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); ··· 4012 3990 struct btrfs_inode_ref *ref; 4013 3991 4014 3992 ref = (struct btrfs_inode_ref *)ptr; 4015 - BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref); 3993 + inode->dir_index = btrfs_inode_ref_index(leaf, ref); 4016 3994 } else if (location.type == BTRFS_INODE_EXTREF_KEY) { 4017 3995 struct btrfs_inode_extref *extref; 4018 3996 4019 3997 extref = (struct btrfs_inode_extref *)ptr; 4020 - BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf, 4021 - extref); 3998 + inode->dir_index = btrfs_inode_extref_index(leaf, extref); 4022 3999 } 4023 4000 cache_acl: 4024 4001 /* ··· 4025 4004 * any xattrs or acls 4026 4005 */ 4027 4006 maybe_acls = acls_after_inode_item(leaf, path->slots[0], 4028 - btrfs_ino(BTRFS_I(inode)), &first_xattr_slot); 4007 + btrfs_ino(inode), &first_xattr_slot); 4029 4008 if (first_xattr_slot != -1) { 4030 4009 path->slots[0] = first_xattr_slot; 4031 4010 ret = btrfs_load_inode_props(inode, path); 4032 4011 if (ret) 4033 4012 btrfs_err(fs_info, 4034 4013 "error loading props for ino %llu (root %llu): %d", 4035 - btrfs_ino(BTRFS_I(inode)), 4036 - btrfs_root_id(root), ret); 4014 + btrfs_ino(inode), btrfs_root_id(root), ret); 4037 4015 } 4038 
4016 4039 4017 if (!maybe_acls) 4040 - cache_no_acl(inode); 4018 + cache_no_acl(vfs_inode); 4041 4019 4042 - switch (inode->i_mode & S_IFMT) { 4020 + switch (vfs_inode->i_mode & S_IFMT) { 4043 4021 case S_IFREG: 4044 - inode->i_mapping->a_ops = &btrfs_aops; 4045 - inode->i_fop = &btrfs_file_operations; 4046 - inode->i_op = &btrfs_file_inode_operations; 4022 + vfs_inode->i_mapping->a_ops = &btrfs_aops; 4023 + vfs_inode->i_fop = &btrfs_file_operations; 4024 + vfs_inode->i_op = &btrfs_file_inode_operations; 4047 4025 break; 4048 4026 case S_IFDIR: 4049 - inode->i_fop = &btrfs_dir_file_operations; 4050 - inode->i_op = &btrfs_dir_inode_operations; 4027 + vfs_inode->i_fop = &btrfs_dir_file_operations; 4028 + vfs_inode->i_op = &btrfs_dir_inode_operations; 4051 4029 break; 4052 4030 case S_IFLNK: 4053 - inode->i_op = &btrfs_symlink_inode_operations; 4054 - inode_nohighmem(inode); 4055 - inode->i_mapping->a_ops = &btrfs_aops; 4031 + vfs_inode->i_op = &btrfs_symlink_inode_operations; 4032 + inode_nohighmem(vfs_inode); 4033 + vfs_inode->i_mapping->a_ops = &btrfs_aops; 4056 4034 break; 4057 4035 default: 4058 - inode->i_op = &btrfs_special_inode_operations; 4059 - init_special_inode(inode, inode->i_mode, rdev); 4036 + vfs_inode->i_op = &btrfs_special_inode_operations; 4037 + init_special_inode(vfs_inode, vfs_inode->i_mode, rdev); 4060 4038 break; 4061 4039 } 4062 4040 4063 4041 btrfs_sync_inode_flags_to_i_flags(inode); 4064 4042 4065 - ret = btrfs_add_inode_to_root(BTRFS_I(inode), true); 4043 + ret = btrfs_add_inode_to_root(inode, true); 4066 4044 if (ret) 4067 4045 goto out; 4068 4046 4069 4047 return 0; 4070 4048 out: 4071 - iget_failed(inode); 4049 + iget_failed(vfs_inode); 4072 4050 return ret; 4073 4051 } 4074 4052 ··· 5622 5602 args->root == BTRFS_I(inode)->root; 5623 5603 } 5624 5604 5625 - static struct inode *btrfs_iget_locked(u64 ino, struct btrfs_root *root) 5605 + static struct btrfs_inode *btrfs_iget_locked(u64 ino, struct btrfs_root *root) 5626 5606 { 5627 5607 
struct inode *inode; 5628 5608 struct btrfs_iget_args args; ··· 5634 5614 inode = iget5_locked_rcu(root->fs_info->sb, hashval, btrfs_find_actor, 5635 5615 btrfs_init_locked_inode, 5636 5616 (void *)&args); 5637 - return inode; 5617 + if (!inode) 5618 + return NULL; 5619 + return BTRFS_I(inode); 5638 5620 } 5639 5621 5640 5622 /* 5641 5623 * Get an inode object given its inode number and corresponding root. Path is 5642 5624 * preallocated to prevent recursing back to iget through allocator. 5643 5625 */ 5644 - struct inode *btrfs_iget_path(u64 ino, struct btrfs_root *root, 5645 - struct btrfs_path *path) 5626 + struct btrfs_inode *btrfs_iget_path(u64 ino, struct btrfs_root *root, 5627 + struct btrfs_path *path) 5646 5628 { 5647 - struct inode *inode; 5629 + struct btrfs_inode *inode; 5648 5630 int ret; 5649 5631 5650 5632 inode = btrfs_iget_locked(ino, root); 5651 5633 if (!inode) 5652 5634 return ERR_PTR(-ENOMEM); 5653 5635 5654 - if (!(inode->i_state & I_NEW)) 5636 + if (!(inode->vfs_inode.i_state & I_NEW)) 5655 5637 return inode; 5656 5638 5657 5639 ret = btrfs_read_locked_inode(inode, path); 5658 5640 if (ret) 5659 5641 return ERR_PTR(ret); 5660 5642 5661 - unlock_new_inode(inode); 5643 + unlock_new_inode(&inode->vfs_inode); 5662 5644 return inode; 5663 5645 } 5664 5646 5665 5647 /* 5666 5648 * Get an inode object given its inode number and corresponding root. 
5667 5649 */ 5668 - struct inode *btrfs_iget(u64 ino, struct btrfs_root *root) 5650 + struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root) 5669 5651 { 5670 - struct inode *inode; 5652 + struct btrfs_inode *inode; 5671 5653 struct btrfs_path *path; 5672 5654 int ret; 5673 5655 ··· 5677 5655 if (!inode) 5678 5656 return ERR_PTR(-ENOMEM); 5679 5657 5680 - if (!(inode->i_state & I_NEW)) 5658 + if (!(inode->vfs_inode.i_state & I_NEW)) 5681 5659 return inode; 5682 5660 5683 5661 path = btrfs_alloc_path(); ··· 5689 5667 if (ret) 5690 5668 return ERR_PTR(ret); 5691 5669 5692 - unlock_new_inode(inode); 5670 + unlock_new_inode(&inode->vfs_inode); 5693 5671 return inode; 5694 5672 } 5695 5673 5696 - static struct inode *new_simple_dir(struct inode *dir, 5697 - struct btrfs_key *key, 5698 - struct btrfs_root *root) 5674 + static struct btrfs_inode *new_simple_dir(struct inode *dir, 5675 + struct btrfs_key *key, 5676 + struct btrfs_root *root) 5699 5677 { 5700 5678 struct timespec64 ts; 5701 - struct inode *inode = new_inode(dir->i_sb); 5679 + struct inode *vfs_inode; 5680 + struct btrfs_inode *inode; 5702 5681 5703 - if (!inode) 5682 + vfs_inode = new_inode(dir->i_sb); 5683 + if (!vfs_inode) 5704 5684 return ERR_PTR(-ENOMEM); 5705 5685 5706 - BTRFS_I(inode)->root = btrfs_grab_root(root); 5707 - BTRFS_I(inode)->ref_root_id = key->objectid; 5708 - set_bit(BTRFS_INODE_ROOT_STUB, &BTRFS_I(inode)->runtime_flags); 5709 - set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); 5686 + inode = BTRFS_I(vfs_inode); 5687 + inode->root = btrfs_grab_root(root); 5688 + inode->ref_root_id = key->objectid; 5689 + set_bit(BTRFS_INODE_ROOT_STUB, &inode->runtime_flags); 5690 + set_bit(BTRFS_INODE_DUMMY, &inode->runtime_flags); 5710 5691 5711 - btrfs_set_inode_number(BTRFS_I(inode), BTRFS_EMPTY_SUBVOL_DIR_OBJECTID); 5692 + btrfs_set_inode_number(inode, BTRFS_EMPTY_SUBVOL_DIR_OBJECTID); 5712 5693 /* 5713 5694 * We only need lookup, the rest is read-only and there's no inode 5714 
5695 * associated with the dentry 5715 5696 */ 5716 - inode->i_op = &simple_dir_inode_operations; 5717 - inode->i_opflags &= ~IOP_XATTR; 5718 - inode->i_fop = &simple_dir_operations; 5719 - inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; 5697 + vfs_inode->i_op = &simple_dir_inode_operations; 5698 + vfs_inode->i_opflags &= ~IOP_XATTR; 5699 + vfs_inode->i_fop = &simple_dir_operations; 5700 + vfs_inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; 5720 5701 5721 - ts = inode_set_ctime_current(inode); 5722 - inode_set_mtime_to_ts(inode, ts); 5723 - inode_set_atime_to_ts(inode, inode_get_atime(dir)); 5724 - BTRFS_I(inode)->i_otime_sec = ts.tv_sec; 5725 - BTRFS_I(inode)->i_otime_nsec = ts.tv_nsec; 5702 + ts = inode_set_ctime_current(vfs_inode); 5703 + inode_set_mtime_to_ts(vfs_inode, ts); 5704 + inode_set_atime_to_ts(vfs_inode, inode_get_atime(dir)); 5705 + inode->i_otime_sec = ts.tv_sec; 5706 + inode->i_otime_nsec = ts.tv_nsec; 5726 5707 5727 - inode->i_uid = dir->i_uid; 5728 - inode->i_gid = dir->i_gid; 5708 + vfs_inode->i_uid = dir->i_uid; 5709 + vfs_inode->i_gid = dir->i_gid; 5729 5710 5730 5711 return inode; 5731 5712 } ··· 5742 5717 static_assert(BTRFS_FT_SOCK == FT_SOCK); 5743 5718 static_assert(BTRFS_FT_SYMLINK == FT_SYMLINK); 5744 5719 5745 - static inline u8 btrfs_inode_type(struct inode *inode) 5720 + static inline u8 btrfs_inode_type(const struct btrfs_inode *inode) 5746 5721 { 5747 - return fs_umode_to_ftype(inode->i_mode); 5722 + return fs_umode_to_ftype(inode->vfs_inode.i_mode); 5748 5723 } 5749 5724 5750 5725 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) 5751 5726 { 5752 5727 struct btrfs_fs_info *fs_info = inode_to_fs_info(dir); 5753 - struct inode *inode; 5728 + struct btrfs_inode *inode; 5754 5729 struct btrfs_root *root = BTRFS_I(dir)->root; 5755 5730 struct btrfs_root *sub_root = root; 5756 5731 struct btrfs_key location = { 0 }; ··· 5767 5742 if (location.type == BTRFS_INODE_ITEM_KEY) { 5768 5743 inode = 
btrfs_iget(location.objectid, root); 5769 5744 if (IS_ERR(inode)) 5770 - return inode; 5745 + return ERR_CAST(inode); 5771 5746 5772 5747 /* Do extra check against inode mode with di_type */ 5773 5748 if (btrfs_inode_type(inode) != di_type) { 5774 5749 btrfs_crit(fs_info, 5775 5750 "inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u", 5776 - inode->i_mode, btrfs_inode_type(inode), 5751 + inode->vfs_inode.i_mode, btrfs_inode_type(inode), 5777 5752 di_type); 5778 - iput(inode); 5753 + iput(&inode->vfs_inode); 5779 5754 return ERR_PTR(-EUCLEAN); 5780 5755 } 5781 - return inode; 5756 + return &inode->vfs_inode; 5782 5757 } 5783 5758 5784 5759 ret = fixup_tree_root_location(fs_info, BTRFS_I(dir), dentry, ··· 5793 5768 btrfs_put_root(sub_root); 5794 5769 5795 5770 if (IS_ERR(inode)) 5796 - return inode; 5771 + return ERR_CAST(inode); 5797 5772 5798 5773 down_read(&fs_info->cleanup_work_sem); 5799 - if (!sb_rdonly(inode->i_sb)) 5774 + if (!sb_rdonly(inode->vfs_inode.i_sb)) 5800 5775 ret = btrfs_orphan_cleanup(sub_root); 5801 5776 up_read(&fs_info->cleanup_work_sem); 5802 5777 if (ret) { 5803 - iput(inode); 5778 + iput(&inode->vfs_inode); 5804 5779 inode = ERR_PTR(ret); 5805 5780 } 5806 5781 } 5807 5782 5808 - return inode; 5783 + if (IS_ERR(inode)) 5784 + return ERR_CAST(inode); 5785 + 5786 + return &inode->vfs_inode; 5809 5787 } 5810 5788 5811 5789 static int btrfs_dentry_delete(const struct dentry *dentry) ··· 6281 6253 inode->flags |= BTRFS_INODE_NODATASUM; 6282 6254 } 6283 6255 6284 - btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode); 6256 + btrfs_sync_inode_flags_to_i_flags(inode); 6285 6257 } 6286 6258 6287 6259 int btrfs_create_new_inode(struct btrfs_trans_handle *trans, ··· 6367 6339 if (btrfs_test_opt(fs_info, NODATACOW)) 6368 6340 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW | 6369 6341 BTRFS_INODE_NODATASUM; 6342 + btrfs_update_inode_mapping_flags(BTRFS_I(inode)); 6370 6343 } 6371 6344 6372 6345 ret = btrfs_insert_inode_locked(inode); 
··· 6461 6432 path = NULL; 6462 6433 6463 6434 if (args->subvol) { 6464 - struct inode *parent; 6435 + struct btrfs_inode *parent; 6465 6436 6466 6437 /* 6467 6438 * Subvolumes inherit properties from their parent subvolume, ··· 6471 6442 if (IS_ERR(parent)) { 6472 6443 ret = PTR_ERR(parent); 6473 6444 } else { 6474 - ret = btrfs_inode_inherit_props(trans, inode, parent); 6475 - iput(parent); 6445 + ret = btrfs_inode_inherit_props(trans, BTRFS_I(inode), 6446 + parent); 6447 + iput(&parent->vfs_inode); 6476 6448 } 6477 6449 } else { 6478 - ret = btrfs_inode_inherit_props(trans, inode, dir); 6450 + ret = btrfs_inode_inherit_props(trans, BTRFS_I(inode), 6451 + BTRFS_I(dir)); 6479 6452 } 6480 6453 if (ret) { 6481 6454 btrfs_err(fs_info, ··· 6575 6544 return ret; 6576 6545 6577 6546 ret = btrfs_insert_dir_item(trans, name, parent_inode, &key, 6578 - btrfs_inode_type(&inode->vfs_inode), index); 6547 + btrfs_inode_type(inode), index); 6579 6548 if (ret == -EEXIST || ret == -EOVERFLOW) 6580 6549 goto fail_dir_item; 6581 6550 else if (ret) { ··· 6795 6764 { 6796 6765 int ret; 6797 6766 struct extent_buffer *leaf = path->nodes[0]; 6767 + const u32 blocksize = leaf->fs_info->sectorsize; 6798 6768 char *tmp; 6799 6769 size_t max_size; 6800 6770 unsigned long inline_size; ··· 6812 6780 6813 6781 read_extent_buffer(leaf, tmp, ptr, inline_size); 6814 6782 6815 - max_size = min_t(unsigned long, PAGE_SIZE, max_size); 6783 + max_size = min_t(unsigned long, blocksize, max_size); 6816 6784 ret = btrfs_decompress(compress_type, tmp, folio, 0, inline_size, 6817 6785 max_size); 6818 6786 ··· 6824 6792 * cover that region here. 
6825 6793 */ 6826 6794 6827 - if (max_size < PAGE_SIZE) 6828 - folio_zero_range(folio, max_size, PAGE_SIZE - max_size); 6795 + if (max_size < blocksize) 6796 + folio_zero_range(folio, max_size, blocksize - max_size); 6829 6797 kfree(tmp); 6830 6798 return ret; 6831 6799 } 6832 6800 6833 6801 static int read_inline_extent(struct btrfs_path *path, struct folio *folio) 6834 6802 { 6803 + const u32 blocksize = path->nodes[0]->fs_info->sectorsize; 6835 6804 struct btrfs_file_extent_item *fi; 6836 6805 void *kaddr; 6837 6806 size_t copy_size; ··· 6847 6814 if (btrfs_file_extent_compression(path->nodes[0], fi) != BTRFS_COMPRESS_NONE) 6848 6815 return uncompress_inline(path, folio, fi); 6849 6816 6850 - copy_size = min_t(u64, PAGE_SIZE, 6817 + copy_size = min_t(u64, blocksize, 6851 6818 btrfs_file_extent_ram_bytes(path->nodes[0], fi)); 6852 6819 kaddr = kmap_local_folio(folio, 0); 6853 6820 read_extent_buffer(path->nodes[0], kaddr, 6854 6821 btrfs_file_extent_inline_start(fi), copy_size); 6855 6822 kunmap_local(kaddr); 6856 - if (copy_size < PAGE_SIZE) 6857 - folio_zero_range(folio, copy_size, PAGE_SIZE - copy_size); 6823 + if (copy_size < blocksize) 6824 + folio_zero_range(folio, copy_size, blocksize - copy_size); 6858 6825 return 0; 6859 6826 } 6860 6827 ··· 7095 7062 * NOTE: This only checks the file extents, caller is responsible to wait for 7096 7063 * any ordered extents. 
7097 7064 */ 7098 - noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, 7065 + noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len, 7099 7066 struct btrfs_file_extent *file_extent, 7100 7067 bool nowait) 7101 7068 { 7102 - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 7069 + struct btrfs_root *root = inode->root; 7070 + struct btrfs_fs_info *fs_info = root->fs_info; 7103 7071 struct can_nocow_file_extent_args nocow_args = { 0 }; 7104 7072 struct btrfs_path *path; 7105 7073 int ret; 7106 7074 struct extent_buffer *leaf; 7107 - struct btrfs_root *root = BTRFS_I(inode)->root; 7108 - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 7075 + struct extent_io_tree *io_tree = &inode->io_tree; 7109 7076 struct btrfs_file_extent_item *fi; 7110 7077 struct btrfs_key key; 7111 7078 int found_type; ··· 7115 7082 return -ENOMEM; 7116 7083 path->nowait = nowait; 7117 7084 7118 - ret = btrfs_lookup_file_extent(NULL, root, path, 7119 - btrfs_ino(BTRFS_I(inode)), offset, 0); 7085 + ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), 7086 + offset, 0); 7120 7087 if (ret < 0) 7121 7088 goto out; 7122 7089 ··· 7131 7098 ret = 0; 7132 7099 leaf = path->nodes[0]; 7133 7100 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 7134 - if (key.objectid != btrfs_ino(BTRFS_I(inode)) || 7101 + if (key.objectid != btrfs_ino(inode) || 7135 7102 key.type != BTRFS_EXTENT_DATA_KEY) { 7136 7103 /* not our file or wrong item type, must cow */ 7137 7104 goto out; ··· 7152 7119 nocow_args.end = offset + *len - 1; 7153 7120 nocow_args.free_path = true; 7154 7121 7155 - ret = can_nocow_file_extent(path, &key, BTRFS_I(inode), &nocow_args); 7122 + ret = can_nocow_file_extent(path, &key, inode, &nocow_args); 7156 7123 /* can_nocow_file_extent() has freed the path. 
*/ 7157 7124 path = NULL; 7158 7125 ··· 7168 7135 nocow_args.file_extent.offset)) 7169 7136 goto out; 7170 7137 7171 - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && 7138 + if (!(inode->flags & BTRFS_INODE_NODATACOW) && 7172 7139 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 7173 7140 u64 range_end; 7174 7141 ··· 7273 7240 struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); 7274 7241 struct btrfs_subpage *subpage; 7275 7242 7276 - if (!btrfs_is_subpage(fs_info, folio->mapping)) 7243 + if (!btrfs_is_subpage(fs_info, folio)) 7277 7244 return; 7278 7245 7279 7246 ASSERT(folio_test_private(folio) && folio_get_private(folio)); ··· 7297 7264 static int btrfs_launder_folio(struct folio *folio) 7298 7265 { 7299 7266 return btrfs_qgroup_free_data(folio_to_inode(folio), NULL, folio_pos(folio), 7300 - PAGE_SIZE, NULL); 7267 + folio_size(folio), NULL); 7301 7268 } 7302 7269 7303 7270 static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags) ··· 8532 8499 struct writeback_control *wbc, bool snapshot, 8533 8500 bool in_reclaim_context) 8534 8501 { 8535 - struct btrfs_inode *binode; 8536 - struct inode *inode; 8537 8502 struct btrfs_delalloc_work *work, *next; 8538 8503 LIST_HEAD(works); 8539 8504 LIST_HEAD(splice); ··· 8542 8511 spin_lock(&root->delalloc_lock); 8543 8512 list_splice_init(&root->delalloc_inodes, &splice); 8544 8513 while (!list_empty(&splice)) { 8545 - binode = list_entry(splice.next, struct btrfs_inode, 8546 - delalloc_inodes); 8514 + struct btrfs_inode *inode; 8515 + struct inode *tmp_inode; 8547 8516 8548 - list_move_tail(&binode->delalloc_inodes, 8549 - &root->delalloc_inodes); 8517 + inode = list_entry(splice.next, struct btrfs_inode, delalloc_inodes); 8518 + 8519 + list_move_tail(&inode->delalloc_inodes, &root->delalloc_inodes); 8550 8520 8551 8521 if (in_reclaim_context && 8552 - test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags)) 8522 + test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags)) 8553 8523 
continue; 8554 8524 8555 - inode = igrab(&binode->vfs_inode); 8556 - if (!inode) { 8525 + tmp_inode = igrab(&inode->vfs_inode); 8526 + if (!tmp_inode) { 8557 8527 cond_resched_lock(&root->delalloc_lock); 8558 8528 continue; 8559 8529 } 8560 8530 spin_unlock(&root->delalloc_lock); 8561 8531 8562 8532 if (snapshot) 8563 - set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, 8564 - &binode->runtime_flags); 8533 + set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags); 8565 8534 if (full_flush) { 8566 - work = btrfs_alloc_delalloc_work(inode); 8535 + work = btrfs_alloc_delalloc_work(&inode->vfs_inode); 8567 8536 if (!work) { 8568 - iput(inode); 8537 + iput(&inode->vfs_inode); 8569 8538 ret = -ENOMEM; 8570 8539 goto out; 8571 8540 } ··· 8573 8542 btrfs_queue_work(root->fs_info->flush_workers, 8574 8543 &work->work); 8575 8544 } else { 8576 - ret = filemap_fdatawrite_wbc(inode->i_mapping, wbc); 8577 - btrfs_add_delayed_iput(BTRFS_I(inode)); 8545 + ret = filemap_fdatawrite_wbc(inode->vfs_inode.i_mapping, wbc); 8546 + btrfs_add_delayed_iput(inode); 8578 8547 if (ret || wbc->nr_to_write <= 0) 8579 8548 goto out; 8580 8549 } ··· 8691 8660 struct extent_buffer *leaf; 8692 8661 8693 8662 name_len = strlen(symname); 8694 - if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info)) 8663 + /* 8664 + * Symlinks utilize uncompressed inline extent data, which should not 8665 + * reach block size. 
8666 + */ 8667 + if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) || 8668 + name_len >= fs_info->sectorsize) 8695 8669 return -ENAMETOOLONG; 8696 8670 8697 8671 inode = new_inode(dir->i_sb); ··· 8735 8699 goto out; 8736 8700 } 8737 8701 key.objectid = btrfs_ino(BTRFS_I(inode)); 8738 - key.offset = 0; 8739 8702 key.type = BTRFS_EXTENT_DATA_KEY; 8703 + key.offset = 0; 8740 8704 datasize = btrfs_file_extent_calc_inline_size(name_len); 8741 8705 err = btrfs_insert_empty_item(trans, root, path, &key, 8742 8706 datasize); ··· 9182 9146 } 9183 9147 9184 9148 struct btrfs_encoded_read_private { 9185 - struct completion done; 9149 + struct completion *sync_reads; 9186 9150 void *uring_ctx; 9187 9151 refcount_t pending_refs; 9188 9152 blk_status_t status; ··· 9194 9158 9195 9159 if (bbio->bio.bi_status) { 9196 9160 /* 9197 - * The memory barrier implied by the atomic_dec_return() here 9198 - * pairs with the memory barrier implied by the 9199 - * atomic_dec_return() or io_wait_event() in 9200 - * btrfs_encoded_read_regular_fill_pages() to ensure that this 9201 - * write is observed before the load of status in 9161 + * The memory barrier implied by the refcount_dec_and_test() here 9162 + * pairs with the memory barrier implied by the refcount_dec_and_test() 9163 + * in btrfs_encoded_read_regular_fill_pages() to ensure that 9164 + * this write is observed before the load of status in 9202 9165 * btrfs_encoded_read_regular_fill_pages(). 
9203 9166 */ 9204 9167 WRITE_ONCE(priv->status, bbio->bio.bi_status); ··· 9209 9174 btrfs_uring_read_extent_endio(priv->uring_ctx, err); 9210 9175 kfree(priv); 9211 9176 } else { 9212 - complete(&priv->done); 9177 + complete(priv->sync_reads); 9213 9178 } 9214 9179 } 9215 9180 bio_put(&bbio->bio); ··· 9220 9185 struct page **pages, void *uring_ctx) 9221 9186 { 9222 9187 struct btrfs_fs_info *fs_info = inode->root->fs_info; 9223 - struct btrfs_encoded_read_private *priv; 9188 + struct btrfs_encoded_read_private *priv, sync_priv; 9189 + struct completion sync_reads; 9224 9190 unsigned long i = 0; 9225 9191 struct btrfs_bio *bbio; 9226 9192 int ret; 9227 9193 9228 - priv = kmalloc(sizeof(struct btrfs_encoded_read_private), GFP_NOFS); 9229 - if (!priv) 9230 - return -ENOMEM; 9194 + /* 9195 + * Fast path for synchronous reads which completes in this call, io_uring 9196 + * needs longer time span. 9197 + */ 9198 + if (uring_ctx) { 9199 + priv = kmalloc(sizeof(struct btrfs_encoded_read_private), GFP_NOFS); 9200 + if (!priv) 9201 + return -ENOMEM; 9202 + } else { 9203 + priv = &sync_priv; 9204 + init_completion(&sync_reads); 9205 + priv->sync_reads = &sync_reads; 9206 + } 9231 9207 9232 - init_completion(&priv->done); 9233 9208 refcount_set(&priv->pending_refs, 1); 9234 9209 priv->status = 0; 9235 9210 priv->uring_ctx = uring_ctx; ··· 9282 9237 return -EIOCBQUEUED; 9283 9238 } else { 9284 9239 if (!refcount_dec_and_test(&priv->pending_refs)) 9285 - wait_for_completion_io(&priv->done); 9240 + wait_for_completion_io(&sync_reads); 9286 9241 /* See btrfs_encoded_read_endio() for ordering. */ 9287 - ret = blk_status_to_errno(READ_ONCE(priv->status)); 9288 - kfree(priv); 9289 - return ret; 9242 + return blk_status_to_errno(READ_ONCE(priv->status)); 9290 9243 } 9291 9244 } 9292 9245
+108 -109
fs/btrfs/ioctl.c
··· 118 118 #endif 119 119 120 120 /* Mask out flags that are inappropriate for the given type of inode. */ 121 - static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode, 122 - unsigned int flags) 121 + static unsigned int btrfs_mask_fsflags_for_type(const struct inode *inode, 122 + unsigned int flags) 123 123 { 124 124 if (S_ISDIR(inode->i_mode)) 125 125 return flags; ··· 133 133 * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS 134 134 * ioctl. 135 135 */ 136 - static unsigned int btrfs_inode_flags_to_fsflags(struct btrfs_inode *binode) 136 + static unsigned int btrfs_inode_flags_to_fsflags(const struct btrfs_inode *inode) 137 137 { 138 138 unsigned int iflags = 0; 139 - u32 flags = binode->flags; 140 - u32 ro_flags = binode->ro_flags; 139 + u32 flags = inode->flags; 140 + u32 ro_flags = inode->ro_flags; 141 141 142 142 if (flags & BTRFS_INODE_SYNC) 143 143 iflags |= FS_SYNC_FL; ··· 167 167 /* 168 168 * Update inode->i_flags based on the btrfs internal flags. 
169 169 */ 170 - void btrfs_sync_inode_flags_to_i_flags(struct inode *inode) 170 + void btrfs_sync_inode_flags_to_i_flags(struct btrfs_inode *inode) 171 171 { 172 - struct btrfs_inode *binode = BTRFS_I(inode); 173 172 unsigned int new_fl = 0; 174 173 175 - if (binode->flags & BTRFS_INODE_SYNC) 174 + if (inode->flags & BTRFS_INODE_SYNC) 176 175 new_fl |= S_SYNC; 177 - if (binode->flags & BTRFS_INODE_IMMUTABLE) 176 + if (inode->flags & BTRFS_INODE_IMMUTABLE) 178 177 new_fl |= S_IMMUTABLE; 179 - if (binode->flags & BTRFS_INODE_APPEND) 178 + if (inode->flags & BTRFS_INODE_APPEND) 180 179 new_fl |= S_APPEND; 181 - if (binode->flags & BTRFS_INODE_NOATIME) 180 + if (inode->flags & BTRFS_INODE_NOATIME) 182 181 new_fl |= S_NOATIME; 183 - if (binode->flags & BTRFS_INODE_DIRSYNC) 182 + if (inode->flags & BTRFS_INODE_DIRSYNC) 184 183 new_fl |= S_DIRSYNC; 185 - if (binode->ro_flags & BTRFS_INODE_RO_VERITY) 184 + if (inode->ro_flags & BTRFS_INODE_RO_VERITY) 186 185 new_fl |= S_VERITY; 187 186 188 - set_mask_bits(&inode->i_flags, 187 + set_mask_bits(&inode->vfs_inode.i_flags, 189 188 S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC | 190 189 S_VERITY, new_fl); 191 190 } ··· 218 219 return 0; 219 220 } 220 221 221 - static int check_fsflags_compatible(struct btrfs_fs_info *fs_info, 222 + static int check_fsflags_compatible(const struct btrfs_fs_info *fs_info, 222 223 unsigned int flags) 223 224 { 224 225 if (btrfs_is_zoned(fs_info) && (flags & FS_NOCOW_FL)) ··· 247 248 */ 248 249 int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) 249 250 { 250 - struct btrfs_inode *binode = BTRFS_I(d_inode(dentry)); 251 + const struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); 251 252 252 - fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode)); 253 + fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(inode)); 253 254 return 0; 254 255 } 255 256 256 257 int btrfs_fileattr_set(struct mnt_idmap *idmap, 257 258 struct dentry *dentry, struct fileattr *fa) 258 259 { 
259 - struct inode *inode = d_inode(dentry); 260 - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 261 - struct btrfs_inode *binode = BTRFS_I(inode); 262 - struct btrfs_root *root = binode->root; 260 + struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); 261 + struct btrfs_root *root = inode->root; 262 + struct btrfs_fs_info *fs_info = root->fs_info; 263 263 struct btrfs_trans_handle *trans; 264 264 unsigned int fsflags, old_fsflags; 265 265 int ret; 266 266 const char *comp = NULL; 267 - u32 binode_flags; 267 + u32 inode_flags; 268 268 269 269 if (btrfs_root_readonly(root)) 270 270 return -EROFS; ··· 271 273 if (fileattr_has_fsx(fa)) 272 274 return -EOPNOTSUPP; 273 275 274 - fsflags = btrfs_mask_fsflags_for_type(inode, fa->flags); 275 - old_fsflags = btrfs_inode_flags_to_fsflags(binode); 276 + fsflags = btrfs_mask_fsflags_for_type(&inode->vfs_inode, fa->flags); 277 + old_fsflags = btrfs_inode_flags_to_fsflags(inode); 276 278 ret = check_fsflags(old_fsflags, fsflags); 277 279 if (ret) 278 280 return ret; ··· 281 283 if (ret) 282 284 return ret; 283 285 284 - binode_flags = binode->flags; 286 + inode_flags = inode->flags; 285 287 if (fsflags & FS_SYNC_FL) 286 - binode_flags |= BTRFS_INODE_SYNC; 288 + inode_flags |= BTRFS_INODE_SYNC; 287 289 else 288 - binode_flags &= ~BTRFS_INODE_SYNC; 290 + inode_flags &= ~BTRFS_INODE_SYNC; 289 291 if (fsflags & FS_IMMUTABLE_FL) 290 - binode_flags |= BTRFS_INODE_IMMUTABLE; 292 + inode_flags |= BTRFS_INODE_IMMUTABLE; 291 293 else 292 - binode_flags &= ~BTRFS_INODE_IMMUTABLE; 294 + inode_flags &= ~BTRFS_INODE_IMMUTABLE; 293 295 if (fsflags & FS_APPEND_FL) 294 - binode_flags |= BTRFS_INODE_APPEND; 296 + inode_flags |= BTRFS_INODE_APPEND; 295 297 else 296 - binode_flags &= ~BTRFS_INODE_APPEND; 298 + inode_flags &= ~BTRFS_INODE_APPEND; 297 299 if (fsflags & FS_NODUMP_FL) 298 - binode_flags |= BTRFS_INODE_NODUMP; 300 + inode_flags |= BTRFS_INODE_NODUMP; 299 301 else 300 - binode_flags &= ~BTRFS_INODE_NODUMP; 302 + inode_flags 
&= ~BTRFS_INODE_NODUMP; 301 303 if (fsflags & FS_NOATIME_FL) 302 - binode_flags |= BTRFS_INODE_NOATIME; 304 + inode_flags |= BTRFS_INODE_NOATIME; 303 305 else 304 - binode_flags &= ~BTRFS_INODE_NOATIME; 306 + inode_flags &= ~BTRFS_INODE_NOATIME; 305 307 306 308 /* If coming from FS_IOC_FSSETXATTR then skip unconverted flags */ 307 309 if (!fa->flags_valid) { ··· 313 315 } 314 316 315 317 if (fsflags & FS_DIRSYNC_FL) 316 - binode_flags |= BTRFS_INODE_DIRSYNC; 318 + inode_flags |= BTRFS_INODE_DIRSYNC; 317 319 else 318 - binode_flags &= ~BTRFS_INODE_DIRSYNC; 320 + inode_flags &= ~BTRFS_INODE_DIRSYNC; 319 321 if (fsflags & FS_NOCOW_FL) { 320 - if (S_ISREG(inode->i_mode)) { 322 + if (S_ISREG(inode->vfs_inode.i_mode)) { 321 323 /* 322 324 * It's safe to turn csums off here, no extents exist. 323 325 * Otherwise we want the flag to reflect the real COW 324 326 * status of the file and will not set it. 325 327 */ 326 - if (inode->i_size == 0) 327 - binode_flags |= BTRFS_INODE_NODATACOW | 328 - BTRFS_INODE_NODATASUM; 328 + if (inode->vfs_inode.i_size == 0) 329 + inode_flags |= BTRFS_INODE_NODATACOW | 330 + BTRFS_INODE_NODATASUM; 329 331 } else { 330 - binode_flags |= BTRFS_INODE_NODATACOW; 332 + inode_flags |= BTRFS_INODE_NODATACOW; 331 333 } 332 334 } else { 333 335 /* 334 336 * Revert back under same assumptions as above 335 337 */ 336 - if (S_ISREG(inode->i_mode)) { 337 - if (inode->i_size == 0) 338 - binode_flags &= ~(BTRFS_INODE_NODATACOW | 339 - BTRFS_INODE_NODATASUM); 338 + if (S_ISREG(inode->vfs_inode.i_mode)) { 339 + if (inode->vfs_inode.i_size == 0) 340 + inode_flags &= ~(BTRFS_INODE_NODATACOW | 341 + BTRFS_INODE_NODATASUM); 340 342 } else { 341 - binode_flags &= ~BTRFS_INODE_NODATACOW; 343 + inode_flags &= ~BTRFS_INODE_NODATACOW; 342 344 } 343 345 } 344 346 ··· 348 350 * things smaller. 
349 351 */ 350 352 if (fsflags & FS_NOCOMP_FL) { 351 - binode_flags &= ~BTRFS_INODE_COMPRESS; 352 - binode_flags |= BTRFS_INODE_NOCOMPRESS; 353 + inode_flags &= ~BTRFS_INODE_COMPRESS; 354 + inode_flags |= BTRFS_INODE_NOCOMPRESS; 353 355 } else if (fsflags & FS_COMPR_FL) { 354 356 355 - if (IS_SWAPFILE(inode)) 357 + if (IS_SWAPFILE(&inode->vfs_inode)) 356 358 return -ETXTBSY; 357 359 358 - binode_flags |= BTRFS_INODE_COMPRESS; 359 - binode_flags &= ~BTRFS_INODE_NOCOMPRESS; 360 + inode_flags |= BTRFS_INODE_COMPRESS; 361 + inode_flags &= ~BTRFS_INODE_NOCOMPRESS; 360 362 361 363 comp = btrfs_compress_type2str(fs_info->compress_type); 362 364 if (!comp || comp[0] == 0) 363 365 comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); 364 366 } else { 365 - binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 367 + inode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 366 368 } 367 369 368 370 /* ··· 374 376 return PTR_ERR(trans); 375 377 376 378 if (comp) { 377 - ret = btrfs_set_prop(trans, BTRFS_I(inode), "btrfs.compression", 379 + ret = btrfs_set_prop(trans, inode, "btrfs.compression", 378 380 comp, strlen(comp), 0); 379 381 if (ret) { 380 382 btrfs_abort_transaction(trans, ret); 381 383 goto out_end_trans; 382 384 } 383 385 } else { 384 - ret = btrfs_set_prop(trans, BTRFS_I(inode), "btrfs.compression", 385 - NULL, 0, 0); 386 + ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0); 386 387 if (ret && ret != -ENODATA) { 387 388 btrfs_abort_transaction(trans, ret); 388 389 goto out_end_trans; ··· 389 392 } 390 393 391 394 update_flags: 392 - binode->flags = binode_flags; 395 + inode->flags = inode_flags; 396 + btrfs_update_inode_mapping_flags(inode); 393 397 btrfs_sync_inode_flags_to_i_flags(inode); 394 - inode_inc_iversion(inode); 395 - inode_set_ctime_current(inode); 396 - ret = btrfs_update_inode(trans, BTRFS_I(inode)); 398 + inode_inc_iversion(&inode->vfs_inode); 399 + inode_set_ctime_current(&inode->vfs_inode); 400 + ret = 
btrfs_update_inode(trans, inode); 397 401 398 402 out_end_trans: 399 403 btrfs_end_transaction(trans); 400 404 return ret; 401 405 } 402 406 403 - static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg) 407 + static int btrfs_ioctl_getversion(const struct inode *inode, int __user *arg) 404 408 { 405 409 return put_user(inode->i_generation, arg); 406 410 } ··· 473 475 * Calculate the number of transaction items to reserve for creating a subvolume 474 476 * or snapshot, not including the inode, directory entries, or parent directory. 475 477 */ 476 - static unsigned int create_subvol_num_items(struct btrfs_qgroup_inherit *inherit) 478 + static unsigned int create_subvol_num_items(const struct btrfs_qgroup_inherit *inherit) 477 479 { 478 480 /* 479 481 * 1 to add root block ··· 615 617 btrfs_set_root_dirid(root_item, BTRFS_FIRST_FREE_OBJECTID); 616 618 617 619 key.objectid = objectid; 618 - key.offset = 0; 619 620 key.type = BTRFS_ROOT_ITEM_KEY; 621 + key.offset = 0; 620 622 ret = btrfs_insert_root(trans, fs_info->tree_root, &key, 621 623 root_item); 622 624 if (ret) { ··· 876 878 877 879 /* copy of may_create in fs/namei.c() */ 878 880 static inline int btrfs_may_create(struct mnt_idmap *idmap, 879 - struct inode *dir, struct dentry *child) 881 + struct inode *dir, const struct dentry *child) 880 882 { 881 883 if (d_really_is_positive(child)) 882 884 return -EEXIST; ··· 1031 1033 void __user *arg) 1032 1034 { 1033 1035 BTRFS_DEV_LOOKUP_ARGS(args); 1034 - struct inode *inode = file_inode(file); 1035 - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 1036 + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 1037 + struct btrfs_fs_info *fs_info = root->fs_info; 1036 1038 u64 new_size; 1037 1039 u64 old_size; 1038 1040 u64 devid = 1; 1039 - struct btrfs_root *root = BTRFS_I(inode)->root; 1040 1041 struct btrfs_ioctl_vol_args *vol_args; 1041 - struct btrfs_trans_handle *trans; 1042 1042 struct btrfs_device *device = NULL; 1043 1043 char 
*sizestr; 1044 - char *retptr; 1045 1044 char *devstr = NULL; 1046 1045 int ret = 0; 1047 1046 int mod = 0; ··· 1106 1111 if (!strcmp(sizestr, "max")) 1107 1112 new_size = bdev_nr_bytes(device->bdev); 1108 1113 else { 1114 + char *retptr; 1115 + 1109 1116 if (sizestr[0] == '-') { 1110 1117 mod = -1; 1111 1118 sizestr++; ··· 1155 1158 new_size = round_down(new_size, fs_info->sectorsize); 1156 1159 1157 1160 if (new_size > old_size) { 1161 + struct btrfs_trans_handle *trans; 1162 + 1158 1163 trans = btrfs_start_transaction(root, 0); 1159 1164 if (IS_ERR(trans)) { 1160 1165 ret = PTR_ERR(trans); ··· 1335 1336 return ret; 1336 1337 } 1337 1338 1338 - static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode, 1339 + static noinline int btrfs_ioctl_subvol_getflags(struct btrfs_inode *inode, 1339 1340 void __user *arg) 1340 1341 { 1341 - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 1342 - struct btrfs_root *root = BTRFS_I(inode)->root; 1342 + struct btrfs_root *root = inode->root; 1343 + struct btrfs_fs_info *fs_info = root->fs_info; 1343 1344 int ret = 0; 1344 1345 u64 flags = 0; 1345 1346 1346 - if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) 1347 + if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) 1347 1348 return -EINVAL; 1348 1349 1349 1350 down_read(&fs_info->subvol_sem); ··· 1446 1447 return ret; 1447 1448 } 1448 1449 1449 - static noinline int key_in_sk(struct btrfs_key *key, 1450 - struct btrfs_ioctl_search_key *sk) 1450 + static noinline int key_in_sk(const struct btrfs_key *key, 1451 + const struct btrfs_ioctl_search_key *sk) 1451 1452 { 1452 1453 struct btrfs_key test; 1453 1454 int ret; ··· 1472 1473 1473 1474 static noinline int copy_to_sk(struct btrfs_path *path, 1474 1475 struct btrfs_key *key, 1475 - struct btrfs_ioctl_search_key *sk, 1476 + const struct btrfs_ioctl_search_key *sk, 1476 1477 u64 *buf_size, 1477 1478 char __user *ubuf, 1478 1479 unsigned long *sk_offset, ··· 1529 1530 } 1530 1531 1531 1532 sh.objectid = 
key->objectid; 1532 - sh.offset = key->offset; 1533 1533 sh.type = key->type; 1534 + sh.offset = key->offset; 1534 1535 sh.len = item_len; 1535 1536 sh.transid = found_transid; 1536 1537 ··· 1603 1604 return ret; 1604 1605 } 1605 1606 1606 - static noinline int search_ioctl(struct inode *inode, 1607 + static noinline int search_ioctl(struct btrfs_root *root, 1607 1608 struct btrfs_ioctl_search_key *sk, 1608 1609 u64 *buf_size, 1609 1610 char __user *ubuf) 1610 1611 { 1611 - struct btrfs_fs_info *info = inode_to_fs_info(inode); 1612 - struct btrfs_root *root; 1612 + struct btrfs_fs_info *info = root->fs_info; 1613 1613 struct btrfs_key key; 1614 1614 struct btrfs_path *path; 1615 1615 int ret; ··· 1625 1627 return -ENOMEM; 1626 1628 1627 1629 if (sk->tree_id == 0) { 1628 - /* search the root of the inode that was passed */ 1629 - root = btrfs_grab_root(BTRFS_I(inode)->root); 1630 + /* Search the root that we got passed. */ 1631 + root = btrfs_grab_root(root); 1630 1632 } else { 1633 + /* Look up the root from the arguments. */ 1631 1634 root = btrfs_get_fs_root(info, sk->tree_id, true); 1632 1635 if (IS_ERR(root)) { 1633 1636 btrfs_free_path(path); ··· 1641 1642 key.offset = sk->min_offset; 1642 1643 1643 1644 while (1) { 1644 - ret = -EFAULT; 1645 1645 /* 1646 1646 * Ensure that the whole user buffer is faulted in at sub-page 1647 1647 * granularity, otherwise the loop may live-lock. 
1648 1648 */ 1649 - if (fault_in_subpage_writeable(ubuf + sk_offset, 1650 - *buf_size - sk_offset)) 1649 + if (fault_in_subpage_writeable(ubuf + sk_offset, *buf_size - sk_offset)) { 1650 + ret = -EFAULT; 1651 1651 break; 1652 + } 1652 1653 1653 1654 ret = btrfs_search_forward(root, &key, path, sk->min_transid); 1654 - if (ret != 0) { 1655 - if (ret > 0) 1656 - ret = 0; 1657 - goto err; 1658 - } 1655 + if (ret) 1656 + break; 1657 + 1659 1658 ret = copy_to_sk(path, &key, sk, buf_size, ubuf, 1660 1659 &sk_offset, &num_found); 1661 1660 btrfs_release_path(path); ··· 1661 1664 break; 1662 1665 1663 1666 } 1667 + /* Normalize return values from btrfs_search_forward() and copy_to_sk(). */ 1664 1668 if (ret > 0) 1665 1669 ret = 0; 1666 - err: 1670 + 1667 1671 sk->nr_items = num_found; 1668 1672 btrfs_put_root(root); 1669 1673 btrfs_free_path(path); 1670 1674 return ret; 1671 1675 } 1672 1676 1673 - static noinline int btrfs_ioctl_tree_search(struct inode *inode, 1677 + static noinline int btrfs_ioctl_tree_search(struct btrfs_root *root, 1674 1678 void __user *argp) 1675 1679 { 1676 1680 struct btrfs_ioctl_search_args __user *uargs = argp; ··· 1687 1689 1688 1690 buf_size = sizeof(uargs->buf); 1689 1691 1690 - ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); 1692 + ret = search_ioctl(root, &sk, &buf_size, uargs->buf); 1691 1693 1692 1694 /* 1693 1695 * In the origin implementation an overflow is handled by returning a ··· 1701 1703 return ret; 1702 1704 } 1703 1705 1704 - static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode, 1706 + static noinline int btrfs_ioctl_tree_search_v2(struct btrfs_root *root, 1705 1707 void __user *argp) 1706 1708 { 1707 1709 struct btrfs_ioctl_search_args_v2 __user *uarg = argp; ··· 1723 1725 if (buf_size > buf_limit) 1724 1726 buf_size = buf_limit; 1725 1727 1726 - ret = search_ioctl(inode, &args.key, &buf_size, 1728 + ret = search_ioctl(root, &args.key, &buf_size, 1727 1729 (char __user *)(&uarg->buf[0])); 1728 1730 if (ret 
== 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) 1729 1731 ret = -EFAULT; ··· 1831 1833 struct btrfs_path *path; 1832 1834 struct btrfs_key key, key2; 1833 1835 struct extent_buffer *leaf; 1834 - struct inode *temp_inode; 1835 1836 char *ptr; 1836 1837 int slot; 1837 1838 int len; ··· 1858 1861 key.type = BTRFS_INODE_REF_KEY; 1859 1862 key.offset = (u64)-1; 1860 1863 while (1) { 1864 + struct btrfs_inode *temp_inode; 1865 + 1861 1866 ret = btrfs_search_backwards(root, &key, path); 1862 1867 if (ret < 0) 1863 1868 goto out_put; ··· 1914 1915 ret = PTR_ERR(temp_inode); 1915 1916 goto out_put; 1916 1917 } 1917 - ret = inode_permission(idmap, temp_inode, 1918 + ret = inode_permission(idmap, &temp_inode->vfs_inode, 1918 1919 MAY_READ | MAY_EXEC); 1919 - iput(temp_inode); 1920 + iput(&temp_inode->vfs_inode); 1920 1921 if (ret) { 1921 1922 ret = -EACCES; 1922 1923 goto out_put; ··· 2570 2571 /* the rest are all set to zero by kzalloc */ 2571 2572 range.len = (u64)-1; 2572 2573 } 2573 - ret = btrfs_defrag_file(file_inode(file), &file->f_ra, 2574 + ret = btrfs_defrag_file(BTRFS_I(file_inode(file)), &file->f_ra, 2574 2575 &range, BTRFS_OLDEST_GENERATION, 0); 2575 2576 if (ret > 0) 2576 2577 ret = 0; ··· 2762 2763 return ret; 2763 2764 } 2764 2765 2765 - static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, 2766 + static long btrfs_ioctl_fs_info(const struct btrfs_fs_info *fs_info, 2766 2767 void __user *arg) 2767 2768 { 2768 2769 struct btrfs_ioctl_fs_info_args *fi_args; ··· 2816 2817 return ret; 2817 2818 } 2818 2819 2819 - static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, 2820 + static long btrfs_ioctl_dev_info(const struct btrfs_fs_info *fs_info, 2820 2821 void __user *arg) 2821 2822 { 2822 2823 BTRFS_DEV_LOOKUP_ARGS(args); ··· 4247 4248 return 0; 4248 4249 } 4249 4250 4250 - static int check_feature_bits(struct btrfs_fs_info *fs_info, 4251 + static int check_feature_bits(const struct btrfs_fs_info *fs_info, 4251 4252 enum 
btrfs_feature_set set, 4252 4253 u64 change_mask, u64 flags, u64 supported_flags, 4253 4254 u64 safe_set, u64 safe_clear) ··· 4383 4384 return ret; 4384 4385 } 4385 4386 4386 - static int _btrfs_ioctl_send(struct btrfs_inode *inode, void __user *argp, bool compat) 4387 + static int _btrfs_ioctl_send(struct btrfs_root *root, void __user *argp, bool compat) 4387 4388 { 4388 4389 struct btrfs_ioctl_send_args *arg; 4389 4390 int ret; ··· 4414 4415 if (IS_ERR(arg)) 4415 4416 return PTR_ERR(arg); 4416 4417 } 4417 - ret = btrfs_ioctl_send(inode, arg); 4418 + ret = btrfs_ioctl_send(root, arg); 4418 4419 kfree(arg); 4419 4420 return ret; 4420 4421 } ··· 5241 5242 case BTRFS_IOC_SNAP_DESTROY_V2: 5242 5243 return btrfs_ioctl_snap_destroy(file, argp, true); 5243 5244 case BTRFS_IOC_SUBVOL_GETFLAGS: 5244 - return btrfs_ioctl_subvol_getflags(inode, argp); 5245 + return btrfs_ioctl_subvol_getflags(BTRFS_I(inode), argp); 5245 5246 case BTRFS_IOC_SUBVOL_SETFLAGS: 5246 5247 return btrfs_ioctl_subvol_setflags(file, argp); 5247 5248 case BTRFS_IOC_DEFAULT_SUBVOL: ··· 5263 5264 case BTRFS_IOC_DEV_INFO: 5264 5265 return btrfs_ioctl_dev_info(fs_info, argp); 5265 5266 case BTRFS_IOC_TREE_SEARCH: 5266 - return btrfs_ioctl_tree_search(inode, argp); 5267 + return btrfs_ioctl_tree_search(root, argp); 5267 5268 case BTRFS_IOC_TREE_SEARCH_V2: 5268 - return btrfs_ioctl_tree_search_v2(inode, argp); 5269 + return btrfs_ioctl_tree_search_v2(root, argp); 5269 5270 case BTRFS_IOC_INO_LOOKUP: 5270 5271 return btrfs_ioctl_ino_lookup(root, argp); 5271 5272 case BTRFS_IOC_INO_PATHS: ··· 5313 5314 return btrfs_ioctl_set_received_subvol_32(file, argp); 5314 5315 #endif 5315 5316 case BTRFS_IOC_SEND: 5316 - return _btrfs_ioctl_send(BTRFS_I(inode), argp, false); 5317 + return _btrfs_ioctl_send(root, argp, false); 5317 5318 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 5318 5319 case BTRFS_IOC_SEND_32: 5319 - return _btrfs_ioctl_send(BTRFS_I(inode), argp, true); 5320 + return _btrfs_ioctl_send(root, 
argp, true); 5320 5321 #endif 5321 5322 case BTRFS_IOC_GET_DEV_STATS: 5322 5323 return btrfs_ioctl_get_dev_stats(fs_info, argp);
+3 -1
fs/btrfs/ioctl.h
··· 9 9 struct dentry; 10 10 struct mnt_idmap; 11 11 struct fileattr; 12 + struct io_uring_cmd; 13 + struct btrfs_inode; 12 14 struct btrfs_fs_info; 13 15 struct btrfs_ioctl_balance_args; 14 16 ··· 20 18 int btrfs_fileattr_set(struct mnt_idmap *idmap, 21 19 struct dentry *dentry, struct fileattr *fa); 22 20 int btrfs_ioctl_get_supported_features(void __user *arg); 23 - void btrfs_sync_inode_flags_to_i_flags(struct inode *inode); 21 + void btrfs_sync_inode_flags_to_i_flags(struct btrfs_inode *inode); 24 22 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, 25 23 struct btrfs_ioctl_balance_args *bargs); 26 24 int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
-1
fs/btrfs/locking.c
··· 9 9 #include <linux/page-flags.h> 10 10 #include <asm/bug.h> 11 11 #include <trace/events/btrfs.h> 12 - #include "misc.h" 13 12 #include "ctree.h" 14 13 #include "extent_io.h" 15 14 #include "locking.h"
+18 -5
fs/btrfs/ordered-data.c
··· 842 842 /* 843 843 * Start IO and wait for a given ordered extent to finish. 844 844 * 845 - * Wait on page writeback for all the pages in the extent and the IO completion 846 - * code to insert metadata into the btree corresponding to the extent. 845 + * Wait on page writeback for all the pages in the extent but not in 846 + * [@nowriteback_start, @nowriteback_start + @nowriteback_len) and the 847 + * IO completion code to insert metadata into the btree corresponding to the extent. 847 848 */ 848 - void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry) 849 + void btrfs_start_ordered_extent_nowriteback(struct btrfs_ordered_extent *entry, 850 + u64 nowriteback_start, u32 nowriteback_len) 849 851 { 850 852 u64 start = entry->file_offset; 851 853 u64 end = start + entry->num_bytes - 1; ··· 867 865 * start IO on any dirty ones so the wait doesn't stall waiting 868 866 * for the flusher thread to find them 869 867 */ 870 - if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) 871 - filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end); 868 + if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) { 869 + if (!nowriteback_len) { 870 + filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end); 871 + } else { 872 + if (start < nowriteback_start) 873 + filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, 874 + nowriteback_start - 1); 875 + if (nowriteback_start + nowriteback_len < end) 876 + filemap_fdatawrite_range(inode->vfs_inode.i_mapping, 877 + nowriteback_start + nowriteback_len, 878 + end); 879 + } 880 + } 872 881 873 882 if (!freespace_inode) 874 883 btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
+8 -1
fs/btrfs/ordered-data.h
··· 17 17 struct inode; 18 18 struct page; 19 19 struct extent_state; 20 + struct btrfs_block_group; 20 21 struct btrfs_inode; 21 22 struct btrfs_root; 22 23 struct btrfs_fs_info; ··· 192 191 struct btrfs_ordered_sum *sum); 193 192 struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode, 194 193 u64 file_offset); 195 - void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry); 194 + void btrfs_start_ordered_extent_nowriteback(struct btrfs_ordered_extent *entry, 195 + u64 nowriteback_start, u32 nowriteback_len); 196 + static inline void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry) 197 + { 198 + return btrfs_start_ordered_extent_nowriteback(entry, 0, 0); 199 + } 200 + 196 201 int btrfs_wait_ordered_range(struct btrfs_inode *inode, u64 start, u64 len); 197 202 struct btrfs_ordered_extent * 198 203 btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset);
+2
fs/btrfs/print-tree.h
··· 6 6 #ifndef BTRFS_PRINT_TREE_H 7 7 #define BTRFS_PRINT_TREE_H 8 8 9 + #include <linux/types.h> 10 + 9 11 /* Buffer size to contain tree name and possibly additional data (offset) */ 10 12 #define BTRFS_ROOT_NAME_BUF_LEN 48 11 13
+33 -33
fs/btrfs/props.c
··· 26 26 const char *xattr_name; 27 27 int (*validate)(const struct btrfs_inode *inode, const char *value, 28 28 size_t len); 29 - int (*apply)(struct inode *inode, const char *value, size_t len); 30 - const char *(*extract)(const struct inode *inode); 29 + int (*apply)(struct btrfs_inode *inode, const char *value, size_t len); 30 + const char *(*extract)(const struct btrfs_inode *inode); 31 31 bool (*ignore)(const struct btrfs_inode *inode); 32 32 int inheritable; 33 33 }; ··· 121 121 if (ret) 122 122 return ret; 123 123 124 - ret = handler->apply(&inode->vfs_inode, NULL, 0); 124 + ret = handler->apply(inode, NULL, 0); 125 125 ASSERT(ret == 0); 126 126 127 127 return ret; ··· 131 131 value_len, flags); 132 132 if (ret) 133 133 return ret; 134 - ret = handler->apply(&inode->vfs_inode, value, value_len); 134 + ret = handler->apply(inode, value, value_len); 135 135 if (ret) { 136 136 btrfs_setxattr(trans, &inode->vfs_inode, handler->xattr_name, NULL, 137 137 0, flags); ··· 263 263 struct btrfs_root *root = BTRFS_I(inode)->root; 264 264 int ret; 265 265 266 - ret = handler->apply(inode, value, len); 266 + ret = handler->apply(BTRFS_I(inode), value, len); 267 267 if (unlikely(ret)) 268 268 btrfs_warn(root->fs_info, 269 269 "error applying prop %s to ino %llu (root %llu): %d", ··· 273 273 set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags); 274 274 } 275 275 276 - int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path) 276 + int btrfs_load_inode_props(struct btrfs_inode *inode, struct btrfs_path *path) 277 277 { 278 - struct btrfs_root *root = BTRFS_I(inode)->root; 279 - u64 ino = btrfs_ino(BTRFS_I(inode)); 278 + struct btrfs_root *root = inode->root; 279 + u64 ino = btrfs_ino(inode); 280 280 281 - return iterate_object_props(root, path, ino, inode_prop_iterator, inode); 281 + return iterate_object_props(root, path, ino, inode_prop_iterator, 282 + &inode->vfs_inode); 282 283 } 283 284 284 285 static int prop_compression_validate(const struct 
btrfs_inode *inode, ··· 301 300 return -EINVAL; 302 301 } 303 302 304 - static int prop_compression_apply(struct inode *inode, const char *value, 303 + static int prop_compression_apply(struct btrfs_inode *inode, const char *value, 305 304 size_t len) 306 305 { 307 - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); 306 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 308 307 int type; 309 308 310 309 /* Reset to defaults */ 311 310 if (len == 0) { 312 - BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; 313 - BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 314 - BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE; 311 + inode->flags &= ~BTRFS_INODE_COMPRESS; 312 + inode->flags &= ~BTRFS_INODE_NOCOMPRESS; 313 + inode->prop_compress = BTRFS_COMPRESS_NONE; 315 314 return 0; 316 315 } 317 316 318 317 /* Set NOCOMPRESS flag */ 319 318 if ((len == 2 && strncmp("no", value, 2) == 0) || 320 319 (len == 4 && strncmp("none", value, 4) == 0)) { 321 - BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 322 - BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; 323 - BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE; 320 + inode->flags |= BTRFS_INODE_NOCOMPRESS; 321 + inode->flags &= ~BTRFS_INODE_COMPRESS; 322 + inode->prop_compress = BTRFS_COMPRESS_NONE; 324 323 325 324 return 0; 326 325 } ··· 337 336 return -EINVAL; 338 337 } 339 338 340 - BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 341 - BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; 342 - BTRFS_I(inode)->prop_compress = type; 339 + inode->flags &= ~BTRFS_INODE_NOCOMPRESS; 340 + inode->flags |= BTRFS_INODE_COMPRESS; 341 + inode->prop_compress = type; 343 342 344 343 return 0; 345 344 } ··· 360 359 return false; 361 360 } 362 361 363 - static const char *prop_compression_extract(const struct inode *inode) 362 + static const char *prop_compression_extract(const struct btrfs_inode *inode) 364 363 { 365 - switch (BTRFS_I(inode)->prop_compress) { 364 + switch (inode->prop_compress) { 366 365 case 
BTRFS_COMPRESS_ZLIB: 367 366 case BTRFS_COMPRESS_LZO: 368 367 case BTRFS_COMPRESS_ZSTD: 369 - return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress); 368 + return btrfs_compress_type2str(inode->prop_compress); 370 369 default: 371 370 break; 372 371 } ··· 386 385 }; 387 386 388 387 int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans, 389 - struct inode *inode, const struct inode *parent) 388 + struct btrfs_inode *inode, 389 + const struct btrfs_inode *parent) 390 390 { 391 - struct btrfs_root *root = BTRFS_I(inode)->root; 391 + struct btrfs_root *root = inode->root; 392 392 struct btrfs_fs_info *fs_info = root->fs_info; 393 393 int ret; 394 394 int i; 395 395 bool need_reserve = false; 396 396 397 - if (!test_bit(BTRFS_INODE_HAS_PROPS, 398 - &BTRFS_I(parent)->runtime_flags)) 397 + if (!test_bit(BTRFS_INODE_HAS_PROPS, &parent->runtime_flags)) 399 398 return 0; 400 399 401 400 for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) { ··· 406 405 if (!h->inheritable) 407 406 continue; 408 407 409 - if (h->ignore(BTRFS_I(inode))) 408 + if (h->ignore(inode)) 410 409 continue; 411 410 412 411 value = h->extract(parent); ··· 417 416 * This is not strictly necessary as the property should be 418 417 * valid, but in case it isn't, don't propagate it further. 
419 418 */ 420 - ret = h->validate(BTRFS_I(inode), value, strlen(value)); 419 + ret = h->validate(inode, value, strlen(value)); 421 420 if (ret) 422 421 continue; 423 422 ··· 437 436 return ret; 438 437 } 439 438 440 - ret = btrfs_setxattr(trans, inode, h->xattr_name, value, 439 + ret = btrfs_setxattr(trans, &inode->vfs_inode, h->xattr_name, value, 441 440 strlen(value), 0); 442 441 if (!ret) { 443 442 ret = h->apply(inode, value, strlen(value)); 444 443 if (ret) 445 - btrfs_setxattr(trans, inode, h->xattr_name, 444 + btrfs_setxattr(trans, &inode->vfs_inode, h->xattr_name, 446 445 NULL, 0, 0); 447 446 else 448 - set_bit(BTRFS_INODE_HAS_PROPS, 449 - &BTRFS_I(inode)->runtime_flags); 447 + set_bit(BTRFS_INODE_HAS_PROPS, &inode->runtime_flags); 450 448 } 451 449 452 450 if (need_reserve) {
+4 -4
fs/btrfs/props.h
··· 6 6 #ifndef BTRFS_PROPS_H 7 7 #define BTRFS_PROPS_H 8 8 9 + #include <linux/types.h> 9 10 #include <linux/compiler_types.h> 10 11 11 - struct inode; 12 12 struct btrfs_inode; 13 13 struct btrfs_path; 14 14 struct btrfs_trans_handle; ··· 22 22 const char *value, size_t value_len); 23 23 bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name); 24 24 25 - int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); 25 + int btrfs_load_inode_props(struct btrfs_inode *inode, struct btrfs_path *path); 26 26 27 27 int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans, 28 - struct inode *inode, 29 - const struct inode *dir); 28 + struct btrfs_inode *inode, 29 + const struct btrfs_inode *dir); 30 30 31 31 #endif
+1 -1
fs/btrfs/qgroup.c
··· 956 956 return -ENOMEM; 957 957 958 958 key.objectid = 0; 959 - key.offset = 0; 960 959 key.type = 0; 960 + key.offset = 0; 961 961 962 962 while (1) { 963 963 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+3
fs/btrfs/qgroup.h
··· 22 22 struct btrfs_trans_handle; 23 23 struct btrfs_delayed_ref_root; 24 24 struct btrfs_inode; 25 + struct btrfs_transaction; 26 + struct btrfs_block_group; 27 + struct btrfs_qgroup_swapped_blocks; 25 28 26 29 /* 27 30 * Btrfs qgroup overview
+1
fs/btrfs/raid-stripe-tree.h
··· 9 9 #include <linux/types.h> 10 10 #include <uapi/linux/btrfs_tree.h> 11 11 #include "fs.h" 12 + #include "accessors.h" 12 13 13 14 #define BTRFS_RST_SUPP_BLOCK_GROUP_MASK (BTRFS_BLOCK_GROUP_DUP | \ 14 15 BTRFS_BLOCK_GROUP_RAID1_MASK | \
+51 -49
fs/btrfs/reflink.c
··· 165 165 * the source inode to destination inode when possible. When not possible we 166 166 * copy the inline extent's data into the respective page of the inode. 167 167 */ 168 - static int clone_copy_inline_extent(struct inode *dst, 168 + static int clone_copy_inline_extent(struct btrfs_inode *inode, 169 169 struct btrfs_path *path, 170 170 struct btrfs_key *new_key, 171 171 const u64 drop_start, ··· 175 175 char *inline_data, 176 176 struct btrfs_trans_handle **trans_out) 177 177 { 178 - struct btrfs_fs_info *fs_info = inode_to_fs_info(dst); 179 - struct btrfs_root *root = BTRFS_I(dst)->root; 178 + struct btrfs_root *root = inode->root; 179 + struct btrfs_fs_info *fs_info = root->fs_info; 180 180 const u64 aligned_end = ALIGN(new_key->offset + datal, 181 181 fs_info->sectorsize); 182 182 struct btrfs_trans_handle *trans = NULL; ··· 185 185 struct btrfs_key key; 186 186 187 187 if (new_key->offset > 0) { 188 - ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset, 188 + ret = copy_inline_to_page(inode, new_key->offset, 189 189 inline_data, size, datal, comp_type); 190 190 goto out; 191 191 } 192 192 193 - key.objectid = btrfs_ino(BTRFS_I(dst)); 193 + key.objectid = btrfs_ino(inode); 194 194 key.type = BTRFS_EXTENT_DATA_KEY; 195 195 key.offset = 0; 196 196 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ··· 205 205 goto copy_inline_extent; 206 206 } 207 207 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 208 - if (key.objectid == btrfs_ino(BTRFS_I(dst)) && 208 + if (key.objectid == btrfs_ino(inode) && 209 209 key.type == BTRFS_EXTENT_DATA_KEY) { 210 210 /* 211 211 * There's an implicit hole at file offset 0, copy the ··· 214 214 ASSERT(key.offset > 0); 215 215 goto copy_to_page; 216 216 } 217 - } else if (i_size_read(dst) <= datal) { 217 + } else if (i_size_read(&inode->vfs_inode) <= datal) { 218 218 struct btrfs_file_extent_item *ei; 219 219 220 220 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], ··· 236 236 * We have no extent 
items, or we have an extent at offset 0 which may 237 237 * or may not be inlined. All these cases are dealt the same way. 238 238 */ 239 - if (i_size_read(dst) > datal) { 239 + if (i_size_read(&inode->vfs_inode) > datal) { 240 240 /* 241 241 * At the destination offset 0 we have either a hole, a regular 242 242 * extent or an inline extent larger then the one we want to ··· 270 270 drop_args.start = drop_start; 271 271 drop_args.end = aligned_end; 272 272 drop_args.drop_cache = true; 273 - ret = btrfs_drop_extents(trans, root, BTRFS_I(dst), &drop_args); 273 + ret = btrfs_drop_extents(trans, root, inode, &drop_args); 274 274 if (ret) 275 275 goto out; 276 276 ret = btrfs_insert_empty_item(trans, root, path, new_key, size); ··· 281 281 btrfs_item_ptr_offset(path->nodes[0], 282 282 path->slots[0]), 283 283 size); 284 - btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found); 285 - btrfs_set_inode_full_sync(BTRFS_I(dst)); 286 - ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end); 284 + btrfs_update_inode_bytes(inode, datal, drop_args.bytes_found); 285 + btrfs_set_inode_full_sync(inode); 286 + ret = btrfs_inode_set_file_extent_range(inode, 0, aligned_end); 287 287 out: 288 288 if (!ret && !trans) { 289 289 /* ··· 318 318 */ 319 319 btrfs_release_path(path); 320 320 321 - ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset, 321 + ret = copy_inline_to_page(inode, new_key->offset, 322 322 inline_data, size, datal, comp_type); 323 323 goto out; 324 324 } ··· 526 526 goto out; 527 527 } 528 528 529 - ret = clone_copy_inline_extent(inode, path, &new_key, 529 + ret = clone_copy_inline_extent(BTRFS_I(inode), path, &new_key, 530 530 drop_start, datal, size, 531 531 comp, buf, &trans); 532 532 if (ret) ··· 617 617 return ret; 618 618 } 619 619 620 - static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2) 620 + static void btrfs_double_mmap_lock(struct btrfs_inode *inode1, struct btrfs_inode *inode2) 621 621 { 622 622 if 
(inode1 < inode2) 623 623 swap(inode1, inode2); 624 - down_write(&BTRFS_I(inode1)->i_mmap_lock); 625 - down_write_nested(&BTRFS_I(inode2)->i_mmap_lock, SINGLE_DEPTH_NESTING); 624 + down_write(&inode1->i_mmap_lock); 625 + down_write_nested(&inode2->i_mmap_lock, SINGLE_DEPTH_NESTING); 626 626 } 627 627 628 - static void btrfs_double_mmap_unlock(struct inode *inode1, struct inode *inode2) 628 + static void btrfs_double_mmap_unlock(struct btrfs_inode *inode1, struct btrfs_inode *inode2) 629 629 { 630 - up_write(&BTRFS_I(inode1)->i_mmap_lock); 631 - up_write(&BTRFS_I(inode2)->i_mmap_lock); 630 + up_write(&inode1->i_mmap_lock); 631 + up_write(&inode2->i_mmap_lock); 632 632 } 633 633 634 - static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, 635 - struct inode *dst, u64 dst_loff) 634 + static int btrfs_extent_same_range(struct btrfs_inode *src, u64 loff, u64 len, 635 + struct btrfs_inode *dst, u64 dst_loff) 636 636 { 637 637 const u64 end = dst_loff + len - 1; 638 638 struct extent_state *cached_state = NULL; 639 - struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info; 639 + struct btrfs_fs_info *fs_info = src->root->fs_info; 640 640 const u64 bs = fs_info->sectorsize; 641 641 int ret; 642 642 ··· 646 646 * because we have already locked the inode's i_mmap_lock in exclusive 647 647 * mode. 
648 648 */ 649 - lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state); 650 - ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1); 651 - unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state); 649 + lock_extent(&dst->io_tree, dst_loff, end, &cached_state); 650 + ret = btrfs_clone(&src->vfs_inode, &dst->vfs_inode, loff, len, 651 + ALIGN(len, bs), dst_loff, 1); 652 + unlock_extent(&dst->io_tree, dst_loff, end, &cached_state); 652 653 653 654 btrfs_btree_balance_dirty(fs_info); 654 655 ··· 679 678 chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN); 680 679 681 680 for (i = 0; i < chunk_count; i++) { 682 - ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN, 683 - dst, dst_loff); 681 + ret = btrfs_extent_same_range(BTRFS_I(src), loff, BTRFS_MAX_DEDUPE_LEN, 682 + BTRFS_I(dst), dst_loff); 684 683 if (ret) 685 684 goto out; 686 685 ··· 689 688 } 690 689 691 690 if (tail_len > 0) 692 - ret = btrfs_extent_same_range(src, loff, tail_len, dst, dst_loff); 691 + ret = btrfs_extent_same_range(BTRFS_I(src), loff, tail_len, 692 + BTRFS_I(dst), dst_loff); 693 693 out: 694 694 spin_lock(&root_dst->root_item_lock); 695 695 root_dst->dedupe_in_progress--; ··· 777 775 struct file *file_out, loff_t pos_out, 778 776 loff_t *len, unsigned int remap_flags) 779 777 { 780 - struct inode *inode_in = file_inode(file_in); 781 - struct inode *inode_out = file_inode(file_out); 782 - u64 bs = BTRFS_I(inode_out)->root->fs_info->sectorsize; 778 + struct btrfs_inode *inode_in = BTRFS_I(file_inode(file_in)); 779 + struct btrfs_inode *inode_out = BTRFS_I(file_inode(file_out)); 780 + u64 bs = inode_out->root->fs_info->sectorsize; 783 781 u64 wb_len; 784 782 int ret; 785 783 786 784 if (!(remap_flags & REMAP_FILE_DEDUP)) { 787 - struct btrfs_root *root_out = BTRFS_I(inode_out)->root; 785 + struct btrfs_root *root_out = inode_out->root; 788 786 789 787 if (btrfs_root_readonly(root_out)) 790 788 return -EROFS; 791 789 792 - ASSERT(inode_in->i_sb == 
inode_out->i_sb); 790 + ASSERT(inode_in->vfs_inode.i_sb == inode_out->vfs_inode.i_sb); 793 791 } 794 792 795 793 /* Don't make the dst file partly checksummed */ 796 - if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) != 797 - (BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) { 794 + if ((inode_in->flags & BTRFS_INODE_NODATASUM) != 795 + (inode_out->flags & BTRFS_INODE_NODATASUM)) { 798 796 return -EINVAL; 799 797 } 800 798 ··· 813 811 * to complete so that new file extent items are in the fs tree. 814 812 */ 815 813 if (*len == 0 && !(remap_flags & REMAP_FILE_DEDUP)) 816 - wb_len = ALIGN(inode_in->i_size, bs) - ALIGN_DOWN(pos_in, bs); 814 + wb_len = ALIGN(inode_in->vfs_inode.i_size, bs) - ALIGN_DOWN(pos_in, bs); 817 815 else 818 816 wb_len = ALIGN(*len, bs); 819 817 ··· 834 832 * Also we don't need to check ASYNC_EXTENT, as async extent will be 835 833 * CoWed anyway, not affecting nocow part. 836 834 */ 837 - ret = filemap_flush(inode_in->i_mapping); 835 + ret = filemap_flush(inode_in->vfs_inode.i_mapping); 838 836 if (ret < 0) 839 837 return ret; 840 838 841 - ret = btrfs_wait_ordered_range(BTRFS_I(inode_in), ALIGN_DOWN(pos_in, bs), 842 - wb_len); 839 + ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs), wb_len); 843 840 if (ret < 0) 844 841 return ret; 845 - ret = btrfs_wait_ordered_range(BTRFS_I(inode_out), ALIGN_DOWN(pos_out, bs), 846 - wb_len); 842 + ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs), wb_len); 847 843 if (ret < 0) 848 844 return ret; 849 845 ··· 863 863 struct file *dst_file, loff_t destoff, loff_t len, 864 864 unsigned int remap_flags) 865 865 { 866 - struct inode *src_inode = file_inode(src_file); 867 - struct inode *dst_inode = file_inode(dst_file); 866 + struct btrfs_inode *src_inode = BTRFS_I(file_inode(src_file)); 867 + struct btrfs_inode *dst_inode = BTRFS_I(file_inode(dst_file)); 868 868 bool same_inode = dst_inode == src_inode; 869 869 int ret; 870 870 ··· 872 872 return -EINVAL; 873 873 874 874 
if (same_inode) { 875 - btrfs_inode_lock(BTRFS_I(src_inode), BTRFS_ILOCK_MMAP); 875 + btrfs_inode_lock(src_inode, BTRFS_ILOCK_MMAP); 876 876 } else { 877 - lock_two_nondirectories(src_inode, dst_inode); 877 + lock_two_nondirectories(&src_inode->vfs_inode, &dst_inode->vfs_inode); 878 878 btrfs_double_mmap_lock(src_inode, dst_inode); 879 879 } 880 880 ··· 884 884 goto out_unlock; 885 885 886 886 if (remap_flags & REMAP_FILE_DEDUP) 887 - ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff); 887 + ret = btrfs_extent_same(&src_inode->vfs_inode, off, len, 888 + &dst_inode->vfs_inode, destoff); 888 889 else 889 890 ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); 890 891 891 892 out_unlock: 892 893 if (same_inode) { 893 - btrfs_inode_unlock(BTRFS_I(src_inode), BTRFS_ILOCK_MMAP); 894 + btrfs_inode_unlock(src_inode, BTRFS_ILOCK_MMAP); 894 895 } else { 895 896 btrfs_double_mmap_unlock(src_inode, dst_inode); 896 - unlock_two_nondirectories(src_inode, dst_inode); 897 + unlock_two_nondirectories(&src_inode->vfs_inode, 898 + &dst_inode->vfs_inode); 897 899 } 898 900 899 901 /*
+16 -14
fs/btrfs/relocation.c
··· 3239 3239 return ret; 3240 3240 } 3241 3241 3242 - static int delete_block_group_cache(struct btrfs_fs_info *fs_info, 3243 - struct btrfs_block_group *block_group, 3242 + static int delete_block_group_cache(struct btrfs_block_group *block_group, 3244 3243 struct inode *inode, 3245 3244 u64 ino) 3246 3245 { 3246 + struct btrfs_fs_info *fs_info = block_group->fs_info; 3247 3247 struct btrfs_root *root = fs_info->tree_root; 3248 3248 struct btrfs_trans_handle *trans; 3249 + struct btrfs_inode *btrfs_inode; 3249 3250 int ret = 0; 3250 3251 3251 3252 if (inode) 3252 3253 goto truncate; 3253 3254 3254 - inode = btrfs_iget(ino, root); 3255 - if (IS_ERR(inode)) 3255 + btrfs_inode = btrfs_iget(ino, root); 3256 + if (IS_ERR(btrfs_inode)) 3256 3257 return -ENOENT; 3258 + inode = &btrfs_inode->vfs_inode; 3257 3259 3258 3260 truncate: 3259 3261 ret = btrfs_check_trunc_cache_free_space(fs_info, ··· 3315 3313 } 3316 3314 if (!found) 3317 3315 return -ENOENT; 3318 - ret = delete_block_group_cache(leaf->fs_info, block_group, NULL, 3319 - space_cache_ino); 3316 + ret = delete_block_group_cache(block_group, NULL, space_cache_ino); 3320 3317 return ret; 3321 3318 } 3322 3319 ··· 3762 3761 * the inode is in data relocation tree and its link count is 0 3763 3762 */ 3764 3763 static noinline_for_stack struct inode *create_reloc_inode( 3765 - struct btrfs_fs_info *fs_info, 3766 3764 const struct btrfs_block_group *group) 3767 3765 { 3768 - struct inode *inode = NULL; 3766 + struct btrfs_fs_info *fs_info = group->fs_info; 3767 + struct btrfs_inode *inode = NULL; 3769 3768 struct btrfs_trans_handle *trans; 3770 3769 struct btrfs_root *root; 3771 3770 u64 objectid; ··· 3793 3792 inode = NULL; 3794 3793 goto out; 3795 3794 } 3796 - BTRFS_I(inode)->reloc_block_group_start = group->start; 3795 + inode->reloc_block_group_start = group->start; 3797 3796 3798 - ret = btrfs_orphan_add(trans, BTRFS_I(inode)); 3797 + ret = btrfs_orphan_add(trans, inode); 3799 3798 out: 3800 3799 
btrfs_put_root(root); 3801 3800 btrfs_end_transaction(trans); 3802 3801 btrfs_btree_balance_dirty(fs_info); 3803 3802 if (ret) { 3804 - iput(inode); 3803 + if (inode) 3804 + iput(&inode->vfs_inode); 3805 3805 inode = ERR_PTR(ret); 3806 3806 } 3807 - return inode; 3807 + return &inode->vfs_inode; 3808 3808 } 3809 3809 3810 3810 /* ··· 3979 3977 btrfs_free_path(path); 3980 3978 3981 3979 if (!IS_ERR(inode)) 3982 - ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0); 3980 + ret = delete_block_group_cache(rc->block_group, inode, 0); 3983 3981 else 3984 3982 ret = PTR_ERR(inode); 3985 3983 ··· 3988 3986 goto out; 3989 3987 } 3990 3988 3991 - rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3989 + rc->data_inode = create_reloc_inode(rc->block_group); 3992 3990 if (IS_ERR(rc->data_inode)) { 3993 3991 err = PTR_ERR(rc->data_inode); 3994 3992 rc->data_inode = NULL;
+2 -2
fs/btrfs/scrub.c
··· 1380 1380 if (path->nodes[0]) 1381 1381 goto search_forward; 1382 1382 1383 + key.objectid = search_start; 1383 1384 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) 1384 1385 key.type = BTRFS_METADATA_ITEM_KEY; 1385 1386 else 1386 1387 key.type = BTRFS_EXTENT_ITEM_KEY; 1387 - key.objectid = search_start; 1388 1388 key.offset = (u64)-1; 1389 1389 1390 1390 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); ··· 2497 2497 path->skip_locking = 1; 2498 2498 2499 2499 key.objectid = scrub_dev->devid; 2500 - key.offset = 0ull; 2501 2500 key.type = BTRFS_DEV_EXTENT_KEY; 2501 + key.offset = 0ull; 2502 2502 2503 2503 while (1) { 2504 2504 u64 dev_extent_len;
+253 -291
fs/btrfs/send.c
··· 16 16 #include <linux/compat.h> 17 17 #include <linux/crc32c.h> 18 18 #include <linux/fsverity.h> 19 - 20 19 #include "send.h" 21 20 #include "ctree.h" 22 21 #include "backref.h" ··· 177 178 u64 cur_inode_rdev; 178 179 u64 cur_inode_last_extent; 179 180 u64 cur_inode_next_write_offset; 181 + struct fs_path cur_inode_path; 180 182 bool cur_inode_new; 181 183 bool cur_inode_new_gen; 182 184 bool cur_inode_deleted; ··· 425 425 426 426 static void fs_path_reset(struct fs_path *p) 427 427 { 428 - if (p->reversed) { 428 + if (p->reversed) 429 429 p->start = p->buf + p->buf_len - 1; 430 - p->end = p->start; 431 - *p->start = 0; 432 - } else { 430 + else 433 431 p->start = p->buf; 434 - p->end = p->start; 435 - *p->start = 0; 436 - } 432 + 433 + p->end = p->start; 434 + *p->start = 0; 435 + } 436 + 437 + static void init_path(struct fs_path *p) 438 + { 439 + p->reversed = 0; 440 + p->buf = p->inline_buf; 441 + p->buf_len = FS_PATH_INLINE_SIZE; 442 + fs_path_reset(p); 437 443 } 438 444 439 445 static struct fs_path *fs_path_alloc(void) ··· 449 443 p = kmalloc(sizeof(*p), GFP_KERNEL); 450 444 if (!p) 451 445 return NULL; 452 - p->reversed = 0; 453 - p->buf = p->inline_buf; 454 - p->buf_len = FS_PATH_INLINE_SIZE; 455 - fs_path_reset(p); 446 + init_path(p); 456 447 return p; 457 448 } 458 449 ··· 474 471 kfree(p); 475 472 } 476 473 477 - static int fs_path_len(struct fs_path *p) 474 + static inline int fs_path_len(const struct fs_path *p) 478 475 { 479 476 return p->end - p->start; 480 477 } ··· 490 487 if (p->buf_len >= len) 491 488 return 0; 492 489 493 - if (len > PATH_MAX) { 494 - WARN_ON(1); 495 - return -ENOMEM; 496 - } 490 + if (WARN_ON(len > PATH_MAX)) 491 + return -ENAMETOOLONG; 497 492 498 - path_len = p->end - p->start; 493 + path_len = fs_path_len(p); 499 494 old_buf_len = p->buf_len; 500 495 501 496 /* ··· 534 533 int ret; 535 534 int new_len; 536 535 537 - new_len = p->end - p->start + name_len; 536 + new_len = fs_path_len(p) + name_len; 538 537 if (p->start 
!= p->end) 539 538 new_len++; 540 539 ret = fs_path_ensure_buf(p, new_len); 541 540 if (ret < 0) 542 - goto out; 541 + return ret; 543 542 544 543 if (p->reversed) { 545 544 if (p->start != p->end) ··· 554 553 *p->end = 0; 555 554 } 556 555 557 - out: 558 - return ret; 556 + return 0; 559 557 } 560 558 561 559 static int fs_path_add(struct fs_path *p, const char *name, int name_len) ··· 564 564 565 565 ret = fs_path_prepare_for_add(p, name_len, &prepared); 566 566 if (ret < 0) 567 - goto out; 567 + return ret; 568 568 memcpy(prepared, name, name_len); 569 569 570 - out: 571 - return ret; 570 + return 0; 572 571 } 573 572 574 - static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) 573 + static inline int fs_path_add_path(struct fs_path *p, const struct fs_path *p2) 575 574 { 576 - int ret; 577 - char *prepared; 578 - 579 - ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); 580 - if (ret < 0) 581 - goto out; 582 - memcpy(prepared, p2->start, p2->end - p2->start); 583 - 584 - out: 585 - return ret; 575 + return fs_path_add(p, p2->start, fs_path_len(p2)); 586 576 } 587 577 588 578 static int fs_path_add_from_extent_buffer(struct fs_path *p, ··· 584 594 585 595 ret = fs_path_prepare_for_add(p, len, &prepared); 586 596 if (ret < 0) 587 - goto out; 597 + return ret; 588 598 589 599 read_extent_buffer(eb, prepared, off, len); 590 600 591 - out: 592 - return ret; 601 + return 0; 593 602 } 594 603 595 604 static int fs_path_copy(struct fs_path *p, struct fs_path *from) ··· 608 619 return; 609 620 610 621 tmp = p->start; 611 - len = p->end - p->start; 622 + len = fs_path_len(p); 612 623 p->start = p->buf; 613 624 p->end = p->start + len; 614 625 memmove(p->start, tmp, len + 1); 615 626 p->reversed = 0; 627 + } 628 + 629 + static inline bool is_current_inode_path(const struct send_ctx *sctx, 630 + const struct fs_path *path) 631 + { 632 + const struct fs_path *cur = &sctx->cur_inode_path; 633 + 634 + return (strncmp(path->start, cur->start, 
fs_path_len(cur)) == 0); 616 635 } 617 636 618 637 static struct btrfs_path *alloc_path_for_send(void) ··· 737 740 #define TLV_PUT_PATH(sctx, attrtype, p) \ 738 741 do { \ 739 742 ret = tlv_put_string(sctx, attrtype, p->start, \ 740 - p->end - p->start); \ 743 + fs_path_len((p))); \ 741 744 if (ret < 0) \ 742 745 goto tlv_put_failure; \ 743 746 } while(0) ··· 823 826 824 827 ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME); 825 828 if (ret < 0) 826 - goto out; 829 + return ret; 827 830 828 831 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from); 829 832 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to); ··· 831 834 ret = send_cmd(sctx); 832 835 833 836 tlv_put_failure: 834 - out: 835 837 return ret; 836 838 } 837 839 ··· 847 851 848 852 ret = begin_cmd(sctx, BTRFS_SEND_C_LINK); 849 853 if (ret < 0) 850 - goto out; 854 + return ret; 851 855 852 856 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 853 857 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk); ··· 855 859 ret = send_cmd(sctx); 856 860 857 861 tlv_put_failure: 858 - out: 859 862 return ret; 860 863 } 861 864 ··· 870 875 871 876 ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK); 872 877 if (ret < 0) 873 - goto out; 878 + return ret; 874 879 875 880 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 876 881 877 882 ret = send_cmd(sctx); 878 883 879 884 tlv_put_failure: 880 - out: 881 885 return ret; 882 886 } 883 887 ··· 892 898 893 899 ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR); 894 900 if (ret < 0) 895 - goto out; 901 + return ret; 896 902 897 903 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 898 904 899 905 ret = send_cmd(sctx); 900 906 901 907 tlv_put_failure: 902 - out: 903 908 return ret; 904 909 } 905 910 ··· 1890 1897 1891 1898 ret = get_inode_info(sctx->send_root, ino, &info); 1892 1899 if (ret < 0 && ret != -ENOENT) 1893 - goto out; 1900 + return ret; 1894 1901 left_ret = (info.nlink == 0) ? 
-ENOENT : ret; 1895 1902 left_gen = info.gen; 1896 1903 if (send_gen) ··· 1901 1908 } else { 1902 1909 ret = get_inode_info(sctx->parent_root, ino, &info); 1903 1910 if (ret < 0 && ret != -ENOENT) 1904 - goto out; 1911 + return ret; 1905 1912 right_ret = (info.nlink == 0) ? -ENOENT : ret; 1906 1913 right_gen = info.gen; 1907 1914 if (parent_gen) ··· 1946 1953 ret = -ENOENT; 1947 1954 } 1948 1955 1949 - out: 1950 1956 return ret; 1951 1957 } 1952 1958 ··· 1959 1967 1960 1968 ret = get_cur_inode_state(sctx, ino, gen, send_gen, parent_gen); 1961 1969 if (ret < 0) 1962 - goto out; 1970 + return ret; 1963 1971 1964 1972 if (ret == inode_state_no_change || 1965 1973 ret == inode_state_did_create || 1966 1974 ret == inode_state_will_delete) 1967 - ret = 1; 1968 - else 1969 - ret = 0; 1975 + return 1; 1970 1976 1971 - out: 1972 - return ret; 1977 + return 0; 1973 1978 } 1974 1979 1975 1980 /* ··· 2315 2326 *parent_gen = nce->parent_gen; 2316 2327 ret = fs_path_add(dest, nce->name, nce->name_len); 2317 2328 if (ret < 0) 2318 - goto out; 2319 - ret = nce->ret; 2320 - goto out; 2329 + return ret; 2330 + return nce->ret; 2321 2331 } 2322 2332 } 2323 2333 ··· 2327 2339 */ 2328 2340 ret = is_inode_existent(sctx, ino, gen, NULL, NULL); 2329 2341 if (ret < 0) 2330 - goto out; 2342 + return ret; 2331 2343 2332 2344 if (!ret) { 2333 2345 ret = gen_unique_name(sctx, ino, gen, dest); 2334 2346 if (ret < 0) 2335 - goto out; 2347 + return ret; 2336 2348 ret = 1; 2337 2349 goto out_cache; 2338 2350 } ··· 2348 2360 ret = get_first_ref(sctx->parent_root, ino, 2349 2361 parent_ino, parent_gen, dest); 2350 2362 if (ret < 0) 2351 - goto out; 2363 + return ret; 2352 2364 2353 2365 /* 2354 2366 * Check if the ref was overwritten by an inode's ref that was processed 2355 2367 * earlier. If yes, treat as orphan and return 1. 
2356 2368 */ 2357 2369 ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, 2358 - dest->start, dest->end - dest->start); 2370 + dest->start, fs_path_len(dest)); 2359 2371 if (ret < 0) 2360 - goto out; 2372 + return ret; 2361 2373 if (ret) { 2362 2374 fs_path_reset(dest); 2363 2375 ret = gen_unique_name(sctx, ino, gen, dest); 2364 2376 if (ret < 0) 2365 - goto out; 2377 + return ret; 2366 2378 ret = 1; 2367 2379 } 2368 2380 ··· 2371 2383 * Store the result of the lookup in the name cache. 2372 2384 */ 2373 2385 nce = kmalloc(sizeof(*nce) + fs_path_len(dest), GFP_KERNEL); 2374 - if (!nce) { 2375 - ret = -ENOMEM; 2376 - goto out; 2377 - } 2386 + if (!nce) 2387 + return -ENOMEM; 2378 2388 2379 2389 nce->entry.key = ino; 2380 2390 nce->entry.gen = gen; ··· 2390 2404 nce_ret = btrfs_lru_cache_store(&sctx->name_cache, &nce->entry, GFP_KERNEL); 2391 2405 if (nce_ret < 0) { 2392 2406 kfree(nce); 2393 - ret = nce_ret; 2407 + return nce_ret; 2394 2408 } 2395 2409 2396 - out: 2397 2410 return ret; 2398 2411 } 2399 2412 ··· 2429 2444 u64 parent_inode = 0; 2430 2445 u64 parent_gen = 0; 2431 2446 int stop = 0; 2447 + const bool is_cur_inode = (ino == sctx->cur_ino && gen == sctx->cur_inode_gen); 2448 + 2449 + if (is_cur_inode && fs_path_len(&sctx->cur_inode_path) > 0) { 2450 + if (dest != &sctx->cur_inode_path) 2451 + return fs_path_copy(dest, &sctx->cur_inode_path); 2452 + 2453 + return 0; 2454 + } 2432 2455 2433 2456 name = fs_path_alloc(); 2434 2457 if (!name) { ··· 2488 2495 2489 2496 out: 2490 2497 fs_path_free(name); 2491 - if (!ret) 2498 + if (!ret) { 2492 2499 fs_path_unreverse(dest); 2500 + if (is_cur_inode && dest != &sctx->cur_inode_path) 2501 + ret = fs_path_copy(&sctx->cur_inode_path, dest); 2502 + } 2503 + 2493 2504 return ret; 2494 2505 } 2495 2506 ··· 2588 2591 return ret; 2589 2592 } 2590 2593 2594 + static struct fs_path *get_cur_inode_path(struct send_ctx *sctx) 2595 + { 2596 + if (fs_path_len(&sctx->cur_inode_path) == 0) { 2597 + int ret; 2598 + 
2599 + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, 2600 + &sctx->cur_inode_path); 2601 + if (ret < 0) 2602 + return ERR_PTR(ret); 2603 + } 2604 + 2605 + return &sctx->cur_inode_path; 2606 + } 2607 + 2608 + static struct fs_path *get_path_for_command(struct send_ctx *sctx, u64 ino, u64 gen) 2609 + { 2610 + struct fs_path *path; 2611 + int ret; 2612 + 2613 + if (ino == sctx->cur_ino && gen == sctx->cur_inode_gen) 2614 + return get_cur_inode_path(sctx); 2615 + 2616 + path = fs_path_alloc(); 2617 + if (!path) 2618 + return ERR_PTR(-ENOMEM); 2619 + 2620 + ret = get_cur_path(sctx, ino, gen, path); 2621 + if (ret < 0) { 2622 + fs_path_free(path); 2623 + return ERR_PTR(ret); 2624 + } 2625 + 2626 + return path; 2627 + } 2628 + 2629 + static void free_path_for_command(const struct send_ctx *sctx, struct fs_path *path) 2630 + { 2631 + if (path != &sctx->cur_inode_path) 2632 + fs_path_free(path); 2633 + } 2634 + 2591 2635 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) 2592 2636 { 2593 2637 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; ··· 2637 2599 2638 2600 btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size); 2639 2601 2640 - p = fs_path_alloc(); 2641 - if (!p) 2642 - return -ENOMEM; 2602 + p = get_path_for_command(sctx, ino, gen); 2603 + if (IS_ERR(p)) 2604 + return PTR_ERR(p); 2643 2605 2644 2606 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); 2645 2607 if (ret < 0) 2646 2608 goto out; 2647 2609 2648 - ret = get_cur_path(sctx, ino, gen, p); 2649 - if (ret < 0) 2650 - goto out; 2651 2610 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2652 2611 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); 2653 2612 ··· 2652 2617 2653 2618 tlv_put_failure: 2654 2619 out: 2655 - fs_path_free(p); 2620 + free_path_for_command(sctx, p); 2656 2621 return ret; 2657 2622 } 2658 2623 ··· 2664 2629 2665 2630 btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode); 2666 2631 2667 - p = fs_path_alloc(); 2668 - if (!p) 2669 - return 
-ENOMEM; 2632 + p = get_path_for_command(sctx, ino, gen); 2633 + if (IS_ERR(p)) 2634 + return PTR_ERR(p); 2670 2635 2671 2636 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); 2672 2637 if (ret < 0) 2673 2638 goto out; 2674 2639 2675 - ret = get_cur_path(sctx, ino, gen, p); 2676 - if (ret < 0) 2677 - goto out; 2678 2640 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2679 2641 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); 2680 2642 ··· 2679 2647 2680 2648 tlv_put_failure: 2681 2649 out: 2682 - fs_path_free(p); 2650 + free_path_for_command(sctx, p); 2683 2651 return ret; 2684 2652 } 2685 2653 ··· 2694 2662 2695 2663 btrfs_debug(fs_info, "send_fileattr %llu fileattr=%llu", ino, fileattr); 2696 2664 2697 - p = fs_path_alloc(); 2698 - if (!p) 2699 - return -ENOMEM; 2665 + p = get_path_for_command(sctx, ino, gen); 2666 + if (IS_ERR(p)) 2667 + return PTR_ERR(p); 2700 2668 2701 2669 ret = begin_cmd(sctx, BTRFS_SEND_C_FILEATTR); 2702 2670 if (ret < 0) 2703 2671 goto out; 2704 2672 2705 - ret = get_cur_path(sctx, ino, gen, p); 2706 - if (ret < 0) 2707 - goto out; 2708 2673 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2709 2674 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILEATTR, fileattr); 2710 2675 ··· 2709 2680 2710 2681 tlv_put_failure: 2711 2682 out: 2712 - fs_path_free(p); 2683 + free_path_for_command(sctx, p); 2713 2684 return ret; 2714 2685 } 2715 2686 ··· 2722 2693 btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu", 2723 2694 ino, uid, gid); 2724 2695 2725 - p = fs_path_alloc(); 2726 - if (!p) 2727 - return -ENOMEM; 2696 + p = get_path_for_command(sctx, ino, gen); 2697 + if (IS_ERR(p)) 2698 + return PTR_ERR(p); 2728 2699 2729 2700 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); 2730 2701 if (ret < 0) 2731 2702 goto out; 2732 2703 2733 - ret = get_cur_path(sctx, ino, gen, p); 2734 - if (ret < 0) 2735 - goto out; 2736 2704 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2737 2705 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); 2738 2706 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); ··· 2738 2712 2739 
2713 tlv_put_failure: 2740 2714 out: 2741 - fs_path_free(p); 2715 + free_path_for_command(sctx, p); 2742 2716 return ret; 2743 2717 } 2744 2718 ··· 2755 2729 2756 2730 btrfs_debug(fs_info, "send_utimes %llu", ino); 2757 2731 2758 - p = fs_path_alloc(); 2759 - if (!p) 2760 - return -ENOMEM; 2732 + p = get_path_for_command(sctx, ino, gen); 2733 + if (IS_ERR(p)) 2734 + return PTR_ERR(p); 2761 2735 2762 2736 path = alloc_path_for_send(); 2763 2737 if (!path) { ··· 2782 2756 if (ret < 0) 2783 2757 goto out; 2784 2758 2785 - ret = get_cur_path(sctx, ino, gen, p); 2786 - if (ret < 0) 2787 - goto out; 2788 2759 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2789 2760 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime); 2790 2761 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime); ··· 2793 2770 2794 2771 tlv_put_failure: 2795 2772 out: 2796 - fs_path_free(p); 2773 + free_path_for_command(sctx, p); 2797 2774 btrfs_free_path(path); 2798 2775 return ret; 2799 2776 } ··· 3129 3106 goto out; 3130 3107 3131 3108 ret = send_rename(sctx, path, orphan); 3109 + if (ret < 0) 3110 + goto out; 3111 + 3112 + if (ino == sctx->cur_ino && gen == sctx->cur_inode_gen) 3113 + ret = fs_path_copy(&sctx->cur_inode_path, orphan); 3132 3114 3133 3115 out: 3134 3116 fs_path_free(orphan); ··· 4186 4158 return ret; 4187 4159 } 4188 4160 4161 + static int rename_current_inode(struct send_ctx *sctx, 4162 + struct fs_path *current_path, 4163 + struct fs_path *new_path) 4164 + { 4165 + int ret; 4166 + 4167 + ret = send_rename(sctx, current_path, new_path); 4168 + if (ret < 0) 4169 + return ret; 4170 + 4171 + ret = fs_path_copy(&sctx->cur_inode_path, new_path); 4172 + if (ret < 0) 4173 + return ret; 4174 + 4175 + return fs_path_copy(current_path, new_path); 4176 + } 4177 + 4189 4178 /* 4190 4179 * This does all the move/link/unlink/rmdir magic. 
4191 4180 */ ··· 4217 4172 u64 ow_inode = 0; 4218 4173 u64 ow_gen; 4219 4174 u64 ow_mode; 4220 - int did_overwrite = 0; 4221 - int is_orphan = 0; 4222 4175 u64 last_dir_ino_rm = 0; 4176 + bool did_overwrite = false; 4177 + bool is_orphan = false; 4223 4178 bool can_rename = true; 4224 4179 bool orphanized_dir = false; 4225 4180 bool orphanized_ancestor = false; ··· 4261 4216 if (ret < 0) 4262 4217 goto out; 4263 4218 if (ret) 4264 - did_overwrite = 1; 4219 + did_overwrite = true; 4265 4220 } 4266 4221 if (sctx->cur_inode_new || did_overwrite) { 4267 4222 ret = gen_unique_name(sctx, sctx->cur_ino, 4268 4223 sctx->cur_inode_gen, valid_path); 4269 4224 if (ret < 0) 4270 4225 goto out; 4271 - is_orphan = 1; 4226 + is_orphan = true; 4272 4227 } else { 4273 4228 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, 4274 4229 valid_path); ··· 4393 4348 if (ret > 0) { 4394 4349 orphanized_ancestor = true; 4395 4350 fs_path_reset(valid_path); 4351 + fs_path_reset(&sctx->cur_inode_path); 4396 4352 ret = get_cur_path(sctx, sctx->cur_ino, 4397 4353 sctx->cur_inode_gen, 4398 4354 valid_path); ··· 4489 4443 * it depending on the inode mode. 4490 4444 */ 4491 4445 if (is_orphan && can_rename) { 4492 - ret = send_rename(sctx, valid_path, cur->full_path); 4446 + ret = rename_current_inode(sctx, valid_path, cur->full_path); 4493 4447 if (ret < 0) 4494 4448 goto out; 4495 - is_orphan = 0; 4496 - ret = fs_path_copy(valid_path, cur->full_path); 4497 - if (ret < 0) 4498 - goto out; 4449 + is_orphan = false; 4499 4450 } else if (can_rename) { 4500 4451 if (S_ISDIR(sctx->cur_inode_mode)) { 4501 4452 /* ··· 4500 4457 * dirs, we always have one new and one deleted 4501 4458 * ref. The deleted ref is ignored later. 
4502 4459 */ 4503 - ret = send_rename(sctx, valid_path, 4504 - cur->full_path); 4505 - if (!ret) 4506 - ret = fs_path_copy(valid_path, 4460 + ret = rename_current_inode(sctx, valid_path, 4507 4461 cur->full_path); 4508 4462 if (ret < 0) 4509 4463 goto out; ··· 4547 4507 sctx->cur_inode_gen, valid_path); 4548 4508 if (ret < 0) 4549 4509 goto out; 4550 - is_orphan = 1; 4510 + is_orphan = true; 4551 4511 } 4552 4512 4553 4513 list_for_each_entry(cur, &sctx->deleted_refs, list) { ··· 4593 4553 ret = send_unlink(sctx, cur->full_path); 4594 4554 if (ret < 0) 4595 4555 goto out; 4556 + if (is_current_inode_path(sctx, cur->full_path)) 4557 + fs_path_reset(&sctx->cur_inode_path); 4596 4558 } 4597 4559 ret = dup_ref(cur, &check_dirs); 4598 4560 if (ret < 0) ··· 4743 4701 4744 4702 static int record_new_ref_if_needed(u64 dir, struct fs_path *name, void *ctx) 4745 4703 { 4746 - int ret = 0; 4704 + int ret; 4747 4705 struct send_ctx *sctx = ctx; 4748 4706 struct rb_node *node = NULL; 4749 4707 struct recorded_ref data; ··· 4752 4710 4753 4711 ret = get_inode_gen(sctx->send_root, dir, &dir_gen); 4754 4712 if (ret < 0) 4755 - goto out; 4713 + return ret; 4756 4714 4757 4715 data.dir = dir; 4758 4716 data.dir_gen = dir_gen; ··· 4766 4724 &sctx->new_refs, name, dir, dir_gen, 4767 4725 sctx); 4768 4726 } 4769 - out: 4727 + 4770 4728 return ret; 4771 4729 } 4772 4730 4773 4731 static int record_deleted_ref_if_needed(u64 dir, struct fs_path *name, void *ctx) 4774 4732 { 4775 - int ret = 0; 4733 + int ret; 4776 4734 struct send_ctx *sctx = ctx; 4777 4735 struct rb_node *node = NULL; 4778 4736 struct recorded_ref data; ··· 4781 4739 4782 4740 ret = get_inode_gen(sctx->parent_root, dir, &dir_gen); 4783 4741 if (ret < 0) 4784 - goto out; 4742 + return ret; 4785 4743 4786 4744 data.dir = dir; 4787 4745 data.dir_gen = dir_gen; ··· 4795 4753 &sctx->deleted_refs, name, dir, 4796 4754 dir_gen, sctx); 4797 4755 } 4798 - out: 4756 + 4799 4757 return ret; 4800 4758 } 4801 4759 ··· 4806 4764 ret = 
iterate_inode_ref(sctx->send_root, sctx->left_path, 4807 4765 sctx->cmp_key, 0, record_new_ref_if_needed, sctx); 4808 4766 if (ret < 0) 4809 - goto out; 4810 - ret = 0; 4767 + return ret; 4811 4768 4812 - out: 4813 - return ret; 4769 + return 0; 4814 4770 } 4815 4771 4816 4772 static int record_deleted_ref(struct send_ctx *sctx) ··· 4819 4779 sctx->cmp_key, 0, record_deleted_ref_if_needed, 4820 4780 sctx); 4821 4781 if (ret < 0) 4822 - goto out; 4823 - ret = 0; 4782 + return ret; 4824 4783 4825 - out: 4826 - return ret; 4784 + return 0; 4827 4785 } 4828 4786 4829 4787 static int record_changed_ref(struct send_ctx *sctx) 4830 4788 { 4831 - int ret = 0; 4789 + int ret; 4832 4790 4833 4791 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 4834 4792 sctx->cmp_key, 0, record_new_ref_if_needed, sctx); 4835 4793 if (ret < 0) 4836 - goto out; 4794 + return ret; 4837 4795 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 4838 4796 sctx->cmp_key, 0, record_deleted_ref_if_needed, sctx); 4839 4797 if (ret < 0) 4840 - goto out; 4841 - ret = 0; 4798 + return ret; 4842 4799 4843 - out: 4844 - return ret; 4800 + return 0; 4845 4801 } 4846 4802 4847 4803 /* ··· 4905 4869 } 4906 4870 4907 4871 static int send_set_xattr(struct send_ctx *sctx, 4908 - struct fs_path *path, 4909 4872 const char *name, int name_len, 4910 4873 const char *data, int data_len) 4911 4874 { 4912 - int ret = 0; 4875 + struct fs_path *path; 4876 + int ret; 4877 + 4878 + path = get_cur_inode_path(sctx); 4879 + if (IS_ERR(path)) 4880 + return PTR_ERR(path); 4913 4881 4914 4882 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); 4915 4883 if (ret < 0) 4916 - goto out; 4884 + return ret; 4917 4885 4918 4886 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 4919 4887 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); ··· 4926 4886 ret = send_cmd(sctx); 4927 4887 4928 4888 tlv_put_failure: 4929 - out: 4930 4889 return ret; 4931 4890 } 4932 4891 ··· 4933 4894 struct fs_path *path, 4934 4895 const 
char *name, int name_len) 4935 4896 { 4936 - int ret = 0; 4897 + int ret; 4937 4898 4938 4899 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); 4939 4900 if (ret < 0) 4940 - goto out; 4901 + return ret; 4941 4902 4942 4903 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 4943 4904 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); ··· 4945 4906 ret = send_cmd(sctx); 4946 4907 4947 4908 tlv_put_failure: 4948 - out: 4949 4909 return ret; 4950 4910 } 4951 4911 ··· 4952 4914 const char *name, int name_len, const char *data, 4953 4915 int data_len, void *ctx) 4954 4916 { 4955 - int ret; 4956 4917 struct send_ctx *sctx = ctx; 4957 - struct fs_path *p; 4958 4918 struct posix_acl_xattr_header dummy_acl; 4959 4919 4960 4920 /* Capabilities are emitted by finish_inode_if_needed */ 4961 4921 if (!strncmp(name, XATTR_NAME_CAPS, name_len)) 4962 4922 return 0; 4963 - 4964 - p = fs_path_alloc(); 4965 - if (!p) 4966 - return -ENOMEM; 4967 4923 4968 4924 /* 4969 4925 * This hack is needed because empty acls are stored as zero byte ··· 4975 4943 } 4976 4944 } 4977 4945 4978 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4979 - if (ret < 0) 4980 - goto out; 4981 - 4982 - ret = send_set_xattr(sctx, p, name, name_len, data, data_len); 4983 - 4984 - out: 4985 - fs_path_free(p); 4986 - return ret; 4946 + return send_set_xattr(sctx, name, name_len, data, data_len); 4987 4947 } 4988 4948 4989 4949 static int __process_deleted_xattr(int num, struct btrfs_key *di_key, 4990 4950 const char *name, int name_len, 4991 4951 const char *data, int data_len, void *ctx) 4992 4952 { 4993 - int ret; 4994 4953 struct send_ctx *sctx = ctx; 4995 4954 struct fs_path *p; 4996 4955 4997 - p = fs_path_alloc(); 4998 - if (!p) 4999 - return -ENOMEM; 4956 + p = get_cur_inode_path(sctx); 4957 + if (IS_ERR(p)) 4958 + return PTR_ERR(p); 5000 4959 5001 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 5002 - if (ret < 0) 5003 - goto out; 5004 - 5005 - ret = 
send_remove_xattr(sctx, p, name, name_len); 5006 - 5007 - out: 5008 - fs_path_free(p); 5009 - return ret; 4960 + return send_remove_xattr(sctx, p, name, name_len); 5010 4961 } 5011 4962 5012 4963 static int process_new_xattr(struct send_ctx *sctx) 5013 4964 { 5014 - int ret = 0; 5015 - 5016 - ret = iterate_dir_item(sctx->send_root, sctx->left_path, 5017 - __process_new_xattr, sctx); 5018 - 5019 - return ret; 4965 + return iterate_dir_item(sctx->send_root, sctx->left_path, 4966 + __process_new_xattr, sctx); 5020 4967 } 5021 4968 5022 4969 static int process_deleted_xattr(struct send_ctx *sctx) ··· 5111 5100 5112 5101 static int process_changed_xattr(struct send_ctx *sctx) 5113 5102 { 5114 - int ret = 0; 5103 + int ret; 5115 5104 5116 5105 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 5117 5106 __process_changed_new_xattr, sctx); 5118 5107 if (ret < 0) 5119 - goto out; 5120 - ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 5121 - __process_changed_deleted_xattr, sctx); 5108 + return ret; 5122 5109 5123 - out: 5124 - return ret; 5110 + return iterate_dir_item(sctx->parent_root, sctx->right_path, 5111 + __process_changed_deleted_xattr, sctx); 5125 5112 } 5126 5113 5127 5114 static int process_all_new_xattrs(struct send_ctx *sctx) ··· 5166 5157 5167 5158 ret = begin_cmd(sctx, BTRFS_SEND_C_ENABLE_VERITY); 5168 5159 if (ret < 0) 5169 - goto out; 5160 + return ret; 5170 5161 5171 5162 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 5172 5163 TLV_PUT_U8(sctx, BTRFS_SEND_A_VERITY_ALGORITHM, ··· 5181 5172 ret = send_cmd(sctx); 5182 5173 5183 5174 tlv_put_failure: 5184 - out: 5185 5175 return ret; 5186 5176 } 5187 5177 5188 5178 static int process_verity(struct send_ctx *sctx) 5189 5179 { 5190 5180 int ret = 0; 5191 - struct inode *inode; 5181 + struct btrfs_inode *inode; 5192 5182 struct fs_path *p; 5193 5183 5194 5184 inode = btrfs_iget(sctx->cur_ino, sctx->send_root); 5195 5185 if (IS_ERR(inode)) 5196 5186 return PTR_ERR(inode); 5197 5187 5198 - ret = 
btrfs_get_verity_descriptor(inode, NULL, 0); 5188 + ret = btrfs_get_verity_descriptor(&inode->vfs_inode, NULL, 0); 5199 5189 if (ret < 0) 5200 5190 goto iput; 5201 5191 ··· 5211 5203 } 5212 5204 } 5213 5205 5214 - ret = btrfs_get_verity_descriptor(inode, sctx->verity_descriptor, ret); 5206 + ret = btrfs_get_verity_descriptor(&inode->vfs_inode, sctx->verity_descriptor, ret); 5215 5207 if (ret < 0) 5216 5208 goto iput; 5217 5209 5218 - p = fs_path_alloc(); 5219 - if (!p) { 5220 - ret = -ENOMEM; 5210 + p = get_cur_inode_path(sctx); 5211 + if (IS_ERR(p)) { 5212 + ret = PTR_ERR(p); 5221 5213 goto iput; 5222 5214 } 5223 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 5224 - if (ret < 0) 5225 - goto free_path; 5226 5215 5227 5216 ret = send_verity(sctx, p, sctx->verity_descriptor); 5228 - if (ret < 0) 5229 - goto free_path; 5230 - 5231 - free_path: 5232 - fs_path_free(p); 5233 5217 iput: 5234 - iput(inode); 5218 + iput(&inode->vfs_inode); 5235 5219 return ret; 5236 5220 } 5237 5221 ··· 5343 5343 int ret = 0; 5344 5344 struct fs_path *p; 5345 5345 5346 - p = fs_path_alloc(); 5347 - if (!p) 5348 - return -ENOMEM; 5349 - 5350 5346 btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len); 5347 + 5348 + p = get_cur_inode_path(sctx); 5349 + if (IS_ERR(p)) 5350 + return PTR_ERR(p); 5351 5351 5352 5352 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 5353 5353 if (ret < 0) 5354 - goto out; 5355 - 5356 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 5357 - if (ret < 0) 5358 - goto out; 5354 + return ret; 5359 5355 5360 5356 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 5361 5357 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 5362 5358 ret = put_file_data(sctx, offset, len); 5363 5359 if (ret < 0) 5364 - goto out; 5360 + return ret; 5365 5361 5366 5362 ret = send_cmd(sctx); 5367 5363 5368 5364 tlv_put_failure: 5369 - out: 5370 - fs_path_free(p); 5371 5365 return ret; 5372 5366 } 5373 5367 ··· 5374 5380 { 5375 5381 int ret = 0; 5376 5382 
struct fs_path *p; 5383 + struct fs_path *cur_inode_path; 5377 5384 u64 gen; 5378 5385 5379 5386 btrfs_debug(sctx->send_root->fs_info, 5380 5387 "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu", 5381 5388 offset, len, btrfs_root_id(clone_root->root), 5382 5389 clone_root->ino, clone_root->offset); 5390 + 5391 + cur_inode_path = get_cur_inode_path(sctx); 5392 + if (IS_ERR(cur_inode_path)) 5393 + return PTR_ERR(cur_inode_path); 5383 5394 5384 5395 p = fs_path_alloc(); 5385 5396 if (!p) ··· 5394 5395 if (ret < 0) 5395 5396 goto out; 5396 5397 5397 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 5398 - if (ret < 0) 5399 - goto out; 5400 - 5401 5398 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 5402 5399 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); 5403 - TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 5400 + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_inode_path); 5404 5401 5405 5402 if (clone_root->root == sctx->send_root) { 5406 5403 ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen); ··· 5447 5452 int ret = 0; 5448 5453 struct fs_path *p; 5449 5454 5450 - p = fs_path_alloc(); 5451 - if (!p) 5452 - return -ENOMEM; 5455 + p = get_cur_inode_path(sctx); 5456 + if (IS_ERR(p)) 5457 + return PTR_ERR(p); 5453 5458 5454 5459 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); 5455 5460 if (ret < 0) 5456 - goto out; 5457 - 5458 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 5459 - if (ret < 0) 5460 - goto out; 5461 + return ret; 5461 5462 5462 5463 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 5463 5464 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); ··· 5462 5471 ret = send_cmd(sctx); 5463 5472 5464 5473 tlv_put_failure: 5465 - out: 5466 - fs_path_free(p); 5467 5474 return ret; 5468 5475 } 5469 5476 ··· 5490 5501 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) 5491 5502 return send_update_extent(sctx, offset, end - offset); 5492 5503 5493 - p = fs_path_alloc(); 5494 - if (!p) 5495 - 
return -ENOMEM; 5496 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 5497 - if (ret < 0) 5498 - goto tlv_put_failure; 5504 + p = get_cur_inode_path(sctx); 5505 + if (IS_ERR(p)) 5506 + return PTR_ERR(p); 5507 + 5499 5508 while (offset < end) { 5500 5509 u64 len = min(end - offset, read_size); 5501 5510 ··· 5514 5527 } 5515 5528 sctx->cur_inode_next_write_offset = offset; 5516 5529 tlv_put_failure: 5517 - fs_path_free(p); 5518 5530 return ret; 5519 5531 } 5520 5532 ··· 5521 5535 struct btrfs_path *path, u64 offset, 5522 5536 u64 len) 5523 5537 { 5524 - struct btrfs_root *root = sctx->send_root; 5525 - struct btrfs_fs_info *fs_info = root->fs_info; 5526 - struct inode *inode; 5538 + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 5527 5539 struct fs_path *fspath; 5528 5540 struct extent_buffer *leaf = path->nodes[0]; 5529 5541 struct btrfs_key key; ··· 5530 5546 size_t inline_size; 5531 5547 int ret; 5532 5548 5533 - inode = btrfs_iget(sctx->cur_ino, root); 5534 - if (IS_ERR(inode)) 5535 - return PTR_ERR(inode); 5536 - 5537 - fspath = fs_path_alloc(); 5538 - if (!fspath) { 5539 - ret = -ENOMEM; 5540 - goto out; 5541 - } 5549 + fspath = get_cur_inode_path(sctx); 5550 + if (IS_ERR(fspath)) 5551 + return PTR_ERR(fspath); 5542 5552 5543 5553 ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE); 5544 5554 if (ret < 0) 5545 - goto out; 5546 - 5547 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath); 5548 - if (ret < 0) 5549 - goto out; 5555 + return ret; 5550 5556 5551 5557 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 5552 5558 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); ··· 5552 5578 ret = btrfs_encoded_io_compression_from_extent(fs_info, 5553 5579 btrfs_file_extent_compression(leaf, ei)); 5554 5580 if (ret < 0) 5555 - goto out; 5581 + return ret; 5556 5582 TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret); 5557 5583 5558 5584 ret = put_data_header(sctx, inline_size); 5559 5585 if (ret < 0) 
5560 - goto out; 5586 + return ret; 5561 5587 read_extent_buffer(leaf, sctx->send_buf + sctx->send_size, 5562 5588 btrfs_file_extent_inline_start(ei), inline_size); 5563 5589 sctx->send_size += inline_size; ··· 5565 5591 ret = send_cmd(sctx); 5566 5592 5567 5593 tlv_put_failure: 5568 - out: 5569 - fs_path_free(fspath); 5570 - iput(inode); 5571 5594 return ret; 5572 5595 } 5573 5596 ··· 5573 5602 { 5574 5603 struct btrfs_root *root = sctx->send_root; 5575 5604 struct btrfs_fs_info *fs_info = root->fs_info; 5576 - struct inode *inode; 5605 + struct btrfs_inode *inode; 5577 5606 struct fs_path *fspath; 5578 5607 struct extent_buffer *leaf = path->nodes[0]; 5579 5608 struct btrfs_key key; ··· 5588 5617 if (IS_ERR(inode)) 5589 5618 return PTR_ERR(inode); 5590 5619 5591 - fspath = fs_path_alloc(); 5592 - if (!fspath) { 5593 - ret = -ENOMEM; 5620 + fspath = get_cur_inode_path(sctx); 5621 + if (IS_ERR(fspath)) { 5622 + ret = PTR_ERR(fspath); 5594 5623 goto out; 5595 5624 } 5596 5625 5597 5626 ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE); 5598 - if (ret < 0) 5599 - goto out; 5600 - 5601 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath); 5602 5627 if (ret < 0) 5603 5628 goto out; 5604 5629 ··· 5639 5672 * Note that send_buf is a mapping of send_buf_pages, so this is really 5640 5673 * reading into send_buf. 
5641 5674 */ 5642 - ret = btrfs_encoded_read_regular_fill_pages(BTRFS_I(inode), 5675 + ret = btrfs_encoded_read_regular_fill_pages(inode, 5643 5676 disk_bytenr, disk_num_bytes, 5644 5677 sctx->send_buf_pages + 5645 5678 (data_offset >> PAGE_SHIFT), ··· 5665 5698 5666 5699 tlv_put_failure: 5667 5700 out: 5668 - fs_path_free(fspath); 5669 - iput(inode); 5701 + iput(&inode->vfs_inode); 5670 5702 return ret; 5671 5703 } 5672 5704 ··· 5707 5741 } 5708 5742 5709 5743 if (sctx->cur_inode == NULL) { 5744 + struct btrfs_inode *btrfs_inode; 5710 5745 struct btrfs_root *root = sctx->send_root; 5711 5746 5712 - sctx->cur_inode = btrfs_iget(sctx->cur_ino, root); 5713 - if (IS_ERR(sctx->cur_inode)) { 5714 - int err = PTR_ERR(sctx->cur_inode); 5747 + btrfs_inode = btrfs_iget(sctx->cur_ino, root); 5748 + if (IS_ERR(btrfs_inode)) 5749 + return PTR_ERR(btrfs_inode); 5715 5750 5716 - sctx->cur_inode = NULL; 5717 - return err; 5718 - } 5751 + sctx->cur_inode = &btrfs_inode->vfs_inode; 5719 5752 memset(&sctx->ra, 0, sizeof(struct file_ra_state)); 5720 5753 file_ra_state_init(&sctx->ra, sctx->cur_inode->i_mapping); 5721 5754 ··· 5793 5828 */ 5794 5829 static int send_capabilities(struct send_ctx *sctx) 5795 5830 { 5796 - struct fs_path *fspath = NULL; 5797 5831 struct btrfs_path *path; 5798 5832 struct btrfs_dir_item *di; 5799 5833 struct extent_buffer *leaf; ··· 5818 5854 leaf = path->nodes[0]; 5819 5855 buf_len = btrfs_dir_data_len(leaf, di); 5820 5856 5821 - fspath = fs_path_alloc(); 5822 5857 buf = kmalloc(buf_len, GFP_KERNEL); 5823 - if (!fspath || !buf) { 5858 + if (!buf) { 5824 5859 ret = -ENOMEM; 5825 5860 goto out; 5826 5861 } 5827 5862 5828 - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath); 5829 - if (ret < 0) 5830 - goto out; 5831 - 5832 5863 data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di); 5833 5864 read_extent_buffer(leaf, buf, data_ptr, buf_len); 5834 5865 5835 - ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS, 5866 + ret = 
send_set_xattr(sctx, XATTR_NAME_CAPS, 5836 5867 strlen(XATTR_NAME_CAPS), buf, buf_len); 5837 5868 out: 5838 5869 kfree(buf); 5839 - fs_path_free(fspath); 5840 5870 btrfs_free_path(path); 5841 5871 return ret; 5842 5872 } ··· 6856 6898 sctx->cur_inode_last_extent = (u64)-1; 6857 6899 sctx->cur_inode_next_write_offset = 0; 6858 6900 sctx->ignore_cur_inode = false; 6901 + fs_path_reset(&sctx->cur_inode_path); 6859 6902 6860 6903 /* 6861 6904 * Set send_progress to current inode. This will tell all get_cur_xxx ··· 8066 8107 btrfs_root_id(root), root->dedupe_in_progress); 8067 8108 } 8068 8109 8069 - long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_args *arg) 8110 + long btrfs_ioctl_send(struct btrfs_root *send_root, const struct btrfs_ioctl_send_args *arg) 8070 8111 { 8071 8112 int ret = 0; 8072 - struct btrfs_root *send_root = inode->root; 8073 8113 struct btrfs_fs_info *fs_info = send_root->fs_info; 8074 8114 struct btrfs_root *clone_root; 8075 8115 struct send_ctx *sctx = NULL; ··· 8131 8173 goto out; 8132 8174 } 8133 8175 8176 + init_path(&sctx->cur_inode_path); 8134 8177 INIT_LIST_HEAD(&sctx->new_refs); 8135 8178 INIT_LIST_HEAD(&sctx->deleted_refs); 8136 8179 ··· 8407 8448 btrfs_lru_cache_clear(&sctx->backref_cache); 8408 8449 btrfs_lru_cache_clear(&sctx->dir_created_cache); 8409 8450 btrfs_lru_cache_clear(&sctx->dir_utimes_cache); 8451 + 8452 + if (sctx->cur_inode_path.buf != sctx->cur_inode_path.inline_buf) 8453 + kfree(sctx->cur_inode_path.buf); 8410 8454 8411 8455 kfree(sctx); 8412 8456 }
+2 -2
fs/btrfs/send.h
··· 11 11 #include <linux/sizes.h> 12 12 #include <linux/align.h> 13 13 14 - struct btrfs_inode; 14 + struct btrfs_root; 15 15 struct btrfs_ioctl_send_args; 16 16 17 17 #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" ··· 182 182 __BTRFS_SEND_A_MAX = 35, 183 183 }; 184 184 185 - long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_args *arg); 185 + long btrfs_ioctl_send(struct btrfs_root *send_root, const struct btrfs_ioctl_send_args *arg); 186 186 187 187 #endif
+1 -1
fs/btrfs/space-info.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - #include "linux/spinlock.h" 3 + #include <linux/spinlock.h> 4 4 #include <linux/minmax.h> 5 5 #include "misc.h" 6 6 #include "ctree.h"
+137 -87
fs/btrfs/subpage.c
··· 2 2 3 3 #include <linux/slab.h> 4 4 #include "messages.h" 5 - #include "ctree.h" 6 5 #include "subpage.h" 7 6 #include "btrfs_inode.h" 8 7 9 8 /* 10 - * Subpage (sectorsize < PAGE_SIZE) support overview: 9 + * Subpage (block size < folio size) support overview: 11 10 * 12 11 * Limitations: 13 12 * ··· 63 64 * This means a slightly higher tree locking latency. 64 65 */ 65 66 66 - #if PAGE_SIZE > SZ_4K 67 - bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping) 68 - { 69 - if (fs_info->sectorsize >= PAGE_SIZE) 70 - return false; 71 - 72 - /* 73 - * Only data pages (either through DIO or compression) can have no 74 - * mapping. And if page->mapping->host is data inode, it's subpage. 75 - * As we have ruled our sectorsize >= PAGE_SIZE case already. 76 - */ 77 - if (!mapping || !mapping->host || is_data_inode(BTRFS_I(mapping->host))) 78 - return true; 79 - 80 - /* 81 - * Now the only remaining case is metadata, which we only go subpage 82 - * routine if nodesize < PAGE_SIZE. 83 - */ 84 - if (fs_info->nodesize < PAGE_SIZE) 85 - return true; 86 - return false; 87 - } 88 - #endif 89 - 90 67 int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, 91 68 struct folio *folio, enum btrfs_subpage_type type) 92 69 { 93 70 struct btrfs_subpage *subpage; 71 + 72 + /* For metadata we don't support large folio yet. */ 73 + ASSERT(!folio_test_large(folio)); 94 74 95 75 /* 96 76 * We have cases like a dummy extent buffer page, which is not mapped ··· 79 101 ASSERT(folio_test_locked(folio)); 80 102 81 103 /* Either not subpage, or the folio already has private attached. 
*/ 82 - if (!btrfs_is_subpage(fs_info, folio->mapping) || folio_test_private(folio)) 104 + if (folio_test_private(folio)) 105 + return 0; 106 + if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info)) 107 + return 0; 108 + if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) 83 109 return 0; 84 110 85 - subpage = btrfs_alloc_subpage(fs_info, type); 111 + subpage = btrfs_alloc_subpage(fs_info, folio_size(folio), type); 86 112 if (IS_ERR(subpage)) 87 113 return PTR_ERR(subpage); 88 114 ··· 94 112 return 0; 95 113 } 96 114 97 - void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio) 115 + void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio, 116 + enum btrfs_subpage_type type) 98 117 { 99 118 struct btrfs_subpage *subpage; 100 119 101 120 /* Either not subpage, or the folio already has private attached. */ 102 - if (!btrfs_is_subpage(fs_info, folio->mapping) || !folio_test_private(folio)) 121 + if (!folio_test_private(folio)) 122 + return; 123 + if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info)) 124 + return; 125 + if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) 103 126 return; 104 127 105 128 subpage = folio_detach_private(folio); ··· 113 126 } 114 127 115 128 struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, 116 - enum btrfs_subpage_type type) 129 + size_t fsize, enum btrfs_subpage_type type) 117 130 { 118 131 struct btrfs_subpage *ret; 119 132 unsigned int real_size; 120 133 121 - ASSERT(fs_info->sectorsize < PAGE_SIZE); 134 + ASSERT(fs_info->sectorsize < fsize); 122 135 123 136 real_size = struct_size(ret, bitmaps, 124 - BITS_TO_LONGS(btrfs_bitmap_nr_max * fs_info->sectors_per_page)); 137 + BITS_TO_LONGS(btrfs_bitmap_nr_max * 138 + (fsize >> fs_info->sectorsize_bits))); 125 139 ret = kzalloc(real_size, GFP_NOFS); 126 140 if (!ret) 127 141 return ERR_PTR(-ENOMEM); ··· 153 165 { 154 166 struct btrfs_subpage 
*subpage; 155 167 156 - if (!btrfs_is_subpage(fs_info, folio->mapping)) 168 + if (!btrfs_meta_is_subpage(fs_info)) 157 169 return; 158 170 159 171 ASSERT(folio_test_private(folio) && folio->mapping); ··· 167 179 { 168 180 struct btrfs_subpage *subpage; 169 181 170 - if (!btrfs_is_subpage(fs_info, folio->mapping)) 182 + if (!btrfs_meta_is_subpage(fs_info)) 171 183 return; 172 184 173 185 ASSERT(folio_test_private(folio) && folio->mapping); ··· 194 206 */ 195 207 if (folio->mapping) 196 208 ASSERT(folio_pos(folio) <= start && 197 - start + len <= folio_pos(folio) + PAGE_SIZE); 209 + start + len <= folio_pos(folio) + folio_size(folio)); 198 210 } 199 211 200 212 #define subpage_calc_start_bit(fs_info, folio, name, start, len) \ 201 213 ({ \ 202 - unsigned int __start_bit; \ 214 + unsigned int __start_bit; \ 215 + const unsigned int blocks_per_folio = \ 216 + btrfs_blocks_per_folio(fs_info, folio); \ 203 217 \ 204 218 btrfs_subpage_assert(fs_info, folio, start, len); \ 205 219 __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ 206 - __start_bit += fs_info->sectors_per_page * btrfs_bitmap_nr_##name; \ 220 + __start_bit += blocks_per_folio * btrfs_bitmap_nr_##name; \ 207 221 __start_bit; \ 208 222 }) 209 223 ··· 223 233 if (folio_pos(folio) >= orig_start + orig_len) 224 234 *len = 0; 225 235 else 226 - *len = min_t(u64, folio_pos(folio) + PAGE_SIZE, 236 + *len = min_t(u64, folio_pos(folio) + folio_size(folio), 227 237 orig_start + orig_len) - *start; 228 238 } 229 239 ··· 286 296 287 297 ASSERT(folio_test_locked(folio)); 288 298 289 - if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) { 299 + if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) { 290 300 folio_unlock(folio); 291 301 return; 292 302 } ··· 313 323 struct folio *folio, unsigned long bitmap) 314 324 { 315 325 struct btrfs_subpage *subpage = folio_get_private(folio); 316 - const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked; 326 + const unsigned 
int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 327 + const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked; 317 328 unsigned long flags; 318 329 bool last = false; 319 330 int cleared = 0; 320 331 int bit; 321 332 322 - if (!btrfs_is_subpage(fs_info, folio->mapping)) { 333 + if (!btrfs_is_subpage(fs_info, folio)) { 323 334 folio_unlock(folio); 324 335 return; 325 336 } ··· 332 341 } 333 342 334 343 spin_lock_irqsave(&subpage->lock, flags); 335 - for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) { 344 + for_each_set_bit(bit, &bitmap, blocks_per_folio) { 336 345 if (test_and_clear_bit(bit + start_bit, subpage->bitmaps)) 337 346 cleared++; 338 347 } ··· 343 352 folio_unlock(folio); 344 353 } 345 354 346 - #define subpage_test_bitmap_all_set(fs_info, subpage, name) \ 355 + #define subpage_test_bitmap_all_set(fs_info, folio, name) \ 356 + ({ \ 357 + struct btrfs_subpage *subpage = folio_get_private(folio); \ 358 + const unsigned int blocks_per_folio = \ 359 + btrfs_blocks_per_folio(fs_info, folio); \ 360 + \ 347 361 bitmap_test_range_all_set(subpage->bitmaps, \ 348 - fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \ 349 - fs_info->sectors_per_page) 362 + blocks_per_folio * btrfs_bitmap_nr_##name, \ 363 + blocks_per_folio); \ 364 + }) 350 365 351 - #define subpage_test_bitmap_all_zero(fs_info, subpage, name) \ 366 + #define subpage_test_bitmap_all_zero(fs_info, folio, name) \ 367 + ({ \ 368 + struct btrfs_subpage *subpage = folio_get_private(folio); \ 369 + const unsigned int blocks_per_folio = \ 370 + btrfs_blocks_per_folio(fs_info, folio); \ 371 + \ 352 372 bitmap_test_range_all_zero(subpage->bitmaps, \ 353 - fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \ 354 - fs_info->sectors_per_page) 373 + blocks_per_folio * btrfs_bitmap_nr_##name, \ 374 + blocks_per_folio); \ 375 + }) 355 376 356 377 void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, 357 378 struct folio *folio, u64 start, u32 len) ··· 375 372 376 373 
spin_lock_irqsave(&subpage->lock, flags); 377 374 bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 378 - if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate)) 375 + if (subpage_test_bitmap_all_set(fs_info, folio, uptodate)) 379 376 folio_mark_uptodate(folio); 380 377 spin_unlock_irqrestore(&subpage->lock, flags); 381 378 } ··· 429 426 430 427 spin_lock_irqsave(&subpage->lock, flags); 431 428 bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 432 - if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty)) 429 + if (subpage_test_bitmap_all_zero(fs_info, folio, dirty)) 433 430 last = true; 434 431 spin_unlock_irqrestore(&subpage->lock, flags); 435 432 return last; ··· 470 467 471 468 spin_lock_irqsave(&subpage->lock, flags); 472 469 bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 473 - if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) { 470 + if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) { 474 471 ASSERT(folio_test_writeback(folio)); 475 472 folio_end_writeback(folio); 476 473 } ··· 501 498 502 499 spin_lock_irqsave(&subpage->lock, flags); 503 500 bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 504 - if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered)) 501 + if (subpage_test_bitmap_all_zero(fs_info, folio, ordered)) 505 502 folio_clear_ordered(folio); 506 503 spin_unlock_irqrestore(&subpage->lock, flags); 507 504 } ··· 516 513 517 514 spin_lock_irqsave(&subpage->lock, flags); 518 515 bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 519 - if (subpage_test_bitmap_all_set(fs_info, subpage, checked)) 516 + if (subpage_test_bitmap_all_set(fs_info, folio, checked)) 520 517 folio_set_checked(folio); 521 518 spin_unlock_irqrestore(&subpage->lock, flags); 522 519 } ··· 572 569 struct folio *folio, u64 start, u32 len) \ 573 570 { \ 574 571 if (unlikely(!fs_info) || \ 575 - !btrfs_is_subpage(fs_info, 
folio->mapping)) { \ 572 + !btrfs_is_subpage(fs_info, folio)) { \ 576 573 folio_set_func(folio); \ 577 574 return; \ 578 575 } \ ··· 582 579 struct folio *folio, u64 start, u32 len) \ 583 580 { \ 584 581 if (unlikely(!fs_info) || \ 585 - !btrfs_is_subpage(fs_info, folio->mapping)) { \ 582 + !btrfs_is_subpage(fs_info, folio)) { \ 586 583 folio_clear_func(folio); \ 587 584 return; \ 588 585 } \ ··· 592 589 struct folio *folio, u64 start, u32 len) \ 593 590 { \ 594 591 if (unlikely(!fs_info) || \ 595 - !btrfs_is_subpage(fs_info, folio->mapping)) \ 592 + !btrfs_is_subpage(fs_info, folio)) \ 596 593 return folio_test_func(folio); \ 597 594 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 598 595 } \ ··· 600 597 struct folio *folio, u64 start, u32 len) \ 601 598 { \ 602 599 if (unlikely(!fs_info) || \ 603 - !btrfs_is_subpage(fs_info, folio->mapping)) { \ 600 + !btrfs_is_subpage(fs_info, folio)) { \ 604 601 folio_set_func(folio); \ 605 602 return; \ 606 603 } \ ··· 611 608 struct folio *folio, u64 start, u32 len) \ 612 609 { \ 613 610 if (unlikely(!fs_info) || \ 614 - !btrfs_is_subpage(fs_info, folio->mapping)) { \ 611 + !btrfs_is_subpage(fs_info, folio)) { \ 615 612 folio_clear_func(folio); \ 616 613 return; \ 617 614 } \ ··· 622 619 struct folio *folio, u64 start, u32 len) \ 623 620 { \ 624 621 if (unlikely(!fs_info) || \ 625 - !btrfs_is_subpage(fs_info, folio->mapping)) \ 622 + !btrfs_is_subpage(fs_info, folio)) \ 626 623 return folio_test_func(folio); \ 627 624 btrfs_subpage_clamp_range(folio, &start, &len); \ 628 625 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 626 + } \ 627 + void btrfs_meta_folio_set_##name(struct folio *folio, const struct extent_buffer *eb) \ 628 + { \ 629 + if (!btrfs_meta_is_subpage(eb->fs_info)) { \ 630 + folio_set_func(folio); \ 631 + return; \ 632 + } \ 633 + btrfs_subpage_set_##name(eb->fs_info, folio, eb->start, eb->len); \ 634 + } \ 635 + void btrfs_meta_folio_clear_##name(struct folio *folio, const 
struct extent_buffer *eb) \ 636 + { \ 637 + if (!btrfs_meta_is_subpage(eb->fs_info)) { \ 638 + folio_clear_func(folio); \ 639 + return; \ 640 + } \ 641 + btrfs_subpage_clear_##name(eb->fs_info, folio, eb->start, eb->len); \ 642 + } \ 643 + bool btrfs_meta_folio_test_##name(struct folio *folio, const struct extent_buffer *eb) \ 644 + { \ 645 + if (!btrfs_meta_is_subpage(eb->fs_info)) \ 646 + return folio_test_func(folio); \ 647 + return btrfs_subpage_test_##name(eb->fs_info, folio, eb->start, eb->len); \ 629 648 } 630 649 IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate, 631 650 folio_test_uptodate); ··· 660 635 IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, 661 636 folio_test_checked); 662 637 663 - #define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \ 638 + #define GET_SUBPAGE_BITMAP(fs_info, folio, name, dst) \ 664 639 { \ 665 - const int sectors_per_page = fs_info->sectors_per_page; \ 640 + const unsigned int blocks_per_folio = \ 641 + btrfs_blocks_per_folio(fs_info, folio); \ 642 + const struct btrfs_subpage *subpage = folio_get_private(folio); \ 666 643 \ 667 - ASSERT(sectors_per_page < BITS_PER_LONG); \ 644 + ASSERT(blocks_per_folio < BITS_PER_LONG); \ 668 645 *dst = bitmap_read(subpage->bitmaps, \ 669 - sectors_per_page * btrfs_bitmap_nr_##name, \ 670 - sectors_per_page); \ 646 + blocks_per_folio * btrfs_bitmap_nr_##name, \ 647 + blocks_per_folio); \ 671 648 } 672 649 673 650 #define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len) \ 674 651 { \ 675 - const struct btrfs_subpage *subpage = folio_get_private(folio); \ 676 652 unsigned long bitmap; \ 653 + const unsigned int blocks_per_folio = \ 654 + btrfs_blocks_per_folio(fs_info, folio); \ 677 655 \ 678 - GET_SUBPAGE_BITMAP(subpage, fs_info, name, &bitmap); \ 656 + GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap); \ 679 657 btrfs_warn(fs_info, \ 680 658 "dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \ 681 659 start, len, 
folio_pos(folio), \ 682 - fs_info->sectors_per_page, &bitmap); \ 660 + blocks_per_folio, &bitmap); \ 683 661 } 684 662 685 663 /* ··· 700 672 if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) 701 673 return; 702 674 703 - if (!btrfs_is_subpage(fs_info, folio->mapping)) { 675 + if (!btrfs_is_subpage(fs_info, folio)) { 704 676 ASSERT(!folio_test_dirty(folio)); 705 677 return; 706 678 } ··· 735 707 int ret; 736 708 737 709 ASSERT(folio_test_locked(folio)); 738 - if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) 710 + if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) 739 711 return; 740 712 741 713 subpage = folio_get_private(folio); ··· 749 721 } 750 722 bitmap_set(subpage->bitmaps, start_bit, nbits); 751 723 ret = atomic_add_return(nbits, &subpage->nr_locked); 752 - ASSERT(ret <= fs_info->sectors_per_page); 724 + ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio)); 753 725 spin_unlock_irqrestore(&subpage->lock, flags); 726 + } 727 + 728 + /* 729 + * Clear the dirty flag for the folio. 730 + * 731 + * If the affected folio is no longer dirty, return true. Otherwise return false. 
732 + */ 733 + bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb) 734 + { 735 + bool last; 736 + 737 + if (!btrfs_meta_is_subpage(eb->fs_info)) { 738 + folio_clear_dirty_for_io(folio); 739 + return true; 740 + } 741 + 742 + last = btrfs_subpage_clear_and_test_dirty(eb->fs_info, folio, eb->start, eb->len); 743 + if (last) { 744 + folio_clear_dirty_for_io(folio); 745 + return true; 746 + } 747 + return false; 754 748 } 755 749 756 750 void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, 757 751 struct folio *folio, u64 start, u32 len) 758 752 { 759 753 struct btrfs_subpage *subpage; 760 - const u32 sectors_per_page = fs_info->sectors_per_page; 754 + const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 761 755 unsigned long uptodate_bitmap; 762 756 unsigned long dirty_bitmap; 763 757 unsigned long writeback_bitmap; ··· 789 739 unsigned long flags; 790 740 791 741 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 792 - ASSERT(sectors_per_page > 1); 742 + ASSERT(blocks_per_folio > 1); 793 743 subpage = folio_get_private(folio); 794 744 795 745 spin_lock_irqsave(&subpage->lock, flags); 796 - GET_SUBPAGE_BITMAP(subpage, fs_info, uptodate, &uptodate_bitmap); 797 - GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, &dirty_bitmap); 798 - GET_SUBPAGE_BITMAP(subpage, fs_info, writeback, &writeback_bitmap); 799 - GET_SUBPAGE_BITMAP(subpage, fs_info, ordered, &ordered_bitmap); 800 - GET_SUBPAGE_BITMAP(subpage, fs_info, checked, &checked_bitmap); 801 - GET_SUBPAGE_BITMAP(subpage, fs_info, locked, &locked_bitmap); 746 + GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap); 747 + GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap); 748 + GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap); 749 + GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap); 750 + GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap); 751 + GET_SUBPAGE_BITMAP(fs_info, 
folio, locked, &locked_bitmap); 802 752 spin_unlock_irqrestore(&subpage->lock, flags); 803 753 804 754 dump_page(folio_page(folio, 0), "btrfs subpage dump"); 805 755 btrfs_warn(fs_info, 806 756 "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl", 807 757 start, len, folio_pos(folio), 808 - sectors_per_page, &uptodate_bitmap, 809 - sectors_per_page, &dirty_bitmap, 810 - sectors_per_page, &locked_bitmap, 811 - sectors_per_page, &writeback_bitmap, 812 - sectors_per_page, &ordered_bitmap, 813 - sectors_per_page, &checked_bitmap); 758 + blocks_per_folio, &uptodate_bitmap, 759 + blocks_per_folio, &dirty_bitmap, 760 + blocks_per_folio, &locked_bitmap, 761 + blocks_per_folio, &writeback_bitmap, 762 + blocks_per_folio, &ordered_bitmap, 763 + blocks_per_folio, &checked_bitmap); 814 764 } 815 765 816 766 void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info, ··· 821 771 unsigned long flags; 822 772 823 773 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 824 - ASSERT(fs_info->sectors_per_page > 1); 774 + ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1); 825 775 subpage = folio_get_private(folio); 826 776 827 777 spin_lock_irqsave(&subpage->lock, flags); 828 - GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, ret_bitmap); 778 + GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap); 829 779 spin_unlock_irqrestore(&subpage->lock, flags); 830 780 }
+47 -9
fs/btrfs/subpage.h
··· 6 6 #include <linux/spinlock.h> 7 7 #include <linux/atomic.h> 8 8 #include <linux/sizes.h> 9 + #include "btrfs_inode.h" 10 + #include "fs.h" 9 11 10 12 struct address_space; 11 13 struct folio; 12 - struct btrfs_fs_info; 13 14 14 15 /* 15 16 * Extra info for subpapge bitmap. ··· 70 69 BTRFS_SUBPAGE_DATA, 71 70 }; 72 71 73 - #if PAGE_SIZE > SZ_4K 74 - bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping); 75 - #else 76 - static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, 77 - struct address_space *mapping) 72 + #if PAGE_SIZE > BTRFS_MIN_BLOCKSIZE 73 + /* 74 + * Subpage support for metadata is more complex, as we can have dummy extent 75 + * buffers, where folios have no mapping to determine the owning inode. 76 + * 77 + * Thankfully we only need to check if node size is smaller than page size. 78 + * Even with larger folio support, we will only allocate a folio as large as 79 + * node size. 80 + * Thus if nodesize < PAGE_SIZE, we know metadata needs need to subpage routine. 
81 + */ 82 + static inline bool btrfs_meta_is_subpage(const struct btrfs_fs_info *fs_info) 78 83 { 84 + return fs_info->nodesize < PAGE_SIZE; 85 + } 86 + static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, 87 + struct folio *folio) 88 + { 89 + if (folio->mapping && folio->mapping->host) 90 + ASSERT(is_data_inode(BTRFS_I(folio->mapping->host))); 91 + return fs_info->sectorsize < folio_size(folio); 92 + } 93 + #else 94 + static inline bool btrfs_meta_is_subpage(const struct btrfs_fs_info *fs_info) 95 + { 96 + return false; 97 + } 98 + static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, 99 + struct folio *folio) 100 + { 101 + if (folio->mapping && folio->mapping->host) 102 + ASSERT(is_data_inode(BTRFS_I(folio->mapping->host))); 79 103 return false; 80 104 } 81 105 #endif 82 106 83 107 int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, 84 108 struct folio *folio, enum btrfs_subpage_type type); 85 - void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio); 109 + void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio, 110 + enum btrfs_subpage_type type); 86 111 87 112 /* Allocate additional data where page represents more than one sector */ 88 113 struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, 89 - enum btrfs_subpage_type type); 114 + size_t fsize, enum btrfs_subpage_type type); 90 115 void btrfs_free_subpage(struct btrfs_subpage *subpage); 91 116 92 117 void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); ··· 137 110 * btrfs_folio_clamp_*() are similar to btrfs_folio_*(), except the range doesn't 138 111 * need to be inside the page. Those functions will truncate the range 139 112 * automatically. 113 + * 114 + * Both btrfs_folio_*() and btrfs_folio_clamp_*() are for data folios. 
115 + * 116 + * For metadata, one should use btrfs_meta_folio_*() helpers instead, and there 117 + * is no clamp version for metadata helpers, as we either go subpage 118 + * (nodesize < PAGE_SIZE) or go regular folio helpers (nodesize >= PAGE_SIZE, 119 + * and our folio is never larger than nodesize). 140 120 */ 141 121 #define DECLARE_BTRFS_SUBPAGE_OPS(name) \ 142 122 void btrfs_subpage_set_##name(const struct btrfs_fs_info *fs_info, \ ··· 163 129 void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \ 164 130 struct folio *folio, u64 start, u32 len); \ 165 131 bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \ 166 - struct folio *folio, u64 start, u32 len); 132 + struct folio *folio, u64 start, u32 len); \ 133 + void btrfs_meta_folio_set_##name(struct folio *folio, const struct extent_buffer *eb); \ 134 + void btrfs_meta_folio_clear_##name(struct folio *folio, const struct extent_buffer *eb); \ 135 + bool btrfs_meta_folio_test_##name(struct folio *folio, const struct extent_buffer *eb); 167 136 168 137 DECLARE_BTRFS_SUBPAGE_OPS(uptodate); 169 138 DECLARE_BTRFS_SUBPAGE_OPS(dirty); ··· 192 155 193 156 void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, 194 157 struct folio *folio, u64 start, u32 len); 158 + bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb); 195 159 void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info, 196 160 struct folio *folio, 197 161 unsigned long *ret_bitmap);
+3 -3
fs/btrfs/super.c
··· 84 84 u32 thread_pool_size; 85 85 unsigned long long mount_opt; 86 86 unsigned long compress_type:4; 87 - unsigned int compress_level; 87 + int compress_level; 88 88 refcount_t refs; 89 89 }; 90 90 ··· 947 947 static int btrfs_fill_super(struct super_block *sb, 948 948 struct btrfs_fs_devices *fs_devices) 949 949 { 950 - struct inode *inode; 950 + struct btrfs_inode *inode; 951 951 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 952 952 int err; 953 953 ··· 982 982 goto fail_close; 983 983 } 984 984 985 - sb->s_root = d_make_root(inode); 985 + sb->s_root = d_make_root(&inode->vfs_inode); 986 986 if (!sb->s_root) { 987 987 err = -ENOMEM; 988 988 goto fail_close;
+8 -6
fs/btrfs/sysfs.c
··· 411 411 { 412 412 ssize_t ret = 0; 413 413 414 - /* An artificial limit to only support 4K and PAGE_SIZE */ 414 + if (BTRFS_MIN_BLOCKSIZE != SZ_4K && BTRFS_MIN_BLOCKSIZE != PAGE_SIZE) 415 + ret += sysfs_emit_at(buf, ret, "%u ", BTRFS_MIN_BLOCKSIZE); 415 416 if (PAGE_SIZE > SZ_4K) 416 417 ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K); 417 418 ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE); ··· 1343 1342 /* Separate value from input in policy:value format. */ 1344 1343 value_str = strchr(param, ':'); 1345 1344 if (value_str) { 1346 - int ret; 1345 + char *retptr; 1347 1346 1348 1347 *value_str = 0; 1349 1348 value_str++; 1350 1349 if (!value_ret) 1351 1350 return -EINVAL; 1352 - ret = kstrtos64(value_str, 10, value_ret); 1353 - if (ret) 1351 + 1352 + *value_ret = memparse(value_str, &retptr); 1353 + /* There could be any trailing typos after the value. */ 1354 + retptr = skip_spaces(retptr); 1355 + if (*retptr != 0 || *value_ret <= 0) 1354 1356 return -EINVAL; 1355 - if (*value_ret < 0) 1356 - return -ERANGE; 1357 1357 } 1358 1358 #endif 1359 1359
+1
fs/btrfs/sysfs.h
··· 7 7 #include <linux/compiler_types.h> 8 8 #include <linux/kobject.h> 9 9 10 + struct block_device; 10 11 struct btrfs_fs_info; 11 12 struct btrfs_device; 12 13 struct btrfs_fs_devices;
+3 -3
fs/btrfs/tests/extent-io-tests.c
··· 525 525 goto out; 526 526 } 527 527 528 - eb = __alloc_dummy_extent_buffer(fs_info, 0, nodesize); 528 + eb = alloc_dummy_extent_buffer(fs_info, 0); 529 529 if (!eb) { 530 530 test_std_err(TEST_ALLOC_ROOT); 531 531 ret = -ENOMEM; ··· 542 542 * Test again for case where the tree block is sectorsize aligned but 543 543 * not nodesize aligned. 544 544 */ 545 - eb = __alloc_dummy_extent_buffer(fs_info, sectorsize, nodesize); 545 + eb = alloc_dummy_extent_buffer(fs_info, sectorsize); 546 546 if (!eb) { 547 547 test_std_err(TEST_ALLOC_ROOT); 548 548 ret = -ENOMEM; ··· 730 730 goto out; 731 731 } 732 732 733 - eb = __alloc_dummy_extent_buffer(fs_info, SZ_1M, nodesize); 733 + eb = alloc_dummy_extent_buffer(fs_info, SZ_1M); 734 734 if (!eb) { 735 735 test_std_err(TEST_ALLOC_EXTENT_BUFFER); 736 736 ret = -ENOMEM;
+1
fs/btrfs/tests/extent-map-tests.c
··· 1045 1045 ret = btrfs_add_chunk_map(fs_info, map); 1046 1046 if (ret) { 1047 1047 test_err("error adding chunk map to mapping tree"); 1048 + btrfs_free_chunk_map(map); 1048 1049 goto out_free; 1049 1050 } 1050 1051
+26 -13
fs/btrfs/transaction.c
··· 160 160 cache = list_first_entry(&transaction->deleted_bgs, 161 161 struct btrfs_block_group, 162 162 bg_list); 163 + /* 164 + * Not strictly necessary to lock, as no other task will be using a 165 + * block_group on the deleted_bgs list during a transaction abort. 166 + */ 167 + spin_lock(&transaction->fs_info->unused_bgs_lock); 163 168 list_del_init(&cache->bg_list); 169 + spin_unlock(&transaction->fs_info->unused_bgs_lock); 164 170 btrfs_unfreeze_block_group(cache); 165 171 btrfs_put_block_group(cache); 166 172 } ··· 1641 1635 struct btrfs_root *root = pending->root; 1642 1636 struct btrfs_root *parent_root; 1643 1637 struct btrfs_block_rsv *rsv; 1644 - struct inode *parent_inode = &pending->dir->vfs_inode; 1638 + struct btrfs_inode *parent_inode = pending->dir; 1645 1639 struct btrfs_path *path; 1646 1640 struct btrfs_dir_item *dir_item; 1647 1641 struct extent_buffer *tmp; ··· 1667 1661 * filesystem. 1668 1662 */ 1669 1663 nofs_flags = memalloc_nofs_save(); 1670 - pending->error = fscrypt_setup_filename(parent_inode, 1664 + pending->error = fscrypt_setup_filename(&parent_inode->vfs_inode, 1671 1665 &pending->dentry->d_name, 0, 1672 1666 &fname); 1673 1667 memalloc_nofs_restore(nofs_flags); ··· 1696 1690 } 1697 1691 1698 1692 key.objectid = objectid; 1699 - key.offset = (u64)-1; 1700 1693 key.type = BTRFS_ROOT_ITEM_KEY; 1694 + key.offset = (u64)-1; 1701 1695 1702 1696 rsv = trans->block_rsv; 1703 1697 trans->block_rsv = &pending->block_rsv; ··· 1705 1699 trace_btrfs_space_reservation(fs_info, "transaction", 1706 1700 trans->transid, 1707 1701 trans->bytes_reserved, 1); 1708 - parent_root = BTRFS_I(parent_inode)->root; 1702 + parent_root = parent_inode->root; 1709 1703 ret = record_root_in_trans(trans, parent_root, 0); 1710 1704 if (ret) 1711 1705 goto fail; 1712 - cur_time = current_time(parent_inode); 1706 + cur_time = current_time(&parent_inode->vfs_inode); 1713 1707 1714 1708 /* 1715 1709 * insert the directory item 1716 1710 */ 1717 - ret = 
btrfs_set_inode_index(BTRFS_I(parent_inode), &index); 1711 + ret = btrfs_set_inode_index(parent_inode, &index); 1718 1712 if (ret) { 1719 1713 btrfs_abort_transaction(trans, ret); 1720 1714 goto fail; ··· 1722 1716 1723 1717 /* check if there is a file/dir which has the same name. */ 1724 1718 dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, 1725 - btrfs_ino(BTRFS_I(parent_inode)), 1719 + btrfs_ino(parent_inode), 1726 1720 &fname.disk_name, 0); 1727 1721 if (dir_item != NULL && !IS_ERR(dir_item)) { 1728 1722 pending->error = -EEXIST; ··· 1823 1817 */ 1824 1818 ret = btrfs_add_root_ref(trans, objectid, 1825 1819 btrfs_root_id(parent_root), 1826 - btrfs_ino(BTRFS_I(parent_inode)), index, 1820 + btrfs_ino(parent_inode), index, 1827 1821 &fname.disk_name); 1828 1822 if (ret) { 1829 1823 btrfs_abort_transaction(trans, ret); ··· 1861 1855 goto fail; 1862 1856 1863 1857 ret = btrfs_insert_dir_item(trans, &fname.disk_name, 1864 - BTRFS_I(parent_inode), &key, BTRFS_FT_DIR, 1858 + parent_inode, &key, BTRFS_FT_DIR, 1865 1859 index); 1866 1860 if (ret) { 1867 1861 btrfs_abort_transaction(trans, ret); 1868 1862 goto fail; 1869 1863 } 1870 1864 1871 - btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + 1865 + btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + 1872 1866 fname.disk_name.len * 2); 1873 - inode_set_mtime_to_ts(parent_inode, 1874 - inode_set_ctime_current(parent_inode)); 1875 - ret = btrfs_update_inode_fallback(trans, BTRFS_I(parent_inode)); 1867 + inode_set_mtime_to_ts(&parent_inode->vfs_inode, 1868 + inode_set_ctime_current(&parent_inode->vfs_inode)); 1869 + ret = btrfs_update_inode_fallback(trans, parent_inode); 1876 1870 if (ret) { 1877 1871 btrfs_abort_transaction(trans, ret); 1878 1872 goto fail; ··· 2102 2096 2103 2097 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { 2104 2098 btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info); 2099 + /* 2100 + * Not strictly necessary to lock, as no other task will be 
using a 2101 + * block_group on the new_bgs list during a transaction abort. 2102 + */ 2103 + spin_lock(&fs_info->unused_bgs_lock); 2105 2104 list_del_init(&block_group->bg_list); 2105 + btrfs_put_block_group(block_group); 2106 + spin_unlock(&fs_info->unused_bgs_lock); 2106 2107 } 2107 2108 } 2108 2109
+180 -212
fs/btrfs/tree-log.c
··· 138 138 * and once to do all the other items. 139 139 */ 140 140 141 - static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) 141 + static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) 142 142 { 143 143 unsigned int nofs_flag; 144 - struct inode *inode; 144 + struct btrfs_inode *inode; 145 145 146 146 /* 147 147 * We're holding a transaction handle whether we are logging or ··· 376 376 } 377 377 378 378 /* 379 - * Item overwrite used by replay and tree logging. eb, slot and key all refer 380 - * to the src data we are copying out. 379 + * Item overwrite used by log replay. The given eb, slot and key all refer to 380 + * the source data we are copying out. 381 381 * 382 - * root is the tree we are copying into, and path is a scratch 383 - * path for use in this function (it should be released on entry and 384 - * will be released on exit). 382 + * The given root is for the tree we are copying into, and path is a scratch 383 + * path for use in this function (it should be released on entry and will be 384 + * released on exit). 385 385 * 386 386 * If the key is already in the destination tree the existing item is 387 387 * overwritten. If the existing item isn't big enough, it is extended. 
··· 401 401 int save_old_i_size = 0; 402 402 unsigned long src_ptr; 403 403 unsigned long dst_ptr; 404 + struct extent_buffer *dst_eb; 405 + int dst_slot; 404 406 bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; 405 407 406 408 /* ··· 422 420 if (ret < 0) 423 421 return ret; 424 422 423 + dst_eb = path->nodes[0]; 424 + dst_slot = path->slots[0]; 425 + 425 426 if (ret == 0) { 426 427 char *src_copy; 427 - char *dst_copy; 428 - u32 dst_size = btrfs_item_size(path->nodes[0], 429 - path->slots[0]); 428 + const u32 dst_size = btrfs_item_size(dst_eb, dst_slot); 429 + 430 430 if (dst_size != item_size) 431 431 goto insert; 432 432 ··· 436 432 btrfs_release_path(path); 437 433 return 0; 438 434 } 439 - dst_copy = kmalloc(item_size, GFP_NOFS); 440 435 src_copy = kmalloc(item_size, GFP_NOFS); 441 - if (!dst_copy || !src_copy) { 436 + if (!src_copy) { 442 437 btrfs_release_path(path); 443 - kfree(dst_copy); 444 - kfree(src_copy); 445 438 return -ENOMEM; 446 439 } 447 440 448 441 read_extent_buffer(eb, src_copy, src_ptr, item_size); 442 + dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot); 443 + ret = memcmp_extent_buffer(dst_eb, src_copy, dst_ptr, item_size); 449 444 450 - dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 451 - read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, 452 - item_size); 453 - ret = memcmp(dst_copy, src_copy, item_size); 454 - 455 - kfree(dst_copy); 456 445 kfree(src_copy); 457 446 /* 458 447 * they have the same contents, just return, this saves ··· 467 470 u64 nbytes; 468 471 u32 mode; 469 472 470 - item = btrfs_item_ptr(path->nodes[0], path->slots[0], 473 + item = btrfs_item_ptr(dst_eb, dst_slot, 471 474 struct btrfs_inode_item); 472 - nbytes = btrfs_inode_nbytes(path->nodes[0], item); 475 + nbytes = btrfs_inode_nbytes(dst_eb, item); 473 476 item = btrfs_item_ptr(eb, slot, 474 477 struct btrfs_inode_item); 475 478 btrfs_set_inode_nbytes(eb, item, nbytes); ··· 511 514 key, item_size); 512 515 path->skip_release_on_error = 0; 513 
516 517 + dst_eb = path->nodes[0]; 518 + dst_slot = path->slots[0]; 519 + 514 520 /* make sure any existing item is the correct size */ 515 521 if (ret == -EEXIST || ret == -EOVERFLOW) { 516 - u32 found_size; 517 - found_size = btrfs_item_size(path->nodes[0], 518 - path->slots[0]); 522 + const u32 found_size = btrfs_item_size(dst_eb, dst_slot); 523 + 519 524 if (found_size > item_size) 520 525 btrfs_truncate_item(trans, path, item_size, 1); 521 526 else if (found_size < item_size) ··· 525 526 } else if (ret) { 526 527 return ret; 527 528 } 528 - dst_ptr = btrfs_item_ptr_offset(path->nodes[0], 529 - path->slots[0]); 529 + dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot); 530 530 531 531 /* don't overwrite an existing inode if the generation number 532 532 * was logged as zero. This is done when the tree logging code ··· 544 546 dst_item = (struct btrfs_inode_item *)dst_ptr; 545 547 546 548 if (btrfs_inode_generation(eb, src_item) == 0) { 547 - struct extent_buffer *dst_eb = path->nodes[0]; 548 549 const u64 ino_size = btrfs_inode_size(eb, src_item); 549 550 550 551 /* ··· 561 564 } 562 565 563 566 if (S_ISDIR(btrfs_inode_mode(eb, src_item)) && 564 - S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { 567 + S_ISDIR(btrfs_inode_mode(dst_eb, dst_item))) { 565 568 save_old_i_size = 1; 566 - saved_i_size = btrfs_inode_size(path->nodes[0], 567 - dst_item); 569 + saved_i_size = btrfs_inode_size(dst_eb, dst_item); 568 570 } 569 571 } 570 572 571 - copy_extent_buffer(path->nodes[0], eb, dst_ptr, 572 - src_ptr, item_size); 573 + copy_extent_buffer(dst_eb, eb, dst_ptr, src_ptr, item_size); 573 574 574 575 if (save_old_i_size) { 575 576 struct btrfs_inode_item *dst_item; 577 + 576 578 dst_item = (struct btrfs_inode_item *)dst_ptr; 577 - btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); 579 + btrfs_set_inode_size(dst_eb, dst_item, saved_i_size); 578 580 } 579 581 580 582 /* make sure the generation is filled in */ 581 583 if (key->type == BTRFS_INODE_ITEM_KEY) { 
582 584 struct btrfs_inode_item *dst_item; 585 + 583 586 dst_item = (struct btrfs_inode_item *)dst_ptr; 584 - if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { 585 - btrfs_set_inode_generation(path->nodes[0], dst_item, 586 - trans->transid); 587 - } 587 + if (btrfs_inode_generation(dst_eb, dst_item) == 0) 588 + btrfs_set_inode_generation(dst_eb, dst_item, trans->transid); 588 589 } 589 590 no_copy: 590 591 btrfs_release_path(path); ··· 608 613 * simple helper to read an inode off the disk from a given root 609 614 * This can only be called for subvolume roots and not for the log 610 615 */ 611 - static noinline struct inode *read_one_inode(struct btrfs_root *root, 612 - u64 objectid) 616 + static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root, 617 + u64 objectid) 613 618 { 614 - struct inode *inode; 619 + struct btrfs_inode *inode; 615 620 616 621 inode = btrfs_iget_logging(objectid, root); 617 622 if (IS_ERR(inode)) 618 - inode = NULL; 623 + return NULL; 619 624 return inode; 620 625 } 621 626 ··· 644 649 u64 start = key->offset; 645 650 u64 nbytes = 0; 646 651 struct btrfs_file_extent_item *item; 647 - struct inode *inode = NULL; 652 + struct btrfs_inode *inode = NULL; 648 653 unsigned long size; 649 654 int ret = 0; 650 655 ··· 683 688 * file. This must be done before the btrfs_drop_extents run 684 689 * so we don't try to drop this extent. 
685 690 */ 686 - ret = btrfs_lookup_file_extent(trans, root, path, 687 - btrfs_ino(BTRFS_I(inode)), start, 0); 691 + ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), start, 0); 688 692 689 693 if (ret == 0 && 690 694 (found_type == BTRFS_FILE_EXTENT_REG || 691 695 found_type == BTRFS_FILE_EXTENT_PREALLOC)) { 692 - struct btrfs_file_extent_item cmp1; 693 - struct btrfs_file_extent_item cmp2; 694 - struct btrfs_file_extent_item *existing; 695 - struct extent_buffer *leaf; 696 + struct btrfs_file_extent_item existing; 697 + unsigned long ptr; 696 698 697 - leaf = path->nodes[0]; 698 - existing = btrfs_item_ptr(leaf, path->slots[0], 699 - struct btrfs_file_extent_item); 700 - 701 - read_extent_buffer(eb, &cmp1, (unsigned long)item, 702 - sizeof(cmp1)); 703 - read_extent_buffer(leaf, &cmp2, (unsigned long)existing, 704 - sizeof(cmp2)); 699 + ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 700 + read_extent_buffer(path->nodes[0], &existing, ptr, sizeof(existing)); 705 701 706 702 /* 707 703 * we already have a pointer to this exact extent, 708 704 * we don't have to do anything 709 705 */ 710 - if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { 706 + if (memcmp_extent_buffer(eb, &existing, (unsigned long)item, 707 + sizeof(existing)) == 0) { 711 708 btrfs_release_path(path); 712 709 goto out; 713 710 } ··· 710 723 drop_args.start = start; 711 724 drop_args.end = extent_end; 712 725 drop_args.drop_cache = true; 713 - ret = btrfs_drop_extents(trans, root, BTRFS_I(inode), &drop_args); 726 + ret = btrfs_drop_extents(trans, root, inode, &drop_args); 714 727 if (ret) 715 728 goto out; 716 729 ··· 734 747 (unsigned long)item, sizeof(*item)); 735 748 736 749 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 737 - ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 738 750 ins.type = BTRFS_EXTENT_ITEM_KEY; 751 + ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 739 752 offset = key->offset - btrfs_file_extent_offset(eb, item); 740 753 
741 754 /* ··· 888 901 goto out; 889 902 } 890 903 891 - ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, 892 - extent_end - start); 904 + ret = btrfs_inode_set_file_extent_range(inode, start, extent_end - start); 893 905 if (ret) 894 906 goto out; 895 907 896 908 update_inode: 897 - btrfs_update_inode_bytes(BTRFS_I(inode), nbytes, drop_args.bytes_found); 898 - ret = btrfs_update_inode(trans, BTRFS_I(inode)); 909 + btrfs_update_inode_bytes(inode, nbytes, drop_args.bytes_found); 910 + ret = btrfs_update_inode(trans, inode); 899 911 out: 900 - iput(inode); 912 + iput(&inode->vfs_inode); 901 913 return ret; 902 914 } 903 915 ··· 933 947 struct btrfs_dir_item *di) 934 948 { 935 949 struct btrfs_root *root = dir->root; 936 - struct inode *inode; 950 + struct btrfs_inode *inode; 937 951 struct fscrypt_str name; 938 952 struct extent_buffer *leaf; 939 953 struct btrfs_key location; ··· 958 972 if (ret) 959 973 goto out; 960 974 961 - ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), &name); 975 + ret = unlink_inode_for_log_replay(trans, dir, inode, &name); 962 976 out: 963 977 kfree(name.name); 964 - iput(inode); 978 + iput(&inode->vfs_inode); 965 979 return ret; 966 980 } 967 981 ··· 1134 1148 u32 item_size; 1135 1149 u32 cur_offset = 0; 1136 1150 unsigned long base; 1137 - struct inode *victim_parent; 1151 + struct btrfs_inode *victim_parent; 1138 1152 1139 1153 leaf = path->nodes[0]; 1140 1154 ··· 1174 1188 btrfs_release_path(path); 1175 1189 1176 1190 ret = unlink_inode_for_log_replay(trans, 1177 - BTRFS_I(victim_parent), 1191 + victim_parent, 1178 1192 inode, &victim_name); 1179 1193 } 1180 - iput(victim_parent); 1194 + iput(&victim_parent->vfs_inode); 1181 1195 kfree(victim_name.name); 1182 1196 if (ret) 1183 1197 return ret; ··· 1311 1325 ret = !!btrfs_find_name_in_backref(log_eb, log_slot, &name); 1312 1326 1313 1327 if (!ret) { 1314 - struct inode *dir; 1328 + struct btrfs_inode *dir; 1315 1329 1316 1330 btrfs_release_path(path); 1317 
1331 dir = read_one_inode(root, parent_id); ··· 1320 1334 kfree(name.name); 1321 1335 goto out; 1322 1336 } 1323 - ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), 1324 - inode, &name); 1337 + ret = unlink_inode_for_log_replay(trans, dir, inode, &name); 1325 1338 kfree(name.name); 1326 - iput(dir); 1339 + iput(&dir->vfs_inode); 1327 1340 if (ret) 1328 1341 goto out; 1329 1342 goto again; ··· 1354 1369 struct extent_buffer *eb, int slot, 1355 1370 struct btrfs_key *key) 1356 1371 { 1357 - struct inode *dir = NULL; 1358 - struct inode *inode = NULL; 1372 + struct btrfs_inode *dir = NULL; 1373 + struct btrfs_inode *inode = NULL; 1359 1374 unsigned long ref_ptr; 1360 1375 unsigned long ref_end; 1361 1376 struct fscrypt_str name = { 0 }; ··· 1420 1435 if (ret) 1421 1436 goto out; 1422 1437 1423 - ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), 1424 - btrfs_ino(BTRFS_I(inode)), ref_index, &name); 1438 + ret = inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), 1439 + ref_index, &name); 1425 1440 if (ret < 0) { 1426 1441 goto out; 1427 1442 } else if (ret == 0) { ··· 1432 1447 * overwrite any existing back reference, and we don't 1433 1448 * want to create dangling pointers in the directory. 
1434 1449 */ 1435 - ret = __add_inode_ref(trans, root, path, log, 1436 - BTRFS_I(dir), BTRFS_I(inode), 1450 + ret = __add_inode_ref(trans, root, path, log, dir, inode, 1437 1451 inode_objectid, parent_objectid, 1438 1452 ref_index, &name); 1439 1453 if (ret) { ··· 1442 1458 } 1443 1459 1444 1460 /* insert our name */ 1445 - ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), 1446 - &name, 0, ref_index); 1461 + ret = btrfs_add_link(trans, dir, inode, &name, 0, ref_index); 1447 1462 if (ret) 1448 1463 goto out; 1449 1464 1450 - ret = btrfs_update_inode(trans, BTRFS_I(inode)); 1465 + ret = btrfs_update_inode(trans, inode); 1451 1466 if (ret) 1452 1467 goto out; 1453 1468 } ··· 1456 1473 kfree(name.name); 1457 1474 name.name = NULL; 1458 1475 if (log_ref_ver) { 1459 - iput(dir); 1476 + iput(&dir->vfs_inode); 1460 1477 dir = NULL; 1461 1478 } 1462 1479 } ··· 1469 1486 * dir index entries exist for a name but there is no inode reference 1470 1487 * item with the same name. 1471 1488 */ 1472 - ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot, 1473 - key); 1489 + ret = unlink_old_inode_refs(trans, root, path, inode, eb, slot, key); 1474 1490 if (ret) 1475 1491 goto out; 1476 1492 ··· 1478 1496 out: 1479 1497 btrfs_release_path(path); 1480 1498 kfree(name.name); 1481 - iput(dir); 1482 - iput(inode); 1499 + if (dir) 1500 + iput(&dir->vfs_inode); 1501 + if (inode) 1502 + iput(&inode->vfs_inode); 1483 1503 return ret; 1484 1504 } 1485 1505 ··· 1595 1611 * will free the inode. 
1596 1612 */ 1597 1613 static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, 1598 - struct inode *inode) 1614 + struct btrfs_inode *inode) 1599 1615 { 1600 - struct btrfs_root *root = BTRFS_I(inode)->root; 1616 + struct btrfs_root *root = inode->root; 1601 1617 struct btrfs_path *path; 1602 1618 int ret; 1603 1619 u64 nlink = 0; 1604 - u64 ino = btrfs_ino(BTRFS_I(inode)); 1620 + const u64 ino = btrfs_ino(inode); 1605 1621 1606 1622 path = btrfs_alloc_path(); 1607 1623 if (!path) 1608 1624 return -ENOMEM; 1609 1625 1610 - ret = count_inode_refs(BTRFS_I(inode), path); 1626 + ret = count_inode_refs(inode, path); 1611 1627 if (ret < 0) 1612 1628 goto out; 1613 1629 1614 1630 nlink = ret; 1615 1631 1616 - ret = count_inode_extrefs(BTRFS_I(inode), path); 1632 + ret = count_inode_extrefs(inode, path); 1617 1633 if (ret < 0) 1618 1634 goto out; 1619 1635 ··· 1621 1637 1622 1638 ret = 0; 1623 1639 1624 - if (nlink != inode->i_nlink) { 1625 - set_nlink(inode, nlink); 1626 - ret = btrfs_update_inode(trans, BTRFS_I(inode)); 1640 + if (nlink != inode->vfs_inode.i_nlink) { 1641 + set_nlink(&inode->vfs_inode, nlink); 1642 + ret = btrfs_update_inode(trans, inode); 1627 1643 if (ret) 1628 1644 goto out; 1629 1645 } 1630 - if (S_ISDIR(inode->i_mode)) 1631 - BTRFS_I(inode)->index_cnt = (u64)-1; 1646 + if (S_ISDIR(inode->vfs_inode.i_mode)) 1647 + inode->index_cnt = (u64)-1; 1632 1648 1633 - if (inode->i_nlink == 0) { 1634 - if (S_ISDIR(inode->i_mode)) { 1649 + if (inode->vfs_inode.i_nlink == 0) { 1650 + if (S_ISDIR(inode->vfs_inode.i_mode)) { 1635 1651 ret = replay_dir_deletes(trans, root, NULL, path, 1636 1652 ino, 1); 1637 1653 if (ret) ··· 1653 1669 { 1654 1670 int ret; 1655 1671 struct btrfs_key key; 1656 - struct inode *inode; 1657 1672 1658 1673 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1659 1674 key.type = BTRFS_ORPHAN_ITEM_KEY; 1660 1675 key.offset = (u64)-1; 1661 1676 while (1) { 1677 + struct btrfs_inode *inode; 1678 + 1662 1679 ret = 
btrfs_search_slot(trans, root, &key, path, -1, 1); 1663 1680 if (ret < 0) 1664 1681 break; ··· 1688 1703 } 1689 1704 1690 1705 ret = fixup_inode_link_count(trans, inode); 1691 - iput(inode); 1706 + iput(&inode->vfs_inode); 1692 1707 if (ret) 1693 1708 break; 1694 1709 ··· 1716 1731 { 1717 1732 struct btrfs_key key; 1718 1733 int ret = 0; 1719 - struct inode *inode; 1734 + struct btrfs_inode *inode; 1735 + struct inode *vfs_inode; 1720 1736 1721 1737 inode = read_one_inode(root, objectid); 1722 1738 if (!inode) 1723 1739 return -EIO; 1724 1740 1741 + vfs_inode = &inode->vfs_inode; 1725 1742 key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; 1726 1743 key.type = BTRFS_ORPHAN_ITEM_KEY; 1727 1744 key.offset = objectid; ··· 1732 1745 1733 1746 btrfs_release_path(path); 1734 1747 if (ret == 0) { 1735 - if (!inode->i_nlink) 1736 - set_nlink(inode, 1); 1748 + if (!vfs_inode->i_nlink) 1749 + set_nlink(vfs_inode, 1); 1737 1750 else 1738 - inc_nlink(inode); 1739 - ret = btrfs_update_inode(trans, BTRFS_I(inode)); 1751 + inc_nlink(vfs_inode); 1752 + ret = btrfs_update_inode(trans, inode); 1740 1753 } else if (ret == -EEXIST) { 1741 1754 ret = 0; 1742 1755 } 1743 - iput(inode); 1756 + iput(vfs_inode); 1744 1757 1745 1758 return ret; 1746 1759 } ··· 1756 1769 const struct fscrypt_str *name, 1757 1770 struct btrfs_key *location) 1758 1771 { 1759 - struct inode *inode; 1760 - struct inode *dir; 1772 + struct btrfs_inode *inode; 1773 + struct btrfs_inode *dir; 1761 1774 int ret; 1762 1775 1763 1776 inode = read_one_inode(root, location->objectid); ··· 1766 1779 1767 1780 dir = read_one_inode(root, dirid); 1768 1781 if (!dir) { 1769 - iput(inode); 1782 + iput(&inode->vfs_inode); 1770 1783 return -EIO; 1771 1784 } 1772 1785 1773 - ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, 1774 - 1, index); 1786 + ret = btrfs_add_link(trans, dir, inode, name, 1, index); 1775 1787 1776 1788 /* FIXME, put inode into FIXUP list */ 1777 1789 1778 - iput(inode); 1779 - iput(dir); 1790 + 
iput(&inode->vfs_inode); 1791 + iput(&dir->vfs_inode); 1780 1792 return ret; 1781 1793 } 1782 1794 ··· 1837 1851 bool index_dst_matches = false; 1838 1852 struct btrfs_key log_key; 1839 1853 struct btrfs_key search_key; 1840 - struct inode *dir; 1854 + struct btrfs_inode *dir; 1841 1855 u8 log_flags; 1842 1856 bool exists; 1843 1857 int ret; ··· 1867 1881 ret = PTR_ERR(dir_dst_di); 1868 1882 goto out; 1869 1883 } else if (dir_dst_di) { 1870 - ret = delete_conflicting_dir_entry(trans, BTRFS_I(dir), path, 1871 - dir_dst_di, &log_key, 1872 - log_flags, exists); 1884 + ret = delete_conflicting_dir_entry(trans, dir, path, dir_dst_di, 1885 + &log_key, log_flags, exists); 1873 1886 if (ret < 0) 1874 1887 goto out; 1875 1888 dir_dst_matches = (ret == 1); ··· 1883 1898 ret = PTR_ERR(index_dst_di); 1884 1899 goto out; 1885 1900 } else if (index_dst_di) { 1886 - ret = delete_conflicting_dir_entry(trans, BTRFS_I(dir), path, 1887 - index_dst_di, &log_key, 1888 - log_flags, exists); 1901 + ret = delete_conflicting_dir_entry(trans, dir, path, index_dst_di, 1902 + &log_key, log_flags, exists); 1889 1903 if (ret < 0) 1890 1904 goto out; 1891 1905 index_dst_matches = (ret == 1); ··· 1939 1955 1940 1956 out: 1941 1957 if (!ret && update_size) { 1942 - btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name.len * 2); 1943 - ret = btrfs_update_inode(trans, BTRFS_I(dir)); 1958 + btrfs_i_size_write(dir, dir->vfs_inode.i_size + name.len * 2); 1959 + ret = btrfs_update_inode(trans, dir); 1944 1960 } 1945 1961 kfree(name.name); 1946 - iput(dir); 1962 + iput(&dir->vfs_inode); 1947 1963 if (!ret && name_added) 1948 1964 ret = 1; 1949 1965 return ret; ··· 2100 2116 struct btrfs_root *log, 2101 2117 struct btrfs_path *path, 2102 2118 struct btrfs_path *log_path, 2103 - struct inode *dir, 2119 + struct btrfs_inode *dir, 2104 2120 struct btrfs_key *dir_key) 2105 2121 { 2106 - struct btrfs_root *root = BTRFS_I(dir)->root; 2122 + struct btrfs_root *root = dir->root; 2107 2123 int ret; 2108 2124 struct 
extent_buffer *eb; 2109 2125 int slot; 2110 2126 struct btrfs_dir_item *di; 2111 2127 struct fscrypt_str name = { 0 }; 2112 - struct inode *inode = NULL; 2128 + struct btrfs_inode *inode = NULL; 2113 2129 struct btrfs_key location; 2114 2130 2115 2131 /* ··· 2156 2172 if (ret) 2157 2173 goto out; 2158 2174 2159 - inc_nlink(inode); 2160 - ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode), 2161 - &name); 2175 + inc_nlink(&inode->vfs_inode); 2176 + ret = unlink_inode_for_log_replay(trans, dir, inode, &name); 2162 2177 /* 2163 2178 * Unlike dir item keys, dir index keys can only have one name (entry) in 2164 2179 * them, as there are no key collisions since each key has a unique offset ··· 2167 2184 btrfs_release_path(path); 2168 2185 btrfs_release_path(log_path); 2169 2186 kfree(name.name); 2170 - iput(inode); 2187 + if (inode) 2188 + iput(&inode->vfs_inode); 2171 2189 return ret; 2172 2190 } 2173 2191 ··· 2292 2308 struct btrfs_key dir_key; 2293 2309 struct btrfs_key found_key; 2294 2310 struct btrfs_path *log_path; 2295 - struct inode *dir; 2311 + struct btrfs_inode *dir; 2296 2312 2297 2313 dir_key.objectid = dirid; 2298 2314 dir_key.type = BTRFS_DIR_INDEX_KEY; ··· 2369 2385 out: 2370 2386 btrfs_release_path(path); 2371 2387 btrfs_free_path(log_path); 2372 - iput(dir); 2388 + iput(&dir->vfs_inode); 2373 2389 return ret; 2374 2390 } 2375 2391 ··· 2463 2479 */ 2464 2480 if (S_ISREG(mode)) { 2465 2481 struct btrfs_drop_extents_args drop_args = { 0 }; 2466 - struct inode *inode; 2482 + struct btrfs_inode *inode; 2467 2483 u64 from; 2468 2484 2469 2485 inode = read_one_inode(root, key.objectid); ··· 2471 2487 ret = -EIO; 2472 2488 break; 2473 2489 } 2474 - from = ALIGN(i_size_read(inode), 2490 + from = ALIGN(i_size_read(&inode->vfs_inode), 2475 2491 root->fs_info->sectorsize); 2476 2492 drop_args.start = from; 2477 2493 drop_args.end = (u64)-1; 2478 2494 drop_args.drop_cache = true; 2479 - ret = btrfs_drop_extents(wc->trans, root, 2480 - 
BTRFS_I(inode), 2495 + ret = btrfs_drop_extents(wc->trans, root, inode, 2481 2496 &drop_args); 2482 2497 if (!ret) { 2483 - inode_sub_bytes(inode, 2498 + inode_sub_bytes(&inode->vfs_inode, 2484 2499 drop_args.bytes_found); 2485 2500 /* Update the inode's nbytes. */ 2486 - ret = btrfs_update_inode(wc->trans, 2487 - BTRFS_I(inode)); 2501 + ret = btrfs_update_inode(wc->trans, inode); 2488 2502 } 2489 - iput(inode); 2503 + iput(&inode->vfs_inode); 2490 2504 if (ret) 2491 2505 break; 2492 2506 } ··· 3542 3560 struct btrfs_dir_log_item *item; 3543 3561 3544 3562 key.objectid = dirid; 3545 - key.offset = first_offset; 3546 3563 key.type = BTRFS_DIR_LOG_INDEX_KEY; 3564 + key.offset = first_offset; 3547 3565 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); 3548 3566 /* 3549 3567 * -EEXIST is fine and can happen sporadically when we are logging a ··· 5463 5481 ihold(&curr_inode->vfs_inode); 5464 5482 5465 5483 while (true) { 5466 - struct inode *vfs_inode; 5467 5484 struct btrfs_key key; 5468 5485 struct btrfs_key found_key; 5469 5486 u64 next_index; ··· 5478 5497 struct extent_buffer *leaf = path->nodes[0]; 5479 5498 struct btrfs_dir_item *di; 5480 5499 struct btrfs_key di_key; 5481 - struct inode *di_inode; 5500 + struct btrfs_inode *di_inode; 5482 5501 int log_mode = LOG_INODE_EXISTS; 5483 5502 int type; 5484 5503 ··· 5505 5524 goto out; 5506 5525 } 5507 5526 5508 - if (!need_log_inode(trans, BTRFS_I(di_inode))) { 5509 - btrfs_add_delayed_iput(BTRFS_I(di_inode)); 5527 + if (!need_log_inode(trans, di_inode)) { 5528 + btrfs_add_delayed_iput(di_inode); 5510 5529 break; 5511 5530 } 5512 5531 5513 5532 ctx->log_new_dentries = false; 5514 5533 if (type == BTRFS_FT_DIR) 5515 5534 log_mode = LOG_INODE_ALL; 5516 - ret = btrfs_log_inode(trans, BTRFS_I(di_inode), 5517 - log_mode, ctx); 5518 - btrfs_add_delayed_iput(BTRFS_I(di_inode)); 5535 + ret = btrfs_log_inode(trans, di_inode, log_mode, ctx); 5536 + btrfs_add_delayed_iput(di_inode); 5519 5537 if (ret) 5520 
5538 goto out; 5521 5539 if (ctx->log_new_dentries) { ··· 5556 5576 kfree(dir_elem); 5557 5577 5558 5578 btrfs_add_delayed_iput(curr_inode); 5559 - curr_inode = NULL; 5560 5579 5561 - vfs_inode = btrfs_iget_logging(ino, root); 5562 - if (IS_ERR(vfs_inode)) { 5563 - ret = PTR_ERR(vfs_inode); 5580 + curr_inode = btrfs_iget_logging(ino, root); 5581 + if (IS_ERR(curr_inode)) { 5582 + ret = PTR_ERR(curr_inode); 5583 + curr_inode = NULL; 5564 5584 break; 5565 5585 } 5566 - curr_inode = BTRFS_I(vfs_inode); 5567 5586 } 5568 5587 out: 5569 5588 btrfs_free_path(path); ··· 5640 5661 struct btrfs_log_ctx *ctx) 5641 5662 { 5642 5663 struct btrfs_ino_list *ino_elem; 5643 - struct inode *inode; 5664 + struct btrfs_inode *inode; 5644 5665 5645 5666 /* 5646 5667 * It's rare to have a lot of conflicting inodes, in practice it is not ··· 5731 5752 * inode in LOG_INODE_EXISTS mode and rename operations update the log, 5732 5753 * so that the log ends up with the new name and without the old name. 5733 5754 */ 5734 - if (!need_log_inode(trans, BTRFS_I(inode))) { 5735 - btrfs_add_delayed_iput(BTRFS_I(inode)); 5755 + if (!need_log_inode(trans, inode)) { 5756 + btrfs_add_delayed_iput(inode); 5736 5757 return 0; 5737 5758 } 5738 5759 5739 - btrfs_add_delayed_iput(BTRFS_I(inode)); 5760 + btrfs_add_delayed_iput(inode); 5740 5761 5741 5762 ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS); 5742 5763 if (!ino_elem) ··· 5772 5793 */ 5773 5794 while (!list_empty(&ctx->conflict_inodes)) { 5774 5795 struct btrfs_ino_list *curr; 5775 - struct inode *inode; 5796 + struct btrfs_inode *inode; 5776 5797 u64 ino; 5777 5798 u64 parent; 5778 5799 ··· 5808 5829 * dir index key range logged for the directory. So we 5809 5830 * must make sure the deletion is recorded. 
5810 5831 */ 5811 - ret = btrfs_log_inode(trans, BTRFS_I(inode), 5812 - LOG_INODE_ALL, ctx); 5813 - btrfs_add_delayed_iput(BTRFS_I(inode)); 5832 + ret = btrfs_log_inode(trans, inode, LOG_INODE_ALL, ctx); 5833 + btrfs_add_delayed_iput(inode); 5814 5834 if (ret) 5815 5835 break; 5816 5836 continue; ··· 5825 5847 * it again because if some other task logged the inode after 5826 5848 * that, we can avoid doing it again. 5827 5849 */ 5828 - if (!need_log_inode(trans, BTRFS_I(inode))) { 5829 - btrfs_add_delayed_iput(BTRFS_I(inode)); 5850 + if (!need_log_inode(trans, inode)) { 5851 + btrfs_add_delayed_iput(inode); 5830 5852 continue; 5831 5853 } 5832 5854 ··· 5837 5859 * well because during a rename we pin the log and update the 5838 5860 * log with the new name before we unpin it. 5839 5861 */ 5840 - ret = btrfs_log_inode(trans, BTRFS_I(inode), LOG_INODE_EXISTS, ctx); 5841 - btrfs_add_delayed_iput(BTRFS_I(inode)); 5862 + ret = btrfs_log_inode(trans, inode, LOG_INODE_EXISTS, ctx); 5863 + btrfs_add_delayed_iput(inode); 5842 5864 if (ret) 5843 5865 break; 5844 5866 } ··· 6329 6351 6330 6352 list_for_each_entry(item, delayed_ins_list, log_list) { 6331 6353 struct btrfs_dir_item *dir_item; 6332 - struct inode *di_inode; 6354 + struct btrfs_inode *di_inode; 6333 6355 struct btrfs_key key; 6334 6356 int log_mode = LOG_INODE_EXISTS; 6335 6357 ··· 6345 6367 break; 6346 6368 } 6347 6369 6348 - if (!need_log_inode(trans, BTRFS_I(di_inode))) { 6349 - btrfs_add_delayed_iput(BTRFS_I(di_inode)); 6370 + if (!need_log_inode(trans, di_inode)) { 6371 + btrfs_add_delayed_iput(di_inode); 6350 6372 continue; 6351 6373 } 6352 6374 ··· 6354 6376 log_mode = LOG_INODE_ALL; 6355 6377 6356 6378 ctx->log_new_dentries = false; 6357 - ret = btrfs_log_inode(trans, BTRFS_I(di_inode), log_mode, ctx); 6379 + ret = btrfs_log_inode(trans, di_inode, log_mode, ctx); 6358 6380 6359 6381 if (!ret && ctx->log_new_dentries) 6360 - ret = log_new_dir_dentries(trans, BTRFS_I(di_inode), ctx); 6382 + ret = 
log_new_dir_dentries(trans, di_inode, ctx); 6361 6383 6362 - btrfs_add_delayed_iput(BTRFS_I(di_inode)); 6384 + btrfs_add_delayed_iput(di_inode); 6363 6385 6364 6386 if (ret) 6365 6387 break; ··· 6767 6789 ptr = btrfs_item_ptr_offset(leaf, slot); 6768 6790 while (cur_offset < item_size) { 6769 6791 struct btrfs_key inode_key; 6770 - struct inode *dir_inode; 6792 + struct btrfs_inode *dir_inode; 6771 6793 6772 6794 inode_key.type = BTRFS_INODE_ITEM_KEY; 6773 6795 inode_key.offset = 0; ··· 6816 6838 goto out; 6817 6839 } 6818 6840 6819 - if (!need_log_inode(trans, BTRFS_I(dir_inode))) { 6820 - btrfs_add_delayed_iput(BTRFS_I(dir_inode)); 6841 + if (!need_log_inode(trans, dir_inode)) { 6842 + btrfs_add_delayed_iput(dir_inode); 6821 6843 continue; 6822 6844 } 6823 6845 6824 6846 ctx->log_new_dentries = false; 6825 - ret = btrfs_log_inode(trans, BTRFS_I(dir_inode), 6826 - LOG_INODE_ALL, ctx); 6847 + ret = btrfs_log_inode(trans, dir_inode, LOG_INODE_ALL, ctx); 6827 6848 if (!ret && ctx->log_new_dentries) 6828 - ret = log_new_dir_dentries(trans, 6829 - BTRFS_I(dir_inode), ctx); 6830 - btrfs_add_delayed_iput(BTRFS_I(dir_inode)); 6849 + ret = log_new_dir_dentries(trans, dir_inode, ctx); 6850 + btrfs_add_delayed_iput(dir_inode); 6831 6851 if (ret) 6832 6852 goto out; 6833 6853 } ··· 6850 6874 struct extent_buffer *leaf; 6851 6875 int slot; 6852 6876 struct btrfs_key search_key; 6853 - struct inode *inode; 6877 + struct btrfs_inode *inode; 6854 6878 u64 ino; 6855 6879 int ret = 0; 6856 6880 ··· 6865 6889 if (IS_ERR(inode)) 6866 6890 return PTR_ERR(inode); 6867 6891 6868 - if (BTRFS_I(inode)->generation >= trans->transid && 6869 - need_log_inode(trans, BTRFS_I(inode))) 6870 - ret = btrfs_log_inode(trans, BTRFS_I(inode), 6871 - LOG_INODE_EXISTS, ctx); 6872 - btrfs_add_delayed_iput(BTRFS_I(inode)); 6892 + if (inode->generation >= trans->transid && 6893 + need_log_inode(trans, inode)) 6894 + ret = btrfs_log_inode(trans, inode, LOG_INODE_EXISTS, ctx); 6895 + 
btrfs_add_delayed_iput(inode); 6873 6896 if (ret) 6874 6897 return ret; 6875 6898 ··· 7036 7061 struct btrfs_root *root = inode->root; 7037 7062 struct btrfs_fs_info *fs_info = root->fs_info; 7038 7063 int ret = 0; 7039 - bool log_dentries = false; 7064 + bool log_dentries; 7040 7065 7041 - if (btrfs_test_opt(fs_info, NOTREELOG)) { 7042 - ret = BTRFS_LOG_FORCE_COMMIT; 7043 - goto end_no_trans; 7044 - } 7066 + if (btrfs_test_opt(fs_info, NOTREELOG)) 7067 + return BTRFS_LOG_FORCE_COMMIT; 7045 7068 7046 - if (btrfs_root_refs(&root->root_item) == 0) { 7047 - ret = BTRFS_LOG_FORCE_COMMIT; 7048 - goto end_no_trans; 7049 - } 7069 + if (btrfs_root_refs(&root->root_item) == 0) 7070 + return BTRFS_LOG_FORCE_COMMIT; 7050 7071 7051 7072 /* 7052 7073 * If we're logging an inode from a subvolume created in the current 7053 7074 * transaction we must force a commit since the root is not persisted. 7054 7075 */ 7055 - if (btrfs_root_generation(&root->root_item) == trans->transid) { 7056 - ret = BTRFS_LOG_FORCE_COMMIT; 7057 - goto end_no_trans; 7058 - } 7076 + if (btrfs_root_generation(&root->root_item) == trans->transid) 7077 + return BTRFS_LOG_FORCE_COMMIT; 7059 7078 7060 7079 /* 7061 7080 * Skip already logged inodes or inodes corresponding to tmpfiles ··· 7058 7089 */ 7059 7090 if ((btrfs_inode_in_log(inode, trans->transid) && 7060 7091 list_empty(&ctx->ordered_extents)) || 7061 - inode->vfs_inode.i_nlink == 0) { 7062 - ret = BTRFS_NO_LOG_SYNC; 7063 - goto end_no_trans; 7064 - } 7092 + inode->vfs_inode.i_nlink == 0) 7093 + return BTRFS_NO_LOG_SYNC; 7065 7094 7066 7095 ret = start_log_trans(trans, root, ctx); 7067 7096 if (ret) 7068 - goto end_no_trans; 7097 + return ret; 7069 7098 7070 7099 ret = btrfs_log_inode(trans, inode, inode_only, ctx); 7071 7100 if (ret) ··· 7082 7115 goto end_trans; 7083 7116 } 7084 7117 7085 - if (S_ISDIR(inode->vfs_inode.i_mode) && ctx->log_new_dentries) 7086 - log_dentries = true; 7118 + /* 7119 + * Track if we need to log dentries because 
ctx->log_new_dentries can 7120 + * be modified in the call chains below. 7121 + */ 7122 + log_dentries = ctx->log_new_dentries; 7087 7123 7088 7124 /* 7089 7125 * On unlink we must make sure all our current and old parent directory ··· 7141 7171 7142 7172 if (log_dentries) 7143 7173 ret = log_new_dir_dentries(trans, inode, ctx); 7144 - else 7145 - ret = 0; 7146 7174 end_trans: 7147 7175 if (ret < 0) { 7148 7176 btrfs_set_log_full_commit(trans); ··· 7150 7182 if (ret) 7151 7183 btrfs_remove_log_ctx(root, ctx); 7152 7184 btrfs_end_log_trans(root); 7153 - end_no_trans: 7185 + 7154 7186 return ret; 7155 7187 } 7156 7188 ··· 7215 7247 7216 7248 again: 7217 7249 key.objectid = BTRFS_TREE_LOG_OBJECTID; 7218 - key.offset = (u64)-1; 7219 7250 key.type = BTRFS_ROOT_ITEM_KEY; 7251 + key.offset = (u64)-1; 7220 7252 7221 7253 while (1) { 7222 7254 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
+2 -2
fs/btrfs/verity.c
··· 485 485 goto out; 486 486 } 487 487 inode->ro_flags &= ~BTRFS_INODE_RO_VERITY; 488 - btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode); 488 + btrfs_sync_inode_flags_to_i_flags(inode); 489 489 ret = btrfs_update_inode(trans, inode); 490 490 if (ret) { 491 491 btrfs_abort_transaction(trans, ret); ··· 552 552 goto out; 553 553 } 554 554 inode->ro_flags |= BTRFS_INODE_RO_VERITY; 555 - btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode); 555 + btrfs_sync_inode_flags_to_i_flags(inode); 556 556 ret = btrfs_update_inode(trans, inode); 557 557 if (ret) 558 558 goto end_trans;
+8 -8
fs/btrfs/volumes.c
··· 1798 1798 path->skip_locking = 1; 1799 1799 1800 1800 key.objectid = device->devid; 1801 - key.offset = search_start; 1802 1801 key.type = BTRFS_DEV_EXTENT_KEY; 1802 + key.offset = search_start; 1803 1803 1804 1804 ret = btrfs_search_backwards(root, &key, path); 1805 1805 if (ret < 0) ··· 1918 1918 return -ENOMEM; 1919 1919 1920 1920 key.objectid = device->devid; 1921 - key.offset = start; 1922 1921 key.type = BTRFS_DEV_EXTENT_KEY; 1922 + key.offset = start; 1923 1923 again: 1924 1924 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1925 1925 if (ret > 0) { ··· 2721 2721 return -ENOMEM; 2722 2722 2723 2723 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 2724 - key.offset = 0; 2725 2724 key.type = BTRFS_DEV_ITEM_KEY; 2725 + key.offset = 0; 2726 2726 2727 2727 while (1) { 2728 2728 btrfs_reserve_chunk_metadata(trans, false); ··· 3119 3119 return -ENOMEM; 3120 3120 3121 3121 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 3122 - key.offset = chunk_offset; 3123 3122 key.type = BTRFS_CHUNK_ITEM_KEY; 3123 + key.offset = chunk_offset; 3124 3124 3125 3125 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 3126 3126 if (ret < 0) ··· 3577 3577 3578 3578 again: 3579 3579 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 3580 - key.offset = (u64)-1; 3581 3580 key.type = BTRFS_CHUNK_ITEM_KEY; 3581 + key.offset = (u64)-1; 3582 3582 3583 3583 while (1) { 3584 3584 mutex_lock(&fs_info->reclaim_bgs_lock); ··· 4184 4184 bctl->sys.limit = limit_sys; 4185 4185 } 4186 4186 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 4187 - key.offset = (u64)-1; 4188 4187 key.type = BTRFS_CHUNK_ITEM_KEY; 4188 + key.offset = (u64)-1; 4189 4189 4190 4190 while (1) { 4191 4191 if ((!counting && atomic_read(&fs_info->balance_pause_req)) || ··· 5001 5001 5002 5002 again: 5003 5003 key.objectid = device->devid; 5004 - key.offset = (u64)-1; 5005 5004 key.type = BTRFS_DEV_EXTENT_KEY; 5005 + key.offset = (u64)-1; 5006 5006 5007 5007 do { 5008 5008 mutex_lock(&fs_info->reclaim_bgs_lock); ··· 7539 
7539 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). 7540 7540 */ 7541 7541 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 7542 - key.offset = 0; 7543 7542 key.type = 0; 7543 + key.offset = 0; 7544 7544 btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) { 7545 7545 struct extent_buffer *node = path->nodes[1]; 7546 7546
+4
fs/btrfs/volumes.h
··· 7 7 #define BTRFS_VOLUMES_H 8 8 9 9 #include <linux/blk_types.h> 10 + #include <linux/blkdev.h> 10 11 #include <linux/sizes.h> 11 12 #include <linux/atomic.h> 12 13 #include <linux/sort.h> ··· 19 18 #include <linux/completion.h> 20 19 #include <linux/rbtree.h> 21 20 #include <uapi/linux/btrfs.h> 21 + #include <uapi/linux/btrfs_tree.h> 22 22 #include "messages.h" 23 23 #include "rcu-string.h" 24 + #include "extent-io-tree.h" 24 25 25 26 struct block_device; 26 27 struct bdev_handle; 27 28 struct btrfs_fs_info; 28 29 struct btrfs_block_group; 29 30 struct btrfs_trans_handle; 31 + struct btrfs_transaction; 30 32 struct btrfs_zoned_device_info; 31 33 32 34 #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
+2
fs/btrfs/xattr.h
··· 6 6 #ifndef BTRFS_XATTR_H 7 7 #define BTRFS_XATTR_H 8 8 9 + #include <linux/types.h> 10 + 9 11 struct dentry; 10 12 struct inode; 11 13 struct qstr;
+55 -28
fs/btrfs/zlib.c
··· 94 94 return ERR_PTR(-ENOMEM); 95 95 } 96 96 97 + /* 98 + * Helper for S390x with hardware zlib compression support. 99 + * 100 + * That hardware acceleration requires a buffer size larger than a single page 101 + * to get ideal performance, thus we need to do the memory copy rather than 102 + * use the page cache directly as input buffer. 103 + */ 104 + static int copy_data_into_buffer(struct address_space *mapping, 105 + struct workspace *workspace, u64 filepos, 106 + unsigned long length) 107 + { 108 + u64 cur = filepos; 109 + 110 + /* It's only for hardware accelerated zlib code. */ 111 + ASSERT(zlib_deflate_dfltcc_enabled()); 112 + 113 + while (cur < filepos + length) { 114 + struct folio *folio; 115 + void *data_in; 116 + unsigned int offset; 117 + unsigned long copy_length; 118 + int ret; 119 + 120 + ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio); 121 + if (ret < 0) 122 + return ret; 123 + /* No large folio support yet. */ 124 + ASSERT(!folio_test_large(folio)); 125 + 126 + offset = offset_in_folio(folio, cur); 127 + copy_length = min(folio_size(folio) - offset, 128 + filepos + length - cur); 129 + 130 + data_in = kmap_local_folio(folio, offset); 131 + memcpy(workspace->buf + cur - filepos, data_in, copy_length); 132 + kunmap_local(data_in); 133 + cur += copy_length; 134 + } 135 + return 0; 136 + } 137 + 97 138 int zlib_compress_folios(struct list_head *ws, struct address_space *mapping, 98 139 u64 start, struct folio **folios, unsigned long *out_folios, 99 140 unsigned long *total_in, unsigned long *total_out) ··· 146 105 int nr_folios = 0; 147 106 struct folio *in_folio = NULL; 148 107 struct folio *out_folio = NULL; 149 - unsigned long bytes_left; 150 - unsigned int in_buf_folios; 151 108 unsigned long len = *total_out; 152 109 unsigned long nr_dest_folios = *out_folios; 153 110 const unsigned long max_out = nr_dest_folios * PAGE_SIZE; ··· 189 150 * the workspace buffer if required. 
190 151 */ 191 152 if (workspace->strm.avail_in == 0) { 192 - bytes_left = len - workspace->strm.total_in; 193 - in_buf_folios = min(DIV_ROUND_UP(bytes_left, PAGE_SIZE), 194 - workspace->buf_size / PAGE_SIZE); 195 - if (in_buf_folios > 1) { 196 - int i; 153 + unsigned long bytes_left = len - workspace->strm.total_in; 154 + unsigned int copy_length = min(bytes_left, workspace->buf_size); 197 155 198 - /* S390 hardware acceleration path, not subpage. */ 199 - ASSERT(!btrfs_is_subpage( 200 - inode_to_fs_info(mapping->host), 201 - mapping)); 202 - for (i = 0; i < in_buf_folios; i++) { 203 - if (data_in) { 204 - kunmap_local(data_in); 205 - folio_put(in_folio); 206 - data_in = NULL; 207 - } 208 - ret = btrfs_compress_filemap_get_folio(mapping, 209 - start, &in_folio); 210 - if (ret < 0) 211 - goto out; 212 - data_in = kmap_local_folio(in_folio, 0); 213 - copy_page(workspace->buf + i * PAGE_SIZE, 214 - data_in); 215 - start += PAGE_SIZE; 216 - } 156 + /* 157 + * This can only happen when hardware zlib compression is 158 + * enabled. 159 + */ 160 + if (copy_length > PAGE_SIZE) { 161 + ret = copy_data_into_buffer(mapping, workspace, 162 + start, copy_length); 163 + if (ret < 0) 164 + goto out; 165 + start += copy_length; 217 166 workspace->strm.next_in = workspace->buf; 218 - workspace->strm.avail_in = min(bytes_left, 219 - in_buf_folios << PAGE_SHIFT); 167 + workspace->strm.avail_in = copy_length; 220 168 } else { 221 169 unsigned int pg_off; 222 170 unsigned int cur_len; ··· 489 463 490 464 const struct btrfs_compress_op btrfs_zlib_compress = { 491 465 .workspace_manager = &wsm, 466 + .min_level = 1, 492 467 .max_level = 9, 493 468 .default_level = BTRFS_ZLIB_DEFAULT_LEVEL, 494 469 };
+9
fs/btrfs/zoned.c
··· 2111 2111 physical = map->stripes[i].physical; 2112 2112 zinfo = device->zone_info; 2113 2113 2114 + if (!device->bdev) 2115 + continue; 2116 + 2114 2117 if (zinfo->max_active_zones == 0) 2115 2118 continue; 2116 2119 ··· 2275 2272 struct btrfs_zoned_device_info *zinfo = device->zone_info; 2276 2273 unsigned int nofs_flags; 2277 2274 2275 + if (!device->bdev) 2276 + continue; 2277 + 2278 2278 if (zinfo->max_active_zones == 0) 2279 2279 continue; 2280 2280 ··· 2330 2324 2331 2325 if (!btrfs_is_zoned(fs_info)) 2332 2326 return true; 2327 + 2328 + if (test_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags)) 2329 + return false; 2333 2330 2334 2331 /* Check if there is a device with active zones left */ 2335 2332 mutex_lock(&fs_info->chunk_mutex);
+36 -30
fs/btrfs/zstd.c
··· 26 26 #define ZSTD_BTRFS_MAX_WINDOWLOG 17 27 27 #define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG) 28 28 #define ZSTD_BTRFS_DEFAULT_LEVEL 3 29 + #define ZSTD_BTRFS_MIN_LEVEL -15 29 30 #define ZSTD_BTRFS_MAX_LEVEL 15 30 31 /* 307s to avoid pathologically clashing with transaction commit */ 31 32 #define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ) 32 33 33 - static zstd_parameters zstd_get_btrfs_parameters(unsigned int level, 34 + static zstd_parameters zstd_get_btrfs_parameters(int level, 34 35 size_t src_len) 35 36 { 36 37 zstd_parameters params = zstd_get_params(level, src_len); ··· 46 45 void *mem; 47 46 size_t size; 48 47 char *buf; 49 - unsigned int level; 50 - unsigned int req_level; 48 + int level; 49 + int req_level; 51 50 unsigned long last_used; /* jiffies */ 52 51 struct list_head list; 53 52 struct list_head lru_list; 54 53 zstd_in_buffer in_buf; 55 54 zstd_out_buffer out_buf; 55 + zstd_parameters params; 56 56 }; 57 57 58 58 /* ··· 95 93 return container_of(list, struct workspace, list); 96 94 } 97 95 98 - void zstd_free_workspace(struct list_head *ws); 99 - struct list_head *zstd_alloc_workspace(unsigned int level); 96 + static inline int clip_level(int level) 97 + { 98 + return max(0, level - 1); 99 + } 100 100 101 101 /* 102 102 * Timer callback to free unused workspaces. 
··· 127 123 list_for_each_prev_safe(pos, next, &wsm.lru_list) { 128 124 struct workspace *victim = container_of(pos, struct workspace, 129 125 lru_list); 130 - unsigned int level; 126 + int level; 131 127 132 128 if (time_after(victim->last_used, reclaim_threshold)) 133 129 break; ··· 141 137 list_del(&victim->list); 142 138 zstd_free_workspace(&victim->list); 143 139 144 - if (list_empty(&wsm.idle_ws[level - 1])) 145 - clear_bit(level - 1, &wsm.active_map); 140 + if (list_empty(&wsm.idle_ws[level])) 141 + clear_bit(level, &wsm.active_map); 146 142 147 143 } 148 144 ··· 164 160 static void zstd_calc_ws_mem_sizes(void) 165 161 { 166 162 size_t max_size = 0; 167 - unsigned int level; 163 + int level; 168 164 169 - for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) { 165 + for (level = ZSTD_BTRFS_MIN_LEVEL; level <= ZSTD_BTRFS_MAX_LEVEL; level++) { 166 + if (level == 0) 167 + continue; 170 168 zstd_parameters params = 171 169 zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT); 172 170 size_t level_size = ··· 177 171 zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT)); 178 172 179 173 max_size = max_t(size_t, max_size, level_size); 180 - zstd_ws_mem_sizes[level - 1] = max_size; 174 + /* Use level 1 workspace size for all the fast mode negative levels. */ 175 + zstd_ws_mem_sizes[clip_level(level)] = max_size; 181 176 } 182 177 } 183 178 ··· 240 233 * offer the opportunity to reclaim the workspace in favor of allocating an 241 234 * appropriately sized one in the future. 
242 235 */ 243 - static struct list_head *zstd_find_workspace(unsigned int level) 236 + static struct list_head *zstd_find_workspace(int level) 244 237 { 245 238 struct list_head *ws; 246 239 struct workspace *workspace; 247 - int i = level - 1; 240 + int i = clip_level(level); 248 241 249 242 spin_lock_bh(&wsm.lock); 250 243 for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) { ··· 254 247 list_del_init(ws); 255 248 /* keep its place if it's a lower level using this */ 256 249 workspace->req_level = level; 257 - if (level == workspace->level) 250 + if (clip_level(level) == workspace->level) 258 251 list_del(&workspace->lru_list); 259 252 if (list_empty(&wsm.idle_ws[i])) 260 253 clear_bit(i, &wsm.active_map); ··· 277 270 * attempt to allocate a new workspace. If we fail to allocate one due to 278 271 * memory pressure, go to sleep waiting for the max level workspace to free up. 279 272 */ 280 - struct list_head *zstd_get_workspace(unsigned int level) 273 + struct list_head *zstd_get_workspace(int level) 281 274 { 282 275 struct list_head *ws; 283 276 unsigned int nofs_flag; ··· 326 319 spin_lock_bh(&wsm.lock); 327 320 328 321 /* A node is only taken off the lru if we are the corresponding level */ 329 - if (workspace->req_level == workspace->level) { 322 + if (clip_level(workspace->req_level) == workspace->level) { 330 323 /* Hide a max level workspace from reclaim */ 331 324 if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) { 332 325 INIT_LIST_HEAD(&workspace->lru_list); ··· 339 332 } 340 333 } 341 334 342 - set_bit(workspace->level - 1, &wsm.active_map); 343 - list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]); 335 + set_bit(workspace->level, &wsm.active_map); 336 + list_add(&workspace->list, &wsm.idle_ws[workspace->level]); 344 337 workspace->req_level = 0; 345 338 346 339 spin_unlock_bh(&wsm.lock); 347 340 348 - if (workspace->level == ZSTD_BTRFS_MAX_LEVEL) 341 + if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL)) 349 342 
cond_wake_up(&wsm.wait); 350 343 } 351 344 ··· 358 351 kfree(workspace); 359 352 } 360 353 361 - struct list_head *zstd_alloc_workspace(unsigned int level) 354 + struct list_head *zstd_alloc_workspace(int level) 362 355 { 363 356 struct workspace *workspace; 364 357 ··· 366 359 if (!workspace) 367 360 return ERR_PTR(-ENOMEM); 368 361 369 - workspace->size = zstd_ws_mem_sizes[level - 1]; 370 - workspace->level = level; 362 + /* Use level 1 workspace size for all the fast mode negative levels. */ 363 + workspace->size = zstd_ws_mem_sizes[clip_level(level)]; 364 + workspace->level = clip_level(level); 371 365 workspace->req_level = level; 372 366 workspace->last_used = jiffies; 373 367 workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN); ··· 401 393 const unsigned long nr_dest_folios = *out_folios; 402 394 const u64 orig_end = start + len; 403 395 unsigned long max_out = nr_dest_folios * PAGE_SIZE; 404 - unsigned int pg_off; 405 396 unsigned int cur_len; 406 - zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level, 407 - len); 408 397 398 + workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len); 409 399 *out_folios = 0; 410 400 *total_out = 0; 411 401 *total_in = 0; 412 402 413 403 /* Initialize the stream */ 414 - stream = zstd_init_cstream(&params, len, workspace->mem, 404 + stream = zstd_init_cstream(&workspace->params, len, workspace->mem, 415 405 workspace->size); 416 406 if (unlikely(!stream)) { 417 407 struct btrfs_inode *inode = BTRFS_I(mapping->host); ··· 426 420 ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio); 427 421 if (ret < 0) 428 422 goto out; 429 - pg_off = offset_in_page(start); 430 423 cur_len = btrfs_calc_input_length(orig_end, start); 431 - workspace->in_buf.src = kmap_local_folio(in_folio, pg_off); 424 + workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_page(start)); 432 425 workspace->in_buf.pos = 0; 433 426 workspace->in_buf.size = cur_len; 434 427 ··· 511 506 ret 
= btrfs_compress_filemap_get_folio(mapping, start, &in_folio); 512 507 if (ret < 0) 513 508 goto out; 514 - pg_off = offset_in_page(start); 515 509 cur_len = btrfs_calc_input_length(orig_end, start); 516 - workspace->in_buf.src = kmap_local_folio(in_folio, pg_off); 510 + workspace->in_buf.src = kmap_local_folio(in_folio, 511 + offset_in_page(start)); 517 512 workspace->in_buf.pos = 0; 518 513 workspace->in_buf.size = cur_len; 519 514 } ··· 722 717 const struct btrfs_compress_op btrfs_zstd_compress = { 723 718 /* ZSTD uses own workspace manager */ 724 719 .workspace_manager = NULL, 720 + .min_level = ZSTD_BTRFS_MIN_LEVEL, 725 721 .max_level = ZSTD_BTRFS_MAX_LEVEL, 726 722 .default_level = ZSTD_BTRFS_DEFAULT_LEVEL, 727 723 };
+13 -3
include/uapi/linux/btrfs.h
··· 615 615 */ 616 616 #define BTRFS_DEFRAG_RANGE_COMPRESS 1 617 617 #define BTRFS_DEFRAG_RANGE_START_IO 2 618 + #define BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL 4 618 619 #define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ 620 + BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL | \ 619 621 BTRFS_DEFRAG_RANGE_START_IO) 620 622 621 623 struct btrfs_ioctl_defrag_range_args { ··· 642 640 643 641 /* 644 642 * which compression method to use if turning on compression 645 - * for this defrag operation. If unspecified, zlib will 646 - * be used 643 + * for this defrag operation. If unspecified, zlib will be 644 + * used. If compression level is also being specified, set the 645 + * BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL flag and fill the compress 646 + * member structure instead of the compress_type field. 647 647 */ 648 - __u32 compress_type; 648 + union { 649 + __u32 compress_type; 650 + struct { 651 + __u8 type; 652 + __s8 level; 653 + } compress; 654 + }; 649 655 650 656 /* spare for later */ 651 657 __u32 unused[4];