
btrfs: move fiemap code into its own file

Currently the core of the fiemap code lives in extent_io.c, which does
not make any sense because it's not related to extent IO at all (and it
was not related to it even before the big rewrite of fiemap I did some
time ago).
The entry point for fiemap, btrfs_fiemap(), lives in inode.c since it's
an inode operation.

Since there's a significant amount of fiemap code, move all of it into a
dedicated file, including its entry point inode.c:btrfs_fiemap().

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

Authored by Filipe Manana, committed by David Sterba

8996f61a f9763e4d
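For context on the entry point mentioned above: the diff below does not touch the hookup that makes btrfs_fiemap() an inode operation, which stays in fs/btrfs/inode.c. As a rough, illustrative sketch of that wiring (not part of this commit; the real table has more callbacks and its contents vary by kernel version):

static const struct inode_operations btrfs_file_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.listxattr	= btrfs_listxattr,
	.permission	= btrfs_permission,
	/* other callbacks omitted from this sketch */
	.fiemap		= btrfs_fiemap,
};

The VFS reaches ->fiemap through ioctl_fiemap() and fiemap_prep() in fs/ioctl.c when user space issues the FS_IOC_FIEMAP ioctl.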

+943 -925

fs/btrfs/Makefile (+1 -1)

@@ -33,7 +33,7 @@
 	uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
 	block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
 	subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
-	lru_cache.o raid-stripe-tree.o
+	lru_cache.o raid-stripe-tree.o fiemap.o

 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
fs/btrfs/extent_io.c (-871)

@@ (871 lines removed)
 	return try_release_extent_state(io_tree, page, mask);
 }

-[871 lines removed here: the entire fiemap implementation, i.e. struct
-btrfs_fiemap_entry, struct fiemap_cache, flush_fiemap_cache(),
-emit_fiemap_extent(), emit_last_fiemap_cache(), fiemap_next_leaf_item(),
-fiemap_search_slot(), fiemap_process_hole(), fiemap_find_last_extent_offset()
-and extent_fiemap(). The removed code is identical to the code added in
-fs/btrfs/fiemap.c below, except that extent_fiemap() becomes static there.]

 static void __free_extent_buffer(struct extent_buffer *eb)
 {
 	kmem_cache_free(extent_buffer_cache, eb);
fs/btrfs/extent_io.h (-2)

@@ -242,8 +242,6 @@
 int btree_write_cache_pages(struct address_space *mapping,
 			    struct writeback_control *wbc);
 void btrfs_readahead(struct readahead_control *rac);
-int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
-		  u64 start, u64 len);
 int set_folio_extent_mapped(struct folio *folio);
 int set_page_extent_mapped(struct page *page);
 void clear_page_extent_mapped(struct page *page);
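The diff shown on this page does not include where the entry point's declaration lands after leaving extent_io.h. Since extent_fiemap() becomes static in the new file, presumably only btrfs_fiemap() needs a public declaration, in a new fs/btrfs/fiemap.h (implied by the #include "fiemap.h" in the new file below). A sketch of what such a header would contain, assuming it follows the usual btrfs header pattern:

/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_FIEMAP_H
#define BTRFS_FIEMAP_H

#include <linux/fiemap.h>

int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len);

#endif /* BTRFS_FIEMAP_H */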
fs/btrfs/fiemap.c (new file, +930)

// SPDX-License-Identifier: GPL-2.0

#include "backref.h"
#include "btrfs_inode.h"
#include "fiemap.h"
#include "file.h"
#include "file-item.h"

struct btrfs_fiemap_entry {
	u64 offset;
	u64 phys;
	u64 len;
	u32 flags;
};

/*
 * Indicate to the caller of emit_fiemap_extent() that it needs to unlock the
 * file range from the inode's io tree, unlock the subvolume tree search path,
 * flush the fiemap cache and then relock the file range and re-search the
 * subvolume tree. The value here is something negative that can't be confused
 * with a valid errno value and different from 1 because that's also a return
 * value from fiemap_fill_next_extent() and also it's often used to mean some
 * btree search did not find a key, so make it some distinct negative value.
 */
#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1))

/*
 * Used to:
 *
 * - Cache the next entry to be emitted to the fiemap buffer, so that we can
 *   merge extents that are contiguous and can be grouped as a single one;
 *
 * - Store extents ready to be written to the fiemap buffer in an intermediary
 *   buffer. This intermediary buffer is to ensure that in case the fiemap
 *   buffer is memory mapped to the fiemap target file, we don't deadlock
 *   during btrfs_page_mkwrite(). This is because during fiemap we are locking
 *   an extent range in order to prevent races with delalloc flushing and
 *   ordered extent completion, which is needed in order to reliably detect
 *   delalloc in holes and prealloc extents. And this can lead to a deadlock
 *   if the fiemap buffer is memory mapped to the file we are running fiemap
 *   against (a silly, useless in practice scenario, but possible) because
 *   btrfs_page_mkwrite() will try to lock the same extent range.
 */
struct fiemap_cache {
	/* An array of ready fiemap entries. */
	struct btrfs_fiemap_entry *entries;
	/* Number of entries in the entries array. */
	int entries_size;
	/* Index of the next entry in the entries array to write to. */
	int entries_pos;
	/*
	 * Once the entries array is full, this indicates what's the offset for
	 * the next file extent item we must search for in the inode's subvolume
	 * tree after unlocking the extent range in the inode's io tree and
	 * releasing the search path.
	 */
	u64 next_search_offset;
	/*
	 * This matches struct fiemap_extent_info::fi_mapped_extents, we use it
	 * to count the emitted extents ourselves and stop instead of relying on
	 * fiemap_fill_next_extent() because we buffer ready fiemap entries in
	 * the @entries array, and we want to stop as soon as we hit the max
	 * amount of extents to map, not just to save time but also to make the
	 * logic at extent_fiemap() simpler.
	 */
	unsigned int extents_mapped;
	/* Fields for the cached extent (unsubmitted, not ready, extent). */
	u64 offset;
	u64 phys;
	u64 len;
	u32 flags;
	bool cached;
};

static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
			      struct fiemap_cache *cache)
{
	for (int i = 0; i < cache->entries_pos; i++) {
		struct btrfs_fiemap_entry *entry = &cache->entries[i];
		int ret;

		ret = fiemap_fill_next_extent(fieinfo, entry->offset,
					      entry->phys, entry->len,
					      entry->flags);
		/*
		 * Ignore 1 (reached max entries) because we keep track of that
		 * ourselves in emit_fiemap_extent().
		 */
		if (ret < 0)
			return ret;
	}
	cache->entries_pos = 0;

	return 0;
}

/*
 * Helper to submit a fiemap extent.
 *
 * Will try to merge the current fiemap extent, specified by @offset, @phys,
 * @len and @flags, with the cached one.
 * Only when merging fails is the cached one submitted as a fiemap extent.
 *
 * Return value is the same as fiemap_fill_next_extent().
 */
static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
			      struct fiemap_cache *cache,
			      u64 offset, u64 phys, u64 len, u32 flags)
{
	struct btrfs_fiemap_entry *entry;
	u64 cache_end;

	/* Set at the end of extent_fiemap(). */
	ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);

	if (!cache->cached)
		goto assign;

	/*
	 * When iterating the extents of the inode, at extent_fiemap(), we may
	 * find an extent that starts at an offset behind the end offset of the
	 * previous extent we processed. This happens if fiemap is called
	 * without FIEMAP_FLAG_SYNC and there are ordered extents completing
	 * after we had to unlock the file range, release the search path, emit
	 * the fiemap extents stored in the buffer (cache->entries array) and
	 * then lock the remainder of the range and re-search the btree.
	 *
	 * For example we are in leaf X processing its last item, which is the
	 * file extent item for file range [512K, 1M[, and after
	 * btrfs_next_leaf() releases the path, there's an ordered extent that
	 * completes for the file range [768K, 2M[, and that results in trimming
	 * the file extent item so that it now corresponds to the file range
	 * [512K, 768K[ and a new file extent item is inserted for the file
	 * range [768K, 2M[, which may end up as the last item of leaf X or as
	 * the first item of the next leaf - in either case btrfs_next_leaf()
	 * will leave us with a path pointing to the new extent item, for the
	 * file range [768K, 2M[, since that's the first key that follows the
	 * last one we processed. So in order not to report overlapping extents
	 * to user space, we trim the length of the previously cached extent and
	 * emit it.
	 *
	 * Upon calling btrfs_next_leaf() we may also find an extent with an
	 * offset smaller than or equal to cache->offset, and this happens
	 * when we had a hole or prealloc extent with several delalloc ranges in
	 * it, but after btrfs_next_leaf() released the path, delalloc was
	 * flushed and the resulting ordered extents were completed, so we can
	 * now have found a file extent item for an offset that is smaller than
	 * or equal to what we have in cache->offset. We deal with this as
	 * described below.
	 */
	cache_end = cache->offset + cache->len;
	if (cache_end > offset) {
		if (offset == cache->offset) {
			/*
			 * We cached a delalloc range (found in the io tree) for
			 * a hole or prealloc extent and we have now found a
			 * file extent item for the same offset. What we have
			 * now is more recent and up to date, so discard what
			 * we had in the cache and use what we have just found.
			 */
			goto assign;
		} else if (offset > cache->offset) {
			/*
			 * The extent range we previously found ends after the
			 * offset of the file extent item we found and that
			 * offset falls somewhere in the middle of that previous
			 * extent range. So adjust the range we previously found
			 * to end at the offset of the file extent item we have
			 * just found, since this extent is more up to date.
			 * Emit that adjusted range and cache the file extent
			 * item we have just found. This corresponds to the case
			 * where a previously found file extent item was split
			 * due to an ordered extent completing.
			 */
			cache->len = offset - cache->offset;
			goto emit;
		} else {
			const u64 range_end = offset + len;

			/*
			 * The offset of the file extent item we have just found
			 * is behind the cached offset. This means we were
			 * processing a hole or prealloc extent for which we
			 * have found delalloc ranges (in the io tree), so what
			 * we have in the cache is the last delalloc range we
			 * found while the file extent item we found can be
			 * either for a whole delalloc range we previously
			 * emitted or only a part of that range.
			 *
			 * We have two cases here:
			 *
			 * 1) The file extent item's range ends at or behind the
			 *    cached extent's end. In this case just ignore the
			 *    current file extent item because we don't want to
			 *    overlap with previous ranges that may have been
			 *    emitted already;
			 *
			 * 2) The file extent item starts behind the currently
			 *    cached extent but its end offset goes beyond the
			 *    end offset of the cached extent. We don't want to
			 *    overlap with a previous range that may have been
			 *    emitted already, so we emit the currently cached
			 *    extent and then partially store the current file
			 *    extent item's range in the cache, for the subrange
			 *    going from the cached extent's end to the end of
			 *    the file extent item.
			 */
			if (range_end <= cache_end)
				return 0;

			if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC)))
				phys += cache_end - offset;

			offset = cache_end;
			len = range_end - cache_end;
			goto emit;
		}
	}

	/*
	 * Only merge fiemap extents if:
	 *
	 * 1) Their logical addresses are continuous;
	 *
	 * 2) Their physical addresses are continuous, so truly compressed
	 *    (physical size smaller than logical size) extents won't get
	 *    merged with each other;
	 *
	 * 3) They share the same flags.
	 */
	if (cache->offset + cache->len == offset &&
	    cache->phys + cache->len == phys &&
	    cache->flags == flags) {
		cache->len += len;
		return 0;
	}

emit:
	/* Not mergeable, need to submit the cached one. */

	if (cache->entries_pos == cache->entries_size) {
		/*
		 * We will need to re-search from the end offset of the last
		 * stored extent and not from the current offset, because after
		 * unlocking the range and releasing the path, if there's a hole
		 * between that end offset and this current offset, a new extent
		 * may have been inserted due to a new write, so we don't want
		 * to miss it.
		 */
		entry = &cache->entries[cache->entries_size - 1];
		cache->next_search_offset = entry->offset + entry->len;
		cache->cached = false;

		return BTRFS_FIEMAP_FLUSH_CACHE;
	}

	entry = &cache->entries[cache->entries_pos];
	entry->offset = cache->offset;
	entry->phys = cache->phys;
	entry->len = cache->len;
	entry->flags = cache->flags;
	cache->entries_pos++;
	cache->extents_mapped++;

	if (cache->extents_mapped == fieinfo->fi_extents_max) {
		cache->cached = false;
		return 1;
	}
assign:
	cache->cached = true;
	cache->offset = offset;
	cache->phys = phys;
	cache->len = len;
	cache->flags = flags;

	return 0;
}

/*
 * Emit the last fiemap cache entry.
 *
 * The last fiemap cache entry may still be cached in the following case:
 * 0		      4k		    8k
 * |<- Fiemap range ->|
 * |<------------ First extent ----------->|
 *
 * In this case, the first extent range will be cached but not emitted.
 * So we must emit it before ending extent_fiemap().
 */
static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
				  struct fiemap_cache *cache)
{
	int ret;

	if (!cache->cached)
		return 0;

	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
				      cache->len, cache->flags);
	cache->cached = false;
	if (ret > 0)
		ret = 0;
	return ret;
}

static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *path)
{
	struct extent_buffer *clone = path->nodes[0];
	struct btrfs_key key;
	int slot;
	int ret;

	path->slots[0]++;
	if (path->slots[0] < btrfs_header_nritems(path->nodes[0]))
		return 0;

	/*
	 * Add a temporary extra ref to an already cloned extent buffer to
	 * prevent btrfs_next_leaf() from freeing it, since we want to reuse it
	 * to avoid the cost of allocating a new one.
	 */
	ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags));
	atomic_inc(&clone->refs);

	ret = btrfs_next_leaf(inode->root, path);
	if (ret != 0)
		goto out;

	/*
	 * Don't bother with cloning if there are no more file extent items for
	 * our inode.
	 */
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) {
		ret = 1;
		goto out;
	}

	/*
	 * Important to preserve the start field, for the optimizations when
	 * checking if extents are shared (see extent_fiemap()).
	 *
	 * We must set ->start before calling copy_extent_buffer_full(). If we
	 * are on sub-pagesize blocksize, we use ->start to determine the offset
	 * into the folio where our eb exists, and if we update ->start after
	 * the fact then any subsequent reads of the eb may read from a
	 * different offset in the folio than where we originally copied into.
	 */
	clone->start = path->nodes[0]->start;
	/* See the comment at fiemap_search_slot() about why we clone. */
	copy_extent_buffer_full(clone, path->nodes[0]);

	slot = path->slots[0];
	btrfs_release_path(path);
	path->nodes[0] = clone;
	path->slots[0] = slot;
out:
	if (ret)
		free_extent_buffer(clone);

	return ret;
}

/*
 * Search for the first file extent item that starts at a given file offset or
 * the one that starts immediately before that offset.
 * Returns: 0 on success, < 0 on error, 1 if not found.
 */
static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path,
			      u64 file_offset)
{
	const u64 ino = btrfs_ino(inode);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *clone;
	struct btrfs_key key;
	int slot;
	int ret;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = file_offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	if (ret > 0 && path->slots[0] > 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
		if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
			path->slots[0]--;
	}

	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(root, path);
		if (ret != 0)
			return ret;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
			return 1;
	}

	/*
	 * We clone the leaf and use it during fiemap. This is because while
	 * using the leaf we do expensive things like checking if an extent is
	 * shared, which can take a long time. In order to prevent blocking
	 * other tasks for too long, we use a clone of the leaf. We have locked
	 * the file range in the inode's io tree, so we know none of our file
	 * extent items can change. This way we avoid blocking other tasks that
	 * want to insert items for other inodes in the same leaf or b+tree
	 * rebalance operations (triggered for example when someone is trying
	 * to push items into this leaf when trying to insert an item in a
	 * neighbour leaf).
	 * We also need the private clone because holding a read lock on an
	 * extent buffer of the subvolume's b+tree will make lockdep unhappy
	 * when we check if extents are shared, as backref walking may need to
	 * lock the same leaf we are processing.
	 */
	clone = btrfs_clone_extent_buffer(path->nodes[0]);
	if (!clone)
		return -ENOMEM;

	slot = path->slots[0];
	btrfs_release_path(path);
	path->nodes[0] = clone;
	path->slots[0] = slot;

	return 0;
}

/*
 * Process a range which is a hole or a prealloc extent in the inode's subvolume
 * btree. If @disk_bytenr is 0, we are dealing with a hole, otherwise a prealloc
 * extent. The end offset (@end) is inclusive.
 */
static int fiemap_process_hole(struct btrfs_inode *inode,
			       struct fiemap_extent_info *fieinfo,
			       struct fiemap_cache *cache,
			       struct extent_state **delalloc_cached_state,
			       struct btrfs_backref_share_check_ctx *backref_ctx,
			       u64 disk_bytenr, u64 extent_offset,
			       u64 extent_gen,
			       u64 start, u64 end)
{
	const u64 i_size = i_size_read(&inode->vfs_inode);
	u64 cur_offset = start;
	u64 last_delalloc_end = 0;
	u32 prealloc_flags = FIEMAP_EXTENT_UNWRITTEN;
	bool checked_extent_shared = false;
	int ret;

	/*
	 * There can be no delalloc past i_size, so don't waste time looking for
	 * it beyond i_size.
	 */
	while (cur_offset < end && cur_offset < i_size) {
		u64 delalloc_start;
		u64 delalloc_end;
		u64 prealloc_start;
		u64 prealloc_len = 0;
		bool delalloc;

		delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
							delalloc_cached_state,
							&delalloc_start,
							&delalloc_end);
		if (!delalloc)
			break;

		/*
		 * If this is a prealloc extent we have to report every section
		 * of it that has no delalloc.
		 */
		if (disk_bytenr != 0) {
			if (last_delalloc_end == 0) {
				prealloc_start = start;
				prealloc_len = delalloc_start - start;
			} else {
				prealloc_start = last_delalloc_end + 1;
				prealloc_len = delalloc_start - prealloc_start;
			}
		}

		if (prealloc_len > 0) {
			if (!checked_extent_shared && fieinfo->fi_extents_max) {
				ret = btrfs_is_data_extent_shared(inode,
								  disk_bytenr,
								  extent_gen,
								  backref_ctx);
				if (ret < 0)
					return ret;
				else if (ret > 0)
					prealloc_flags |= FIEMAP_EXTENT_SHARED;

				checked_extent_shared = true;
			}
			ret = emit_fiemap_extent(fieinfo, cache, prealloc_start,
						 disk_bytenr + extent_offset,
						 prealloc_len, prealloc_flags);
			if (ret)
				return ret;
			extent_offset += prealloc_len;
		}

		ret = emit_fiemap_extent(fieinfo, cache, delalloc_start, 0,
					 delalloc_end + 1 - delalloc_start,
					 FIEMAP_EXTENT_DELALLOC |
					 FIEMAP_EXTENT_UNKNOWN);
		if (ret)
			return ret;

		last_delalloc_end = delalloc_end;
		cur_offset = delalloc_end + 1;
		extent_offset += cur_offset - delalloc_start;
		cond_resched();
	}

	/*
	 * Either we found no delalloc for the whole prealloc extent or we have
	 * a prealloc extent that spans i_size or starts at or after i_size.
	 */
	if (disk_bytenr != 0 && last_delalloc_end < end) {
		u64 prealloc_start;
		u64 prealloc_len;

		if (last_delalloc_end == 0) {
			prealloc_start = start;
			prealloc_len = end + 1 - start;
		} else {
			prealloc_start = last_delalloc_end + 1;
			prealloc_len = end + 1 - prealloc_start;
		}

		if (!checked_extent_shared && fieinfo->fi_extents_max) {
			ret = btrfs_is_data_extent_shared(inode,
							  disk_bytenr,
							  extent_gen,
							  backref_ctx);
			if (ret < 0)
				return ret;
			else if (ret > 0)
				prealloc_flags |= FIEMAP_EXTENT_SHARED;
		}
		ret = emit_fiemap_extent(fieinfo, cache, prealloc_start,
					 disk_bytenr + extent_offset,
					 prealloc_len, prealloc_flags);
		if (ret)
			return ret;
	}

	return 0;
}

static int fiemap_find_last_extent_offset(struct btrfs_inode *inode,
					  struct btrfs_path *path,
					  u64 *last_extent_end_ret)
{
	const u64 ino = btrfs_ino(inode);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *ei;
	struct btrfs_key key;
	u64 disk_bytenr;
	int ret;

	/*
	 * Lookup the last file extent. We're not using i_size here because
	 * there might be preallocation past i_size.
	 */
	ret = btrfs_lookup_file_extent(NULL, root, path, ino, (u64)-1, 0);
	/* There can't be a file extent item at offset (u64)-1 */
	ASSERT(ret != 0);
	if (ret < 0)
		return ret;

	/*
	 * For a non-existing key, btrfs_search_slot() always leaves us at a
	 * slot > 0, except if the btree is empty, which is impossible because
	 * at least it has the inode item for this inode and all the items for
	 * the root inode 256.
	 */
	ASSERT(path->slots[0] > 0);
	path->slots[0]--;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
		/* No file extent items in the subvolume tree. */
		*last_extent_end_ret = 0;
		return 0;
	}

	/*
	 * For an inline extent, the disk_bytenr is where inline data starts at,
	 * so first check if we have an inline extent item before checking if we
	 * have an implicit hole (disk_bytenr == 0).
	 */
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) {
		*last_extent_end_ret = btrfs_file_extent_end(path);
		return 0;
	}

	/*
	 * Find the last file extent item that is not a hole (when NO_HOLES is
	 * not enabled). This should take at most 2 iterations in the worst
	 * case: we have one hole file extent item at slot 0 of a leaf and
	 * another hole file extent item as the last item in the previous leaf.
	 * This is because we merge file extent items that represent holes.
	 */
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
	while (disk_bytenr == 0) {
		ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY);
		if (ret < 0) {
			return ret;
		} else if (ret > 0) {
			/* No file extent items that are not holes. */
			*last_extent_end_ret = 0;
			return 0;
		}
		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
	}

	*last_extent_end_ret = btrfs_file_extent_end(path);
	return 0;
}

static int extent_fiemap(struct btrfs_inode *inode,
			 struct fiemap_extent_info *fieinfo,
			 u64 start, u64 len)
{
	const u64 ino = btrfs_ino(inode);
	struct extent_state *cached_state = NULL;
	struct extent_state *delalloc_cached_state = NULL;
	struct btrfs_path *path;
	struct fiemap_cache cache = { 0 };
	struct btrfs_backref_share_check_ctx *backref_ctx;
	u64 last_extent_end;
	u64 prev_extent_end;
	u64 range_start;
	u64 range_end;
	const u64 sectorsize = inode->root->fs_info->sectorsize;
	bool stopped = false;
	int ret;

	cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
	cache.entries = kmalloc_array(cache.entries_size,
				      sizeof(struct btrfs_fiemap_entry),
				      GFP_KERNEL);
	backref_ctx = btrfs_alloc_backref_share_check_ctx();
	path = btrfs_alloc_path();
	if (!cache.entries || !backref_ctx || !path) {
		ret = -ENOMEM;
		goto out;
	}

restart:
	range_start = round_down(start, sectorsize);
	range_end = round_up(start + len, sectorsize);
	prev_extent_end = range_start;

	lock_extent(&inode->io_tree, range_start, range_end, &cached_state);

	ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
	if (ret < 0)
		goto out_unlock;
	btrfs_release_path(path);

	path->reada = READA_FORWARD;
	ret = fiemap_search_slot(inode, path, range_start);
	if (ret < 0) {
		goto out_unlock;
	} else if (ret > 0) {
		/*
		 * No file extent item found, but we may have delalloc between
		 * the current offset and i_size. So check for that.
		 */
		ret = 0;
		goto check_eof_delalloc;
	}

	while (prev_extent_end < range_end) {
		struct extent_buffer *leaf = path->nodes[0];
		struct btrfs_file_extent_item *ei;
		struct btrfs_key key;
		u64 extent_end;
		u64 extent_len;
		u64 extent_offset = 0;
		u64 extent_gen;
		u64 disk_bytenr = 0;
		u64 flags = 0;
		int extent_type;
		u8 compression;

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
			break;

		extent_end = btrfs_file_extent_end(path);

		/*
		 * The first iteration can leave us at an extent item that ends
		 * before our range's start. Move to the next item.
		 */
		if (extent_end <= range_start)
			goto next_item;

		backref_ctx->curr_leaf_bytenr = leaf->start;

		/* We have an implicit hole (NO_HOLES feature enabled). */
		if (prev_extent_end < key.offset) {
			const u64 hole_end = min(key.offset, range_end) - 1;

			ret = fiemap_process_hole(inode, fieinfo, &cache,
						  &delalloc_cached_state,
						  backref_ctx, 0, 0, 0,
						  prev_extent_end, hole_end);
			if (ret < 0) {
				goto out_unlock;
			} else if (ret > 0) {
				/* fiemap_fill_next_extent() told us to stop. */
				stopped = true;
				break;
			}

			/* We've reached the end of the fiemap range, stop. */
			if (key.offset >= range_end) {
				stopped = true;
				break;
			}
		}

		extent_len = extent_end - key.offset;
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		compression = btrfs_file_extent_compression(leaf, ei);
		extent_type = btrfs_file_extent_type(leaf, ei);
		extent_gen = btrfs_file_extent_generation(leaf, ei);

		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
			if (compression == BTRFS_COMPRESS_NONE)
				extent_offset = btrfs_file_extent_offset(leaf, ei);
		}

		if (compression != BTRFS_COMPRESS_NONE)
			flags |= FIEMAP_EXTENT_ENCODED;

		if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			flags |= FIEMAP_EXTENT_DATA_INLINE;
			flags |= FIEMAP_EXTENT_NOT_ALIGNED;
			ret = emit_fiemap_extent(fieinfo, &cache, key.offset, 0,
						 extent_len, flags);
		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			ret = fiemap_process_hole(inode, fieinfo, &cache,
						  &delalloc_cached_state,
						  backref_ctx,
						  disk_bytenr, extent_offset,
						  extent_gen, key.offset,
						  extent_end - 1);
		} else if (disk_bytenr == 0) {
			/* We have an explicit hole. */
			ret = fiemap_process_hole(inode, fieinfo, &cache,
						  &delalloc_cached_state,
						  backref_ctx, 0, 0, 0,
						  key.offset, extent_end - 1);
		} else {
			/* We have a regular extent. */
			if (fieinfo->fi_extents_max) {
				ret = btrfs_is_data_extent_shared(inode,
								  disk_bytenr,
								  extent_gen,
								  backref_ctx);
				if (ret < 0)
					goto out_unlock;
				else if (ret > 0)
					flags |= FIEMAP_EXTENT_SHARED;
			}

			ret = emit_fiemap_extent(fieinfo, &cache, key.offset,
						 disk_bytenr + extent_offset,
						 extent_len, flags);
		}

		if (ret < 0) {
			goto out_unlock;
		} else if (ret > 0) {
			/* emit_fiemap_extent() told us to stop. */
			stopped = true;
			break;
		}

		prev_extent_end = extent_end;
next_item:
		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			goto out_unlock;
		}

		ret = fiemap_next_leaf_item(inode, path);
		if (ret < 0) {
			goto out_unlock;
		} else if (ret > 0) {
			/* No more file extent items for this inode. */
			break;
		}
		cond_resched();
	}

check_eof_delalloc:
	if (!stopped && prev_extent_end < range_end) {
		ret = fiemap_process_hole(inode, fieinfo, &cache,
					  &delalloc_cached_state, backref_ctx,
					  0, 0, 0, prev_extent_end, range_end - 1);
		if (ret < 0)
			goto out_unlock;
		prev_extent_end = range_end;
	}

	if (cache.cached && cache.offset + cache.len >= last_extent_end) {
		const u64 i_size = i_size_read(&inode->vfs_inode);

		if (prev_extent_end < i_size) {
			u64 delalloc_start;
			u64 delalloc_end;
			bool delalloc;

			delalloc = btrfs_find_delalloc_in_range(inode,
								prev_extent_end,
								i_size - 1,
								&delalloc_cached_state,
								&delalloc_start,
								&delalloc_end);
			if (!delalloc)
				cache.flags |= FIEMAP_EXTENT_LAST;
		} else {
			cache.flags |= FIEMAP_EXTENT_LAST;
		}
	}

out_unlock:
	unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);

	if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
		btrfs_release_path(path);
		ret = flush_fiemap_cache(fieinfo, &cache);
		if (ret)
			goto out;
		len -= cache.next_search_offset - start;
		start = cache.next_search_offset;
		goto restart;
	} else if (ret < 0) {
		goto out;
	}

	/*
	 * Must free the path before emitting to the fiemap buffer because we
	 * may have a non-cloned leaf and if the fiemap buffer is memory mapped
	 * to a file, a write into it (through btrfs_page_mkwrite()) may trigger
	 * waiting for an ordered extent that in order to complete needs to
	 * modify that leaf, therefore leading to a deadlock.
	 */
	btrfs_free_path(path);
	path = NULL;

	ret = flush_fiemap_cache(fieinfo, &cache);
	if (ret)
		goto out;

	ret = emit_last_fiemap_cache(fieinfo, &cache);
out:
	free_extent_state(delalloc_cached_state);
	kfree(cache.entries);
	btrfs_free_backref_share_ctx(backref_ctx);
	btrfs_free_path(path);
	return ret;
}

int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
	int ret;

	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
	if (ret)
		return ret;

	/*
	 * fiemap_prep() called filemap_write_and_wait() for the whole possible
	 * file range (0 to LLONG_MAX), but that is not enough if we have
	 * compression enabled. The first filemap_fdatawrite_range() only kicks
	 * in the compression of data (in an async thread) and will return
	 * before the compression is done and writeback is started. A second
	 * filemap_fdatawrite_range() is needed to wait for the compression to
	 * complete and writeback to start. We also need to wait for ordered
	 * extents to complete, because our fiemap implementation uses mainly
	 * file extent items to list the extents, searching for extent maps
	 * only for file ranges with holes or prealloc extents to figure out
	 * if we have delalloc in those ranges.
903 + */ 904 + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { 905 + ret = btrfs_wait_ordered_range(btrfs_inode, 0, LLONG_MAX); 906 + if (ret) 907 + return ret; 908 + } 909 + 910 + btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); 911 + 912 + /* 913 + * We did an initial flush to avoid holding the inode's lock while 914 + * triggering writeback and waiting for the completion of IO and ordered 915 + * extents. Now after we locked the inode we do it again, because it's 916 + * possible a new write may have happened in between those two steps. 917 + */ 918 + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { 919 + ret = btrfs_wait_ordered_range(btrfs_inode, 0, LLONG_MAX); 920 + if (ret) { 921 + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); 922 + return ret; 923 + } 924 + } 925 + 926 + ret = extent_fiemap(btrfs_inode, fieinfo, start, len); 927 + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); 928 + 929 + return ret; 930 + }
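For context, the new entry point is driven from userspace through the FS_IOC_FIEMAP ioctl. Below is a minimal illustrative userspace sketch (not part of this commit) of a caller; it passes FIEMAP_FLAG_SYNC, the flag that triggers the flush-and-wait logic in btrfs_fiemap() above, and the 64-extent buffer is an arbitrary size chosen for the example.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap *fm;
	unsigned int i;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Fixed-size header plus room for 64 extent records. */
	fm = calloc(1, sizeof(*fm) + 64 * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* map the whole file */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* exercises the double flush above */
	fm->fm_extent_count = 64;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}
	for (i = 0; i < fm->fm_mapped_extents; i++) {
		const struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("logical %llu phys %llu len %llu flags 0x%x\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_physical,
		       (unsigned long long)fe->fe_length,
		       fe->fe_flags);
	}
	free(fm);
	close(fd);
	return 0;
}

A file with more than 64 extents would need a loop that restarts fm_start past the last returned extent until one carries FIEMAP_EXTENT_LAST.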
+11
fs/btrfs/fiemap.h
···
1 + /* SPDX-License-Identifier: GPL-2.0 */
2 +
3 + #ifndef BTRFS_FIEMAP_H
4 + #define BTRFS_FIEMAP_H
5 +
6 + #include <linux/fiemap.h>
7 +
8 + int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
9 + 		 u64 start, u64 len);
10 +
11 + #endif /* BTRFS_FIEMAP_H */
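The header deliberately exports a single symbol, since the only consumer is the inode operations table in inode.c. As a hedged sketch of how such a handler is wired up (the .fiemap member and its signature come from include/linux/fs.h; the example_* table name is hypothetical, for illustration only — this patch does not touch the real btrfs tables):

#include <linux/fs.h>
#include "fiemap.h"

/* Hypothetical ops table for illustration; btrfs's actual
 * inode_operations tables live in inode.c. */
static const struct inode_operations example_file_inode_operations = {
	/* ... other callbacks ... */
	.fiemap	= btrfs_fiemap,
};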
+1 -51
fs/btrfs/inode.c
···
70 70 #include "orphan.h"
71 71 #include "backref.h"
72 72 #include "raid-stripe-tree.h"
73 + #include "fiemap.h"
73 74
74 75 struct btrfs_iget_args {
75 76 	u64 ino;
···
7928 7927
7929 7928 	return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
7930 7929 			      IOMAP_DIO_PARTIAL, &data, done_before);
7931 - }
7932 -
7933 - static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
7934 - 			u64 start, u64 len)
7935 - {
7936 - 	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
7937 - 	int ret;
7938 -
7939 - 	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
7940 - 	if (ret)
7941 - 		return ret;
7942 -
7943 - 	/*
7944 - 	 * fiemap_prep() called filemap_write_and_wait() for the whole possible
7945 - 	 * file range (0 to LLONG_MAX), but that is not enough if we have
7946 - 	 * compression enabled. The first filemap_fdatawrite_range() only kicks
7947 - 	 * in the compression of data (in an async thread) and will return
7948 - 	 * before the compression is done and writeback is started. A second
7949 - 	 * filemap_fdatawrite_range() is needed to wait for the compression to
7950 - 	 * complete and writeback to start. We also need to wait for ordered
7951 - 	 * extents to complete, because our fiemap implementation uses mainly
7952 - 	 * file extent items to list the extents, searching for extent maps
7953 - 	 * only for file ranges with holes or prealloc extents to figure out
7954 - 	 * if we have delalloc in those ranges.
7955 - 	 */
7956 - 	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
7957 - 		ret = btrfs_wait_ordered_range(btrfs_inode, 0, LLONG_MAX);
7958 - 		if (ret)
7959 - 			return ret;
7960 - 	}
7961 -
7962 - 	btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
7963 -
7964 - 	/*
7965 - 	 * We did an initial flush to avoid holding the inode's lock while
7966 - 	 * triggering writeback and waiting for the completion of IO and ordered
7967 - 	 * extents. Now after we locked the inode we do it again, because it's
7968 - 	 * possible a new write may have happened in between those two steps.
7969 - 	 */
7970 - 	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
7971 - 		ret = btrfs_wait_ordered_range(btrfs_inode, 0, LLONG_MAX);
7972 - 		if (ret) {
7973 - 			btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
7974 - 			return ret;
7975 - 		}
7976 - 	}
7977 -
7978 - 	ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
7979 - 	btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
7980 -
7981 - 	return ret;
7982 7930 }
7983 7931
7984 7932 /*
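Finally, the FIEMAP_EXTENT_* flags set in extent_fiemap() above (SHARED, ENCODED, DATA_INLINE, NOT_ALIGNED, LAST) surface directly in the fe_flags of the extents returned to userspace. A small hedged helper, for illustration only, showing how a consumer might decode them (constants from <linux/fiemap.h>; the comments note where the code above sets each flag):

#include <stdio.h>
#include <linux/fiemap.h>

static void print_fiemap_flags(unsigned int flags)
{
	if (flags & FIEMAP_EXTENT_LAST)
		printf(" last");	/* set via cache.flags once past last_extent_end */
	if (flags & FIEMAP_EXTENT_ENCODED)
		printf(" encoded");	/* compression != BTRFS_COMPRESS_NONE */
	if (flags & FIEMAP_EXTENT_DATA_INLINE)
		printf(" inline");	/* BTRFS_FILE_EXTENT_INLINE extents */
	if (flags & FIEMAP_EXTENT_NOT_ALIGNED)
		printf(" not-aligned");	/* also set for inline extents */
	if (flags & FIEMAP_EXTENT_SHARED)
		printf(" shared");	/* btrfs_is_data_extent_shared() returned > 0 */
	printf("\n");
}

This helper could replace the raw flags printout in the ioctl sketch shown after the fiemap.c hunk above.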