Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

btrfs: introduce EXTENT_DIO_LOCKED

In order to support dropping the extent lock during a read we need a way
to make sure that direct reads and direct writes for overlapping ranges
are protected from each other. To accomplish this introduce another
lock bit specifically for direct io. Subsequent patches will utilize
this to protect direct IO operations.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

Authored by Josef Bacik and committed by David Sterba
7e2a5950 df2825e9

+58 -35
+24 -31
fs/btrfs/extent-io-tree.c
··· 126 126 * Empty an io tree, removing and freeing every extent state record from the 127 127 * tree. This should be called once we are sure no other task can access the 128 128 * tree anymore, so no tree updates happen after we empty the tree and there 129 - * aren't any waiters on any extent state record (EXTENT_LOCKED bit is never 129 + * aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never 130 130 * set on any extent state when calling this function). 131 131 */ 132 132 void extent_io_tree_release(struct extent_io_tree *tree) ··· 141 141 rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) { 142 142 /* Clear node to keep free_extent_state() happy. */ 143 143 RB_CLEAR_NODE(&state->rb_node); 144 - ASSERT(!(state->state & EXTENT_LOCKED)); 144 + ASSERT(!(state->state & EXTENT_LOCK_BITS)); 145 145 /* 146 146 * No need for a memory barrier here, as we are holding the tree 147 147 * lock and we only change the waitqueue while holding that lock ··· 399 399 */ 400 400 static void merge_state(struct extent_io_tree *tree, struct extent_state *state) 401 401 { 402 - if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY)) 402 + if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)) 403 403 return; 404 404 405 405 merge_prev_state(tree, state); ··· 445 445 struct rb_node *parent = NULL; 446 446 const u64 start = state->start - 1; 447 447 const u64 end = state->end + 1; 448 - const bool try_merge = !(bits & (EXTENT_LOCKED | EXTENT_BOUNDARY)); 448 + const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)); 449 449 450 450 set_state_bits(tree, state, bits, changeset); 451 451 ··· 616 616 * inserting elements in the tree, so the gfp mask is used to indicate which 617 617 * allocations or sleeping are allowed. 618 618 * 619 - * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the given 620 - * range from the tree regardless of state (ie for truncate). 621 - * 622 619 * The range [start, end] is inclusive. 
623 620 * 624 621 * This takes the tree lock, and returns 0 on success and < 0 on error. ··· 644 647 if (bits & EXTENT_DELALLOC) 645 648 bits |= EXTENT_NORESERVE; 646 649 647 - wake = (bits & EXTENT_LOCKED) ? 1 : 0; 648 - if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY)) 650 + wake = ((bits & EXTENT_LOCK_BITS) ? 1 : 0); 651 + if (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)) 649 652 clear = 1; 650 653 again: 651 654 if (!prealloc) { ··· 858 861 static void cache_state(struct extent_state *state, 859 862 struct extent_state **cached_ptr) 860 863 { 861 - return cache_state_if_flags(state, cached_ptr, 862 - EXTENT_LOCKED | EXTENT_BOUNDARY); 864 + return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY); 863 865 } 864 866 865 867 /* ··· 1059 1063 int ret = 0; 1060 1064 u64 last_start; 1061 1065 u64 last_end; 1062 - u32 exclusive_bits = (bits & EXTENT_LOCKED); 1066 + u32 exclusive_bits = (bits & EXTENT_LOCK_BITS); 1063 1067 gfp_t mask; 1064 1068 1065 1069 set_gfp_mask_from_bits(&bits, &mask); ··· 1808 1812 u32 bits, struct extent_changeset *changeset) 1809 1813 { 1810 1814 /* 1811 - * We don't support EXTENT_LOCKED yet, as current changeset will 1812 - * record any bits changed, so for EXTENT_LOCKED case, it will 1813 - * either fail with -EEXIST or changeset will record the whole 1814 - * range. 1815 + * We don't support EXTENT_LOCK_BITS yet, as current changeset will 1816 + * record any bits changed, so for EXTENT_LOCK_BITS case, it will either 1817 + * fail with -EEXIST or changeset will record the whole range. 
1815 1818 */ 1816 - ASSERT(!(bits & EXTENT_LOCKED)); 1819 + ASSERT(!(bits & EXTENT_LOCK_BITS)); 1817 1820 1818 1821 return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset); 1819 1822 } ··· 1821 1826 u32 bits, struct extent_changeset *changeset) 1822 1827 { 1823 1828 /* 1824 - * Don't support EXTENT_LOCKED case, same reason as 1829 + * Don't support EXTENT_LOCK_BITS case, same reason as 1825 1830 * set_record_extent_bits(). 1826 1831 */ 1827 - ASSERT(!(bits & EXTENT_LOCKED)); 1832 + ASSERT(!(bits & EXTENT_LOCK_BITS)); 1828 1833 1829 1834 return __clear_extent_bit(tree, start, end, bits, NULL, changeset); 1830 1835 } 1831 1836 1832 - int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1833 - struct extent_state **cached) 1837 + bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, 1838 + struct extent_state **cached) 1834 1839 { 1835 1840 int err; 1836 1841 u64 failed_start; 1837 1842 1838 - err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, 1843 + err = __set_extent_bit(tree, start, end, bits, &failed_start, 1839 1844 NULL, cached, NULL); 1840 1845 if (err == -EEXIST) { 1841 1846 if (failed_start > start) 1842 - clear_extent_bit(tree, start, failed_start - 1, 1843 - EXTENT_LOCKED, cached); 1847 + clear_extent_bit(tree, start, failed_start - 1, bits, cached); 1844 1848 return 0; 1845 1849 } 1846 1850 return 1; ··· 1849 1855 * Either insert or lock state struct between start and end use mask to tell 1850 1856 * us if waiting is desired. 
1851 1857 */ 1852 - int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1853 - struct extent_state **cached_state) 1858 + int __lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, 1859 + struct extent_state **cached_state) 1854 1860 { 1855 1861 struct extent_state *failed_state = NULL; 1856 1862 int err; 1857 1863 u64 failed_start; 1858 1864 1859 - err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, 1865 + err = __set_extent_bit(tree, start, end, bits, &failed_start, 1860 1866 &failed_state, cached_state, NULL); 1861 1867 while (err == -EEXIST) { 1862 1868 if (failed_start != start) 1863 1869 clear_extent_bit(tree, start, failed_start - 1, 1864 - EXTENT_LOCKED, cached_state); 1870 + bits, cached_state); 1865 1871 1866 - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED, 1867 - &failed_state); 1868 - err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, 1872 + wait_extent_bit(tree, failed_start, end, bits, &failed_state); 1873 + err = __set_extent_bit(tree, start, end, bits, 1869 1874 &failed_start, &failed_state, 1870 1875 cached_state, NULL); 1871 1876 }
+34 -4
fs/btrfs/extent-io-tree.h
··· 19 19 ENUM_BIT(EXTENT_DIRTY), 20 20 ENUM_BIT(EXTENT_UPTODATE), 21 21 ENUM_BIT(EXTENT_LOCKED), 22 + ENUM_BIT(EXTENT_DIO_LOCKED), 22 23 ENUM_BIT(EXTENT_NEW), 23 24 ENUM_BIT(EXTENT_DELALLOC), 24 25 ENUM_BIT(EXTENT_DEFRAG), ··· 67 66 #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | \ 68 67 EXTENT_ADD_INODE_BYTES | \ 69 68 EXTENT_CLEAR_ALL_BITS) 69 + 70 + #define EXTENT_LOCK_BITS (EXTENT_LOCKED | EXTENT_DIO_LOCKED) 70 71 71 72 /* 72 73 * Redefined bits above which are used only in the device allocation tree, ··· 137 134 void extent_io_tree_init(struct btrfs_fs_info *fs_info, 138 135 struct extent_io_tree *tree, unsigned int owner); 139 136 void extent_io_tree_release(struct extent_io_tree *tree); 137 + int __lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, 138 + struct extent_state **cached); 139 + bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, 140 + struct extent_state **cached); 140 141 141 - int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 142 - struct extent_state **cached); 142 + static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 143 + struct extent_state **cached) 144 + { 145 + return __lock_extent(tree, start, end, EXTENT_LOCKED, cached); 146 + } 143 147 144 - int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 145 - struct extent_state **cached); 148 + static inline bool try_lock_extent(struct extent_io_tree *tree, u64 start, 149 + u64 end, struct extent_state **cached) 150 + { 151 + return __try_lock_extent(tree, start, end, EXTENT_LOCKED, cached); 152 + } 146 153 147 154 int __init extent_state_init_cachep(void); 148 155 void __cold extent_state_free_cachep(void); ··· 225 212 bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start, 226 213 u64 *end, u64 max_bytes, 227 214 struct extent_state **cached_state); 215 + static inline int lock_dio_extent(struct extent_io_tree *tree, u64 start, 216 + u64 end, struct extent_state 
**cached) 217 + { 218 + return __lock_extent(tree, start, end, EXTENT_DIO_LOCKED, cached); 219 + } 220 + 221 + static inline bool try_lock_dio_extent(struct extent_io_tree *tree, u64 start, 222 + u64 end, struct extent_state **cached) 223 + { 224 + return __try_lock_extent(tree, start, end, EXTENT_DIO_LOCKED, cached); 225 + } 226 + 227 + static inline int unlock_dio_extent(struct extent_io_tree *tree, u64 start, 228 + u64 end, struct extent_state **cached) 229 + { 230 + return __clear_extent_bit(tree, start, end, EXTENT_DIO_LOCKED, cached, NULL); 231 + } 228 232 229 233 #endif /* BTRFS_EXTENT_IO_TREE_H */