Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: make ext4_es_lookup_extent() pass out the extent seq counter

When querying extents in the extent status tree, we should hold the
data_sem if we want to obtain the sequence number as a valid cookie
simultaneously. However, currently, ext4_map_blocks() calls
ext4_es_lookup_extent() without holding data_sem. Therefore, we should
acquire i_es_lock instead, which also ensures that the sequence cookie
and the extent remain consistent. Consequently, make
ext4_es_lookup_extent() pass out the sequence number when necessary.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-ID: <20251013015128.499308-4-yi.zhang@huaweicloud.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

Authored by Zhang Yi and committed by Theodore Ts'o
7da5565c dd064d51

+10 -8
+1 -1
fs/ext4/extents.c
··· 2213 2213 while (block <= end) { 2214 2214 next = 0; 2215 2215 flags = 0; 2216 - if (!ext4_es_lookup_extent(inode, block, &next, &es)) 2216 + if (!ext4_es_lookup_extent(inode, block, &next, &es, NULL)) 2217 2217 break; 2218 2218 if (ext4_es_is_unwritten(&es)) 2219 2219 flags |= FIEMAP_EXTENT_UNWRITTEN;
+4 -2
fs/ext4/extents_status.c
··· 1039 1039 * Return: 1 on found, 0 on not 1040 1040 */ 1041 1041 int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 1042 - ext4_lblk_t *next_lblk, 1043 - struct extent_status *es) 1042 + ext4_lblk_t *next_lblk, struct extent_status *es, 1043 + u64 *pseq) 1044 1044 { 1045 1045 struct ext4_es_tree *tree; 1046 1046 struct ext4_es_stats *stats; ··· 1099 1099 } else 1100 1100 *next_lblk = 0; 1101 1101 } 1102 + if (pseq) 1103 + *pseq = EXT4_I(inode)->i_es_seq; 1102 1104 } else { 1103 1105 percpu_counter_inc(&stats->es_stats_cache_misses); 1104 1106 }
+1 -1
fs/ext4/extents_status.h
··· 148 148 struct extent_status *es); 149 149 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 150 150 ext4_lblk_t *next_lblk, 151 - struct extent_status *es); 151 + struct extent_status *es, u64 *pseq); 152 152 extern bool ext4_es_scan_range(struct inode *inode, 153 153 int (*matching_fn)(struct extent_status *es), 154 154 ext4_lblk_t lblk, ext4_lblk_t end);
+4 -4
fs/ext4/inode.c
··· 649 649 * extent status tree. 650 650 */ 651 651 if (flags & EXT4_GET_BLOCKS_PRE_IO && 652 - ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 652 + ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, NULL)) { 653 653 if (ext4_es_is_written(&es)) 654 654 return retval; 655 655 } ··· 723 723 ext4_check_map_extents_env(inode); 724 724 725 725 /* Lookup extent status tree firstly */ 726 - if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 726 + if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, NULL)) { 727 727 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { 728 728 map->m_pblk = ext4_es_pblock(&es) + 729 729 map->m_lblk - es.es_lblk; ··· 1908 1908 ext4_check_map_extents_env(inode); 1909 1909 1910 1910 /* Lookup extent status tree firstly */ 1911 - if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 1911 + if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, NULL)) { 1912 1912 map->m_len = min_t(unsigned int, map->m_len, 1913 1913 es.es_len - (map->m_lblk - es.es_lblk)); 1914 1914 ··· 1961 1961 * is held in write mode, before inserting a new da entry in 1962 1962 * the extent status tree. 1963 1963 */ 1964 - if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 1964 + if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, NULL)) { 1965 1965 map->m_len = min_t(unsigned int, map->m_len, 1966 1966 es.es_len - (map->m_lblk - es.es_lblk)); 1967 1967