ext4: Fix race in ext4_inode_info.i_cached_extent

If two CPUs call ext4_ext_get_blocks() at the same time, there is
nothing protecting the i_cached_extent structure from being read by
one CPU while it is being updated by the other. This could cause the
wrong location on disk to be read or written, potentially corrupting
the block group descriptors and/or inode table.

This bug has been in the ext4 code since almost the very beginning of
ext4's development. Fortunately, once the data is stored in the page
cache, ext4_get_blocks() doesn't need to be called, so trying to
replicate this problem to the point where we could identify its root
cause was *extremely* difficult. Many thanks to Kevin Shanahan for
working over several months to be able to reproduce this easily so we
could finally nail down the cause of the corruption.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

+12 -5
+12 -5
fs/ext4/extents.c
··· 1841 { 1842 struct ext4_ext_cache *cex; 1843 BUG_ON(len == 0); 1844 cex = &EXT4_I(inode)->i_cached_extent; 1845 cex->ec_type = type; 1846 cex->ec_block = block; 1847 cex->ec_len = len; 1848 cex->ec_start = start; 1849 } 1850 1851 /* ··· 1904 struct ext4_extent *ex) 1905 { 1906 struct ext4_ext_cache *cex; 1907 1908 cex = &EXT4_I(inode)->i_cached_extent; 1909 1910 /* has cache valid data? */ 1911 if (cex->ec_type == EXT4_EXT_CACHE_NO) 1912 - return EXT4_EXT_CACHE_NO; 1913 1914 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && 1915 cex->ec_type != EXT4_EXT_CACHE_EXTENT); ··· 1925 ext_debug("%u cached by %u:%u:%llu\n", 1926 block, 1927 cex->ec_block, cex->ec_len, cex->ec_start); 1928 - return cex->ec_type; 1929 } 1930 - 1931 - /* not in cache */ 1932 - return EXT4_EXT_CACHE_NO; 1933 } 1934 1935 /*
··· 1841 { 1842 struct ext4_ext_cache *cex; 1843 BUG_ON(len == 0); 1844 + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1845 cex = &EXT4_I(inode)->i_cached_extent; 1846 cex->ec_type = type; 1847 cex->ec_block = block; 1848 cex->ec_len = len; 1849 cex->ec_start = start; 1850 + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1851 } 1852 1853 /* ··· 1902 struct ext4_extent *ex) 1903 { 1904 struct ext4_ext_cache *cex; 1905 + int ret = EXT4_EXT_CACHE_NO; 1906 1907 + /* 1908 + * We borrow i_block_reservation_lock to protect i_cached_extent 1909 + */ 1910 + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1911 cex = &EXT4_I(inode)->i_cached_extent; 1912 1913 /* has cache valid data? */ 1914 if (cex->ec_type == EXT4_EXT_CACHE_NO) 1915 + goto errout; 1916 1917 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && 1918 cex->ec_type != EXT4_EXT_CACHE_EXTENT); ··· 1918 ext_debug("%u cached by %u:%u:%llu\n", 1919 block, 1920 cex->ec_block, cex->ec_len, cex->ec_start); 1921 + ret = cex->ec_type; 1922 } 1923 + errout: 1924 + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1925 + return ret; 1926 } 1927 1928 /*