Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: Fix max file size and logical block counting of extent format file

Kazuya Mio reported that he was able to hit BUG_ON(next == lblock)
in ext4_ext_put_gap_in_cache() while creating a sparse file in extent
format and filling the tail of the file up to its end. We will hit the BUG_ON
when we write the last block (2^32-1) into the sparse file.

The root cause of the problem lies in the fact that we specifically set
s_maxbytes so that the block at s_maxbytes fits into the on-disk extent
format, which is 32 bits long. However, we are not storing start and end
block numbers, but rather the start block number and the length in blocks.
It means that in order to cover an extent from 0 to EXT_MAX_BLOCK we need
EXT_MAX_BLOCK+1 to fit into len (because we are counting block 0 as well) -
and it does not.

The only way to fix it without changing the meaning of the struct
ext4_extent members is, as Kazuya Mio suggested, to lower s_maxbytes
by one fs block so we can cover the whole extent we can get by the
on-disk extent format.

Also, in many places EXT_MAX_BLOCK is used as a length instead of the maximum
logical block number that its name suggests, which is all a bit messy. So
this commit renames it to EXT_MAX_BLOCKS and changes its usage in some
places to actually be the maximum number of blocks in the extent.

The bug which this commit fixes can be reproduced as follows:

dd if=/dev/zero of=/mnt/mp1/file bs=<blocksize> count=1 seek=$((2**32-2))
sync
dd if=/dev/zero of=/mnt/mp1/file bs=<blocksize> count=1 seek=$((2**32-1))

Reported-by: Kazuya Mio <k-mio@sx.jp.nec.com>
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

authored by

Lukas Czerner and committed by
Theodore Ts'o
f17722f9 5def1360

+39 -27
+5 -2
fs/ext4/ext4_extents.h
··· 133 133 #define EXT_BREAK 1 134 134 #define EXT_REPEAT 2 135 135 136 - /* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ 137 - #define EXT_MAX_BLOCK 0xffffffff 136 + /* 137 + * Maximum number of logical blocks in a file; ext4_extent's ee_block is 138 + * __le32. 139 + */ 140 + #define EXT_MAX_BLOCKS 0xffffffff 138 141 139 142 /* 140 143 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
+17 -17
fs/ext4/extents.c
··· 1408 1408 1409 1409 /* 1410 1410 * ext4_ext_next_allocated_block: 1411 - * returns allocated block in subsequent extent or EXT_MAX_BLOCK. 1411 + * returns allocated block in subsequent extent or EXT_MAX_BLOCKS. 1412 1412 * NOTE: it considers block number from index entry as 1413 1413 * allocated block. Thus, index entries have to be consistent 1414 1414 * with leaves. ··· 1422 1422 depth = path->p_depth; 1423 1423 1424 1424 if (depth == 0 && path->p_ext == NULL) 1425 - return EXT_MAX_BLOCK; 1425 + return EXT_MAX_BLOCKS; 1426 1426 1427 1427 while (depth >= 0) { 1428 1428 if (depth == path->p_depth) { ··· 1439 1439 depth--; 1440 1440 } 1441 1441 1442 - return EXT_MAX_BLOCK; 1442 + return EXT_MAX_BLOCKS; 1443 1443 } 1444 1444 1445 1445 /* 1446 1446 * ext4_ext_next_leaf_block: 1447 - * returns first allocated block from next leaf or EXT_MAX_BLOCK 1447 + * returns first allocated block from next leaf or EXT_MAX_BLOCKS 1448 1448 */ 1449 1449 static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, 1450 1450 struct ext4_ext_path *path) ··· 1456 1456 1457 1457 /* zero-tree has no leaf blocks at all */ 1458 1458 if (depth == 0) 1459 - return EXT_MAX_BLOCK; 1459 + return EXT_MAX_BLOCKS; 1460 1460 1461 1461 /* go to index block */ 1462 1462 depth--; ··· 1469 1469 depth--; 1470 1470 } 1471 1471 1472 - return EXT_MAX_BLOCK; 1472 + return EXT_MAX_BLOCKS; 1473 1473 } 1474 1474 1475 1475 /* ··· 1677 1677 */ 1678 1678 if (b2 < b1) { 1679 1679 b2 = ext4_ext_next_allocated_block(path); 1680 - if (b2 == EXT_MAX_BLOCK) 1680 + if (b2 == EXT_MAX_BLOCKS) 1681 1681 goto out; 1682 1682 } 1683 1683 1684 1684 /* check for wrap through zero on extent logical start block*/ 1685 1685 if (b1 + len1 < b1) { 1686 - len1 = EXT_MAX_BLOCK - b1; 1686 + len1 = EXT_MAX_BLOCKS - b1; 1687 1687 newext->ee_len = cpu_to_le16(len1); 1688 1688 ret = 1; 1689 1689 } ··· 1767 1767 fex = EXT_LAST_EXTENT(eh); 1768 1768 next = ext4_ext_next_leaf_block(inode, path); 1769 1769 if 
(le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) 1770 - && next != EXT_MAX_BLOCK) { 1770 + && next != EXT_MAX_BLOCKS) { 1771 1771 ext_debug("next leaf block - %d\n", next); 1772 1772 BUG_ON(npath != NULL); 1773 1773 npath = ext4_ext_find_extent(inode, next, NULL); ··· 1887 1887 BUG_ON(func == NULL); 1888 1888 BUG_ON(inode == NULL); 1889 1889 1890 - while (block < last && block != EXT_MAX_BLOCK) { 1890 + while (block < last && block != EXT_MAX_BLOCKS) { 1891 1891 num = last - block; 1892 1892 /* find extent for this block */ 1893 1893 down_read(&EXT4_I(inode)->i_data_sem); ··· 2020 2020 if (ex == NULL) { 2021 2021 /* there is no extent yet, so gap is [0;-] */ 2022 2022 lblock = 0; 2023 - len = EXT_MAX_BLOCK; 2023 + len = EXT_MAX_BLOCKS; 2024 2024 ext_debug("cache gap(whole file):"); 2025 2025 } else if (block < le32_to_cpu(ex->ee_block)) { 2026 2026 lblock = block; ··· 2350 2350 * never happen because at least one of the end points 2351 2351 * needs to be on the edge of the extent. 
2352 2352 */ 2353 - if (end == EXT_MAX_BLOCK) { 2353 + if (end == EXT_MAX_BLOCKS - 1) { 2354 2354 ext_debug(" bad truncate %u:%u\n", 2355 2355 start, end); 2356 2356 block = 0; ··· 2398 2398 * If this is a truncate, this condition 2399 2399 * should never happen 2400 2400 */ 2401 - if (end == EXT_MAX_BLOCK) { 2401 + if (end == EXT_MAX_BLOCKS - 1) { 2402 2402 ext_debug(" bad truncate %u:%u\n", 2403 2403 start, end); 2404 2404 err = -EIO; ··· 2478 2478 * we need to remove it from the leaf 2479 2479 */ 2480 2480 if (num == 0) { 2481 - if (end != EXT_MAX_BLOCK) { 2481 + if (end != EXT_MAX_BLOCKS - 1) { 2482 2482 /* 2483 2483 * For hole punching, we need to scoot all the 2484 2484 * extents up when an extent is removed so that ··· 3699 3699 3700 3700 last_block = (inode->i_size + sb->s_blocksize - 1) 3701 3701 >> EXT4_BLOCK_SIZE_BITS(sb); 3702 - err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK); 3702 + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); 3703 3703 3704 3704 /* In a multi-transaction truncate, we only make the final 3705 3705 * transaction synchronous. ··· 4347 4347 4348 4348 start_blk = start >> inode->i_sb->s_blocksize_bits; 4349 4349 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; 4350 - if (last_blk >= EXT_MAX_BLOCK) 4351 - last_blk = EXT_MAX_BLOCK-1; 4350 + if (last_blk >= EXT_MAX_BLOCKS) 4351 + last_blk = EXT_MAX_BLOCKS-1; 4352 4352 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; 4353 4353 4354 4354 /*
+5 -5
fs/ext4/move_extent.c
··· 1002 1002 return -EINVAL; 1003 1003 } 1004 1004 1005 - if ((orig_start > EXT_MAX_BLOCK) || 1006 - (donor_start > EXT_MAX_BLOCK) || 1007 - (*len > EXT_MAX_BLOCK) || 1008 - (orig_start + *len > EXT_MAX_BLOCK)) { 1005 + if ((orig_start >= EXT_MAX_BLOCKS) || 1006 + (donor_start >= EXT_MAX_BLOCKS) || 1007 + (*len > EXT_MAX_BLOCKS) || 1008 + (orig_start + *len >= EXT_MAX_BLOCKS)) { 1009 1009 ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 1010 - "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, 1010 + "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, 1011 1011 orig_inode->i_ino, donor_inode->i_ino); 1012 1012 return -EINVAL; 1013 1013 }
+12 -3
fs/ext4/super.c
··· 2243 2243 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2244 2244 * so that won't be a limiting factor. 2245 2245 * 2246 + * However there is other limiting factor. We do store extents in the form 2247 + * of starting block and length, hence the resulting length of the extent 2248 + * covering maximum file size must fit into on-disk format containers as 2249 + * well. Given that length is always by 1 unit bigger than max unit (because 2250 + * we count 0 as well) we have to lower the s_maxbytes by one fs block. 2251 + * 2246 2252 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2247 2253 */ 2248 2254 static loff_t ext4_max_size(int blkbits, int has_huge_files) ··· 2270 2264 upper_limit <<= blkbits; 2271 2265 } 2272 2266 2273 - /* 32-bit extent-start container, ee_block */ 2274 - res = 1LL << 32; 2267 + /* 2268 + * 32-bit extent-start container, ee_block. We lower the maxbytes 2269 + * by one fs block, so ee_len can cover the extent of maximum file 2270 + * size 2271 + */ 2272 + res = (1LL << 32) - 1; 2275 2273 res <<= blkbits; 2276 - res -= 1; 2277 2274 2278 2275 /* Sanity check against vm- & vfs- imposed limits */ 2279 2276 if (res > upper_limit)