Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

btrfs: validate system chunk array at btrfs_validate_super()

Currently btrfs_validate_super() only does a very basic check on the
system chunk array size (no larger than the available space, and large
enough to contain at least one chunk).

The more comprehensive checks (the regular chunk checks and size check
inside the system chunk array) are all done inside btrfs_read_sys_array().

It's not a big deal, but it also means we do not do any validation on
the system chunk array at super block writeback time either.

Make the following modifications to centralize the system chunk array
checks into btrfs_validate_super():

- Make chunk_err() helper accept stack chunk pointer
If the @leaf parameter is NULL, then the @chunk pointer will be a pointer
to the chunk item itself, rather than the offset inside the leaf.

And since @leaf can be NULL, add a new @fs_info parameter for that
case.

- Make btrfs_check_chunk_valid() handle stack chunk pointer
The same as chunk_err(), a new @fs_info parameter, and if @leaf is
NULL, then @chunk will be a pointer to a stack chunk.

If @leaf is NULL, then all needed btrfs_chunk members will be read
using the stack helpers instead of the leaf helpers.
This means we need to read out all the needed members at the beginning
of the function.

Furthermore, at super block read time, fs_info->sectorsize is not yet
initialized, so we need one extra @sectorsize parameter to pass in the
correct sectorsize.

- Introduce a helper validate_sys_chunk_array()
* Validate the disk key.
* Validate the size before we access the full chunk items.
* Do the full chunk item validation.

- Call validate_sys_chunk_array() at btrfs_validate_super()

- Simplify the checks inside btrfs_read_sys_array()
Now the checks are converted to ASSERT()s.

- Simplify the checks inside read_one_chunk()
Now that all chunk items inside system chunk array and chunk tree are
verified, there is no need to verify them again inside read_one_chunk().

This change has the following advantages:

- More comprehensive checks at write time
And unlike the sys_chunk_array read routine, this time we do not need
to allocate a dummy extent buffer to do the check.
All the checks done here require no new memory allocation.

- Slightly improved readability when iterating the system chunk array

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

authored by

Qu Wenruo and committed by
David Sterba
2a9bb78c 4e4d058e

+139 -104
+67
fs/btrfs/disk-io.c
··· 2327 2327 return ret; 2328 2328 } 2329 2329 2330 + static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info, 2331 + const struct btrfs_super_block *sb) 2332 + { 2333 + unsigned int cur = 0; /* Offset inside the sys chunk array */ 2334 + /* 2335 + * At sb read time, fs_info is not fully initialized. Thus we have 2336 + * to use super block sectorsize, which should have been validated. 2337 + */ 2338 + const u32 sectorsize = btrfs_super_sectorsize(sb); 2339 + u32 sys_array_size = btrfs_super_sys_array_size(sb); 2340 + 2341 + if (sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { 2342 + btrfs_err(fs_info, "system chunk array too big %u > %u", 2343 + sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); 2344 + return -EUCLEAN; 2345 + } 2346 + 2347 + while (cur < sys_array_size) { 2348 + struct btrfs_disk_key *disk_key; 2349 + struct btrfs_chunk *chunk; 2350 + struct btrfs_key key; 2351 + u64 type; 2352 + u16 num_stripes; 2353 + u32 len; 2354 + int ret; 2355 + 2356 + disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur); 2357 + len = sizeof(*disk_key); 2358 + 2359 + if (cur + len > sys_array_size) 2360 + goto short_read; 2361 + cur += len; 2362 + 2363 + btrfs_disk_key_to_cpu(&key, disk_key); 2364 + if (key.type != BTRFS_CHUNK_ITEM_KEY) { 2365 + btrfs_err(fs_info, 2366 + "unexpected item type %u in sys_array at offset %u", 2367 + key.type, cur); 2368 + return -EUCLEAN; 2369 + } 2370 + chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur); 2371 + num_stripes = btrfs_stack_chunk_num_stripes(chunk); 2372 + if (cur + btrfs_chunk_item_size(num_stripes) > sys_array_size) 2373 + goto short_read; 2374 + type = btrfs_stack_chunk_type(chunk); 2375 + if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) { 2376 + btrfs_err(fs_info, 2377 + "invalid chunk type %llu in sys_array at offset %u", 2378 + type, cur); 2379 + return -EUCLEAN; 2380 + } 2381 + ret = btrfs_check_chunk_valid(fs_info, NULL, chunk, key.offset, 2382 + sectorsize); 2383 + if (ret < 0) 2384 + return ret; 
2385 + cur += btrfs_chunk_item_size(num_stripes); 2386 + } 2387 + return 0; 2388 + short_read: 2389 + btrfs_err(fs_info, 2390 + "super block sys chunk array short read, cur=%u sys_array_size=%u", 2391 + cur, sys_array_size); 2392 + return -EUCLEAN; 2393 + } 2394 + 2330 2395 /* 2331 2396 * Real super block validation 2332 2397 * NOTE: super csum type and incompat features will not be checked here. ··· 2559 2494 btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); 2560 2495 ret = -EINVAL; 2561 2496 } 2497 + 2498 + ret = validate_sys_chunk_array(fs_info, sb); 2562 2499 2563 2500 /* 2564 2501 * Obvious sys_chunk_array corruptions, it must hold at least one key
+54 -42
fs/btrfs/tree-checker.c
··· 764 764 return 0; 765 765 } 766 766 767 - __printf(4, 5) 767 + __printf(5, 6) 768 768 __cold 769 - static void chunk_err(const struct extent_buffer *leaf, 769 + static void chunk_err(const struct btrfs_fs_info *fs_info, 770 + const struct extent_buffer *leaf, 770 771 const struct btrfs_chunk *chunk, u64 logical, 771 772 const char *fmt, ...) 772 773 { 773 - const struct btrfs_fs_info *fs_info = leaf->fs_info; 774 - bool is_sb; 774 + bool is_sb = !leaf; 775 775 struct va_format vaf; 776 776 va_list args; 777 777 int i; 778 778 int slot = -1; 779 - 780 - /* Only superblock eb is able to have such small offset */ 781 - is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET); 782 779 783 780 if (!is_sb) { 784 781 /* ··· 809 812 /* 810 813 * The common chunk check which could also work on super block sys chunk array. 811 814 * 815 + * If @leaf is NULL, then @chunk must be an on-stack chunk item. 816 + * (For superblock sys_chunk array, and fs_info->sectorsize is unreliable) 817 + * 812 818 * Return -EUCLEAN if anything is corrupted. 813 819 * Return 0 if everything is OK. 
814 820 */ 815 - int btrfs_check_chunk_valid(struct extent_buffer *leaf, 816 - struct btrfs_chunk *chunk, u64 logical) 821 + int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info, 822 + const struct extent_buffer *leaf, 823 + const struct btrfs_chunk *chunk, u64 logical, 824 + u32 sectorsize) 817 825 { 818 - struct btrfs_fs_info *fs_info = leaf->fs_info; 819 826 u64 length; 820 827 u64 chunk_end; 821 828 u64 stripe_len; ··· 827 826 u16 sub_stripes; 828 827 u64 type; 829 828 u64 features; 829 + u32 chunk_sector_size; 830 830 bool mixed = false; 831 831 int raid_index; 832 832 int nparity; 833 833 int ncopies; 834 834 835 - length = btrfs_chunk_length(leaf, chunk); 836 - stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 837 - num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 838 - sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); 839 - type = btrfs_chunk_type(leaf, chunk); 835 + if (leaf) { 836 + length = btrfs_chunk_length(leaf, chunk); 837 + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 838 + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 839 + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); 840 + type = btrfs_chunk_type(leaf, chunk); 841 + chunk_sector_size = btrfs_chunk_sector_size(leaf, chunk); 842 + } else { 843 + length = btrfs_stack_chunk_length(chunk); 844 + stripe_len = btrfs_stack_chunk_stripe_len(chunk); 845 + num_stripes = btrfs_stack_chunk_num_stripes(chunk); 846 + sub_stripes = btrfs_stack_chunk_sub_stripes(chunk); 847 + type = btrfs_stack_chunk_type(chunk); 848 + chunk_sector_size = btrfs_stack_chunk_sector_size(chunk); 849 + } 840 850 raid_index = btrfs_bg_flags_to_raid_index(type); 841 851 ncopies = btrfs_raid_array[raid_index].ncopies; 842 852 nparity = btrfs_raid_array[raid_index].nparity; 843 853 844 854 if (unlikely(!num_stripes)) { 845 - chunk_err(leaf, chunk, logical, 855 + chunk_err(fs_info, leaf, chunk, logical, 846 856 "invalid chunk num_stripes, have %u", num_stripes); 847 857 return -EUCLEAN; 848 858 } 849 859 
if (unlikely(num_stripes < ncopies)) { 850 - chunk_err(leaf, chunk, logical, 860 + chunk_err(fs_info, leaf, chunk, logical, 851 861 "invalid chunk num_stripes < ncopies, have %u < %d", 852 862 num_stripes, ncopies); 853 863 return -EUCLEAN; 854 864 } 855 865 if (unlikely(nparity && num_stripes == nparity)) { 856 - chunk_err(leaf, chunk, logical, 866 + chunk_err(fs_info, leaf, chunk, logical, 857 867 "invalid chunk num_stripes == nparity, have %u == %d", 858 868 num_stripes, nparity); 859 869 return -EUCLEAN; 860 870 } 861 - if (unlikely(!IS_ALIGNED(logical, fs_info->sectorsize))) { 862 - chunk_err(leaf, chunk, logical, 871 + if (unlikely(!IS_ALIGNED(logical, sectorsize))) { 872 + chunk_err(fs_info, leaf, chunk, logical, 863 873 "invalid chunk logical, have %llu should aligned to %u", 864 - logical, fs_info->sectorsize); 874 + logical, sectorsize); 865 875 return -EUCLEAN; 866 876 } 867 - if (unlikely(btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize)) { 868 - chunk_err(leaf, chunk, logical, 877 + if (unlikely(chunk_sector_size != sectorsize)) { 878 + chunk_err(fs_info, leaf, chunk, logical, 869 879 "invalid chunk sectorsize, have %u expect %u", 870 - btrfs_chunk_sector_size(leaf, chunk), 871 - fs_info->sectorsize); 880 + chunk_sector_size, sectorsize); 872 881 return -EUCLEAN; 873 882 } 874 - if (unlikely(!length || !IS_ALIGNED(length, fs_info->sectorsize))) { 875 - chunk_err(leaf, chunk, logical, 883 + if (unlikely(!length || !IS_ALIGNED(length, sectorsize))) { 884 + chunk_err(fs_info, leaf, chunk, logical, 876 885 "invalid chunk length, have %llu", length); 877 886 return -EUCLEAN; 878 887 } 879 888 if (unlikely(check_add_overflow(logical, length, &chunk_end))) { 880 - chunk_err(leaf, chunk, logical, 889 + chunk_err(fs_info, leaf, chunk, logical, 881 890 "invalid chunk logical start and length, have logical start %llu length %llu", 882 891 logical, length); 883 892 return -EUCLEAN; 884 893 } 885 894 if (unlikely(!is_power_of_2(stripe_len) || stripe_len 
!= BTRFS_STRIPE_LEN)) { 886 - chunk_err(leaf, chunk, logical, 895 + chunk_err(fs_info, leaf, chunk, logical, 887 896 "invalid chunk stripe length: %llu", 888 897 stripe_len); 889 898 return -EUCLEAN; ··· 907 896 * Thus it should be a good way to catch obvious bitflips. 908 897 */ 909 898 if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) { 910 - chunk_err(leaf, chunk, logical, 899 + chunk_err(fs_info, leaf, chunk, logical, 911 900 "chunk length too large: have %llu limit %llu", 912 901 length, btrfs_stripe_nr_to_offset(U32_MAX)); 913 902 return -EUCLEAN; 914 903 } 915 904 if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK | 916 905 BTRFS_BLOCK_GROUP_PROFILE_MASK))) { 917 - chunk_err(leaf, chunk, logical, 906 + chunk_err(fs_info, leaf, chunk, logical, 918 907 "unrecognized chunk type: 0x%llx", 919 908 ~(BTRFS_BLOCK_GROUP_TYPE_MASK | 920 - BTRFS_BLOCK_GROUP_PROFILE_MASK) & 921 - btrfs_chunk_type(leaf, chunk)); 909 + BTRFS_BLOCK_GROUP_PROFILE_MASK) & type); 922 910 return -EUCLEAN; 923 911 } 924 912 925 913 if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && 926 914 (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) { 927 - chunk_err(leaf, chunk, logical, 915 + chunk_err(fs_info, leaf, chunk, logical, 928 916 "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set", 929 917 type & BTRFS_BLOCK_GROUP_PROFILE_MASK); 930 918 return -EUCLEAN; 931 919 } 932 920 if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) { 933 - chunk_err(leaf, chunk, logical, 921 + chunk_err(fs_info, leaf, chunk, logical, 934 922 "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", 935 923 type, BTRFS_BLOCK_GROUP_TYPE_MASK); 936 924 return -EUCLEAN; ··· 938 928 if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) && 939 929 (type & (BTRFS_BLOCK_GROUP_METADATA | 940 930 BTRFS_BLOCK_GROUP_DATA)))) { 941 - chunk_err(leaf, chunk, logical, 931 + chunk_err(fs_info, leaf, chunk, logical, 942 932 "system chunk with data or metadata type: 0x%llx", 943 933 type); 
944 934 return -EUCLEAN; ··· 951 941 if (!mixed) { 952 942 if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) && 953 943 (type & BTRFS_BLOCK_GROUP_DATA))) { 954 - chunk_err(leaf, chunk, logical, 944 + chunk_err(fs_info, leaf, chunk, logical, 955 945 "mixed chunk type in non-mixed mode: 0x%llx", type); 956 946 return -EUCLEAN; 957 947 } ··· 973 963 num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) || 974 964 ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && 975 965 num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) { 976 - chunk_err(leaf, chunk, logical, 966 + chunk_err(fs_info, leaf, chunk, logical, 977 967 "invalid num_stripes:sub_stripes %u:%u for profile %llu", 978 968 num_stripes, sub_stripes, 979 969 type & BTRFS_BLOCK_GROUP_PROFILE_MASK); ··· 993 983 struct btrfs_chunk *chunk, 994 984 struct btrfs_key *key, int slot) 995 985 { 986 + struct btrfs_fs_info *fs_info = leaf->fs_info; 996 987 int num_stripes; 997 988 998 989 if (unlikely(btrfs_item_size(leaf, slot) < sizeof(struct btrfs_chunk))) { 999 - chunk_err(leaf, chunk, key->offset, 990 + chunk_err(fs_info, leaf, chunk, key->offset, 1000 991 "invalid chunk item size: have %u expect [%zu, %u)", 1001 992 btrfs_item_size(leaf, slot), 1002 993 sizeof(struct btrfs_chunk), 1003 - BTRFS_LEAF_DATA_SIZE(leaf->fs_info)); 994 + BTRFS_LEAF_DATA_SIZE(fs_info)); 1004 995 return -EUCLEAN; 1005 996 } 1006 997 ··· 1012 1001 1013 1002 if (unlikely(btrfs_chunk_item_size(num_stripes) != 1014 1003 btrfs_item_size(leaf, slot))) { 1015 - chunk_err(leaf, chunk, key->offset, 1004 + chunk_err(fs_info, leaf, chunk, key->offset, 1016 1005 "invalid chunk item size: have %u expect %lu", 1017 1006 btrfs_item_size(leaf, slot), 1018 1007 btrfs_chunk_item_size(num_stripes)); 1019 1008 return -EUCLEAN; 1020 1009 } 1021 1010 out: 1022 - return btrfs_check_chunk_valid(leaf, chunk, key->offset); 1011 + return btrfs_check_chunk_valid(fs_info, leaf, chunk, key->offset, 1012 + fs_info->sectorsize); 1023 1013 } 1024 1014 1025 
1015 __printf(3, 4)
+5 -2
fs/btrfs/tree-checker.h
··· 10 10 #include <uapi/linux/btrfs_tree.h> 11 11 12 12 struct extent_buffer; 13 + struct btrfs_fs_info; 13 14 struct btrfs_chunk; 14 15 struct btrfs_key; 15 16 ··· 67 66 int btrfs_check_leaf(struct extent_buffer *leaf); 68 67 int btrfs_check_node(struct extent_buffer *node); 69 68 70 - int btrfs_check_chunk_valid(struct extent_buffer *leaf, 71 - struct btrfs_chunk *chunk, u64 logical); 69 + int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info, 70 + const struct extent_buffer *leaf, 71 + const struct btrfs_chunk *chunk, u64 logical, 72 + u32 sectorsize); 72 73 int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner); 73 74 int btrfs_verify_level_key(struct extent_buffer *eb, 74 75 const struct btrfs_tree_parent_check *check);
+13 -60
fs/btrfs/volumes.c
··· 7004 7004 warn_32bit_meta_chunk(fs_info, logical, length, type); 7005 7005 #endif 7006 7006 7007 - /* 7008 - * Only need to verify chunk item if we're reading from sys chunk array, 7009 - * as chunk item in tree block is already verified by tree-checker. 7010 - */ 7011 - if (leaf->start == BTRFS_SUPER_INFO_OFFSET) { 7012 - ret = btrfs_check_chunk_valid(leaf, chunk, logical); 7013 - if (ret) 7014 - return ret; 7015 - } 7016 - 7017 7007 map = btrfs_find_chunk_map(fs_info, logical, 1); 7018 7008 7019 7009 /* already mapped? */ ··· 7261 7271 { 7262 7272 struct btrfs_super_block *super_copy = fs_info->super_copy; 7263 7273 struct extent_buffer *sb; 7264 - struct btrfs_disk_key *disk_key; 7265 - struct btrfs_chunk *chunk; 7266 7274 u8 *array_ptr; 7267 7275 unsigned long sb_array_offset; 7268 7276 int ret = 0; 7269 - u32 num_stripes; 7270 7277 u32 array_size; 7271 - u32 len = 0; 7272 7278 u32 cur_offset; 7273 - u64 type; 7274 7279 struct btrfs_key key; 7275 7280 7276 7281 ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize); ··· 7288 7303 cur_offset = 0; 7289 7304 7290 7305 while (cur_offset < array_size) { 7291 - disk_key = (struct btrfs_disk_key *)array_ptr; 7292 - len = sizeof(*disk_key); 7293 - if (cur_offset + len > array_size) 7294 - goto out_short_read; 7306 + struct btrfs_chunk *chunk; 7307 + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)array_ptr; 7308 + u32 len = sizeof(*disk_key); 7309 + 7310 + /* 7311 + * The sys_chunk_array has been already verified at super block 7312 + * read time. Only do ASSERT()s for basic checks. 
7313 + */ 7314 + ASSERT(cur_offset + len <= array_size); 7295 7315 7296 7316 btrfs_disk_key_to_cpu(&key, disk_key); 7297 7317 ··· 7304 7314 sb_array_offset += len; 7305 7315 cur_offset += len; 7306 7316 7307 - if (key.type != BTRFS_CHUNK_ITEM_KEY) { 7308 - btrfs_err(fs_info, 7309 - "unexpected item type %u in sys_array at offset %u", 7310 - (u32)key.type, cur_offset); 7311 - ret = -EIO; 7312 - break; 7313 - } 7317 + ASSERT(key.type == BTRFS_CHUNK_ITEM_KEY); 7314 7318 7315 7319 chunk = (struct btrfs_chunk *)sb_array_offset; 7316 - /* 7317 - * At least one btrfs_chunk with one stripe must be present, 7318 - * exact stripe count check comes afterwards 7319 - */ 7320 - len = btrfs_chunk_item_size(1); 7321 - if (cur_offset + len > array_size) 7322 - goto out_short_read; 7320 + ASSERT(btrfs_chunk_type(sb, chunk) & BTRFS_BLOCK_GROUP_SYSTEM); 7323 7321 7324 - num_stripes = btrfs_chunk_num_stripes(sb, chunk); 7325 - if (!num_stripes) { 7326 - btrfs_err(fs_info, 7327 - "invalid number of stripes %u in sys_array at offset %u", 7328 - num_stripes, cur_offset); 7329 - ret = -EIO; 7330 - break; 7331 - } 7322 + len = btrfs_chunk_item_size(btrfs_chunk_num_stripes(sb, chunk)); 7332 7323 7333 - type = btrfs_chunk_type(sb, chunk); 7334 - if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { 7335 - btrfs_err(fs_info, 7336 - "invalid chunk type %llu in sys_array at offset %u", 7337 - type, cur_offset); 7338 - ret = -EIO; 7339 - break; 7340 - } 7341 - 7342 - len = btrfs_chunk_item_size(num_stripes); 7343 - if (cur_offset + len > array_size) 7344 - goto out_short_read; 7324 + ASSERT(cur_offset + len <= array_size); 7345 7325 7346 7326 ret = read_one_chunk(&key, sb, chunk); 7347 7327 if (ret) ··· 7324 7364 clear_extent_buffer_uptodate(sb); 7325 7365 free_extent_buffer_stale(sb); 7326 7366 return ret; 7327 - 7328 - out_short_read: 7329 - btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u", 7330 - len, cur_offset); 7331 - clear_extent_buffer_uptodate(sb); 7332 - 
free_extent_buffer_stale(sb); 7333 - return -EIO; 7334 7367 } 7335 7368 7336 7369 /*