Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: change on-disk layout to support extended metadata checksumming

Define flags and change structure definitions to allow checksumming of
ext4 metadata.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

Authored by Darrick J. Wong and committed by Theodore Ts'o
e6153918 f8489128

+63 -9
+39 -8
fs/ext4/ext4.h
··· 298 298 __le16 bg_free_inodes_count_lo;/* Free inodes count */ 299 299 __le16 bg_used_dirs_count_lo; /* Directories count */ 300 300 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ 301 - __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ 301 + __le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */ 302 + __le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */ 303 + __le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */ 302 304 __le16 bg_itable_unused_lo; /* Unused inodes count */ 303 305 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ 304 306 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ ··· 310 308 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ 311 309 __le16 bg_used_dirs_count_hi; /* Directories count MSB */ 312 310 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ 313 - __u32 bg_reserved2[3]; 311 + __le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */ 312 + __le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */ 313 + __le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */ 314 + __u32 bg_reserved; 314 315 }; 315 316 316 317 /* ··· 655 650 __le16 l_i_file_acl_high; 656 651 __le16 l_i_uid_high; /* these 2 fields */ 657 652 __le16 l_i_gid_high; /* were reserved2[0] */ 658 - __u32 l_i_reserved2; 653 + __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */ 654 + __le16 l_i_reserved; 659 655 } linux2; 660 656 struct { 661 657 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ ··· 672 666 } masix2; 673 667 } osd2; /* OS dependent 2 */ 674 668 __le16 i_extra_isize; 675 - __le16 i_pad1; 669 + __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ 676 670 __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ 677 671 __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ 678 672 __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ ··· 774 768 #define i_gid_low i_gid 
775 769 #define i_uid_high osd2.linux2.l_i_uid_high 776 770 #define i_gid_high osd2.linux2.l_i_gid_high 777 - #define i_reserved2 osd2.linux2.l_i_reserved2 771 + #define i_checksum_lo osd2.linux2.l_i_checksum_lo 778 772 779 773 #elif defined(__GNU__) 780 774 ··· 1007 1001 #define EXT4_ERRORS_PANIC 3 /* Panic */ 1008 1002 #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE 1009 1003 1004 + /* Metadata checksum algorithm codes */ 1005 + #define EXT4_CRC32C_CHKSUM 1 1006 + 1010 1007 /* 1011 1008 * Structure of the super block 1012 1009 */ ··· 1096 1087 __le64 s_mmp_block; /* Block for multi-mount protection */ 1097 1088 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ 1098 1089 __u8 s_log_groups_per_flex; /* FLEX_BG group size */ 1099 - __u8 s_reserved_char_pad; 1090 + __u8 s_checksum_type; /* metadata checksum algorithm used */ 1100 1091 __le16 s_reserved_pad; 1101 1092 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ 1102 1093 __le32 s_snapshot_inum; /* Inode number of active snapshot */ ··· 1122 1113 __le32 s_usr_quota_inum; /* inode for tracking user quota */ 1123 1114 __le32 s_grp_quota_inum; /* inode for tracking group quota */ 1124 1115 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ 1125 - __le32 s_reserved[109]; /* Padding to the end of the block */ 1116 + __le32 s_reserved[108]; /* Padding to the end of the block */ 1117 + __le32 s_checksum; /* crc32c(superblock) */ 1126 1118 }; 1127 1119 1128 1120 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) ··· 1424 1414 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1425 1415 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1426 1416 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 1417 + /* 1418 + * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When 1419 + * METADATA_CSUM is set, group descriptor checksums use the same algorithm as 1420 + * all other data structures' checksums. 
However, the METADATA_CSUM and 1421 + * GDT_CSUM bits are mutually exclusive. 1422 + */ 1427 1423 #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 1428 1424 1429 1425 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 ··· 1543 1527 }; 1544 1528 1545 1529 /* 1530 + * This is a bogus directory entry at the end of each leaf block that 1531 + * records checksums. 1532 + */ 1533 + struct ext4_dir_entry_tail { 1534 + __le32 det_reserved_zero1; /* Pretend to be unused */ 1535 + __le16 det_rec_len; /* 12 */ 1536 + __u8 det_reserved_zero2; /* Zero name length */ 1537 + __u8 det_reserved_ft; /* 0xDE, fake file type */ 1538 + __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ 1539 + }; 1540 + 1541 + /* 1546 1542 * Ext4 directory file types. Only the low 3 bits are used. The 1547 1543 * other bits are reserved for now. 1548 1544 */ ··· 1568 1540 #define EXT4_FT_SYMLINK 7 1569 1541 1570 1542 #define EXT4_FT_MAX 8 1543 + 1544 + #define EXT4_FT_DIR_CSUM 0xDE 1571 1545 1572 1546 /* 1573 1547 * EXT4_DIR_PAD defines the directory entries boundaries ··· 1771 1741 __le16 mmp_check_interval; 1772 1742 1773 1743 __le16 mmp_pad1; 1774 - __le32 mmp_pad2[227]; 1744 + __le32 mmp_pad2[226]; 1745 + __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ 1775 1746 }; 1776 1747 1777 1748 /* arguments passed to the mmp thread */
+13
fs/ext4/ext4_extents.h
··· 63 63 * ext4_inode has i_block array (60 bytes total). 64 64 * The first 12 bytes store ext4_extent_header; 65 65 * the remainder stores an array of ext4_extent. 66 + * For non-inode extent blocks, ext4_extent_tail 67 + * follows the array. 66 68 */ 69 + 70 + /* 71 + * This is the extent tail on-disk structure. 72 + * All other extent structures are 12 bytes long. It turns out that 73 + * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which 74 + * covers all valid ext4 block sizes. Therefore, this tail structure can be 75 + * crammed into the end of the block without having to rebalance the tree. 76 + */ 77 + struct ext4_extent_tail { 78 + __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ 79 + }; 67 80 68 81 /* 69 82 * This is the extent on-disk structure.
+8
fs/ext4/namei.c
··· 145 145 u16 size; 146 146 }; 147 147 148 + /* 149 + * This goes at the end of each htree block. 150 + */ 151 + struct dx_tail { 152 + u32 dt_reserved; 153 + __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */ 154 + }; 155 + 148 156 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); 149 157 static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); 150 158 static inline unsigned dx_get_hash(struct dx_entry *entry);
+3 -1
fs/ext4/xattr.h
··· 27 27 __le32 h_refcount; /* reference count */ 28 28 __le32 h_blocks; /* number of disk blocks used */ 29 29 __le32 h_hash; /* hash value of all attributes */ 30 - __u32 h_reserved[4]; /* zero right now */ 30 + __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ 31 + /* id = inum if refcount=1, blknum otherwise */ 32 + __u32 h_reserved[3]; /* zero right now */ 31 33 }; 32 34 33 35 struct ext4_xattr_ibody_header {