Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

jbd2: fix descriptor block size handling errors with journal_csum

It turns out that there are some serious problems with the on-disk
format of journal checksum v2. The foremost is that the function to
calculate descriptor tag size returns sizes that are too big. This
causes alignment issues on some architectures and is compounded by the
fact that some parts of jbd2 use the structure size (incorrectly) to
determine the presence of a 64bit journal instead of checking the
feature flags.

Therefore, introduce journal checksum v3, which enlarges the
descriptor block tag format to allow for full 32-bit checksums of
journal blocks, fix the journal tag function to return the correct
sizes, and fix the jbd2 recovery code to use feature flags to
determine 64bitness.

Add a few function helpers so we don't have to open-code quite so
many pieces.

Switching to a 16-byte descriptor block tag size was found to increase
journal size overhead by a maximum of 0.1% when converting a 32-bit
journal with no checksumming to a 32-bit journal with checksum v3
enabled.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reported-by: TR Reardon <thomas_reardon@hotmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org

Authored by Darrick J. Wong and committed by Theodore Ts'o
db9ee220 022eaa75

+95 -49
+3 -2
fs/ext4/super.c
··· 3181 3181 3182 3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3183 3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3184 - /* journal checksum v2 */ 3184 + /* journal checksum v3 */ 3185 3185 compat = 0; 3186 - incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; 3186 + incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; 3187 3187 } else { 3188 3188 /* journal checksum v1 */ 3189 3189 compat = JBD2_FEATURE_COMPAT_CHECKSUM; ··· 3205 3205 jbd2_journal_clear_features(sbi->s_journal, 3206 3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3207 3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | 3208 + JBD2_FEATURE_INCOMPAT_CSUM_V3 | 3208 3209 JBD2_FEATURE_INCOMPAT_CSUM_V2); 3209 3210 } 3210 3211
+12 -9
fs/jbd2/commit.c
··· 97 97 struct commit_header *h; 98 98 __u32 csum; 99 99 100 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 100 + if (!jbd2_journal_has_csum_v2or3(j)) 101 101 return; 102 102 103 103 h = (struct commit_header *)(bh->b_data); ··· 313 313 return checksum; 314 314 } 315 315 316 - static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, 316 + static void write_tag_block(journal_t *j, journal_block_tag_t *tag, 317 317 unsigned long long block) 318 318 { 319 319 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 320 - if (tag_bytes > JBD2_TAG_SIZE32) 320 + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT)) 321 321 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); 322 322 } 323 323 ··· 327 327 struct jbd2_journal_block_tail *tail; 328 328 __u32 csum; 329 329 330 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 330 + if (!jbd2_journal_has_csum_v2or3(j)) 331 331 return; 332 332 333 333 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - ··· 340 340 static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, 341 341 struct buffer_head *bh, __u32 sequence) 342 342 { 343 + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; 343 344 struct page *page = bh->b_page; 344 345 __u8 *addr; 345 346 __u32 csum32; 346 347 __be32 seq; 347 348 348 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 349 + if (!jbd2_journal_has_csum_v2or3(j)) 349 350 return; 350 351 351 352 seq = cpu_to_be32(sequence); ··· 356 355 bh->b_size); 357 356 kunmap_atomic(addr); 358 357 359 - /* We only have space to store the lower 16 bits of the crc32c. 
*/ 360 - tag->t_checksum = cpu_to_be16(csum32); 358 + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) 359 + tag3->t_checksum = cpu_to_be32(csum32); 360 + else 361 + tag->t_checksum = cpu_to_be16(csum32); 361 362 } 362 363 /* 363 364 * jbd2_journal_commit_transaction ··· 399 396 LIST_HEAD(io_bufs); 400 397 LIST_HEAD(log_bufs); 401 398 402 - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 399 + if (jbd2_journal_has_csum_v2or3(journal)) 403 400 csum_size = sizeof(struct jbd2_journal_block_tail); 404 401 405 402 /* ··· 693 690 tag_flag |= JBD2_FLAG_SAME_UUID; 694 691 695 692 tag = (journal_block_tag_t *) tagp; 696 - write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); 693 + write_tag_block(journal, tag, jh2bh(jh)->b_blocknr); 697 694 tag->t_flags = cpu_to_be16(tag_flag); 698 695 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], 699 696 commit_transaction->t_tid);
+37 -19
fs/jbd2/journal.c
··· 124 124 /* Checksumming functions */ 125 125 static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 126 126 { 127 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 127 + if (!jbd2_journal_has_csum_v2or3(j)) 128 128 return 1; 129 129 130 130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; ··· 145 145 146 146 static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 147 147 { 148 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 148 + if (!jbd2_journal_has_csum_v2or3(j)) 149 149 return 1; 150 150 151 151 return sb->s_checksum == jbd2_superblock_csum(j, sb); ··· 153 153 154 154 static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 155 155 { 156 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 156 + if (!jbd2_journal_has_csum_v2or3(j)) 157 157 return; 158 158 159 159 sb->s_checksum = jbd2_superblock_csum(j, sb); ··· 1522 1522 goto out; 1523 1523 } 1524 1524 1525 - if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && 1526 - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1525 + if (jbd2_journal_has_csum_v2or3(journal) && 1526 + JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { 1527 1527 /* Can't have checksum v1 and v2 on at the same time! */ 1528 1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " 1529 + "at the same time!\n"); 1530 + goto out; 1531 + } 1532 + 1533 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && 1534 + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { 1535 + /* Can't have checksum v2 and v3 at the same time! 
*/ 1536 + printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " 1529 1537 "at the same time!\n"); 1530 1538 goto out; 1531 1539 } ··· 1544 1536 } 1545 1537 1546 1538 /* Load the checksum driver */ 1547 - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1539 + if (jbd2_journal_has_csum_v2or3(journal)) { 1548 1540 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1549 1541 if (IS_ERR(journal->j_chksum_driver)) { 1550 1542 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); ··· 1561 1553 } 1562 1554 1563 1555 /* Precompute checksum seed for all metadata */ 1564 - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1556 + if (jbd2_journal_has_csum_v2or3(journal)) 1565 1557 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1566 1558 sizeof(sb->s_uuid)); 1567 1559 ··· 1821 1813 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 1822 1814 return 0; 1823 1815 1824 - /* Asking for checksumming v2 and v1? Only give them v2. */ 1825 - if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && 1816 + /* If enabling v2 checksums, turn on v3 instead */ 1817 + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { 1818 + incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; 1819 + incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; 1820 + } 1821 + 1822 + /* Asking for checksumming v3 and v1? Only give them v3. 
*/ 1823 + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && 1826 1824 compat & JBD2_FEATURE_COMPAT_CHECKSUM) 1827 1825 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; 1828 1826 ··· 1837 1823 1838 1824 sb = journal->j_superblock; 1839 1825 1840 - /* If enabling v2 checksums, update superblock */ 1841 - if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1826 + /* If enabling v3 checksums, update superblock */ 1827 + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { 1842 1828 sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 1843 1829 sb->s_feature_compat &= 1844 1830 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); ··· 1856 1842 } 1857 1843 1858 1844 /* Precompute checksum seed for all metadata */ 1859 - if (JBD2_HAS_INCOMPAT_FEATURE(journal, 1860 - JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1845 + if (jbd2_journal_has_csum_v2or3(journal)) 1861 1846 journal->j_csum_seed = jbd2_chksum(journal, ~0, 1862 1847 sb->s_uuid, 1863 1848 sizeof(sb->s_uuid)); ··· 1865 1852 /* If enabling v1 checksums, downgrade superblock */ 1866 1853 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) 1867 1854 sb->s_feature_incompat &= 1868 - ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); 1855 + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | 1856 + JBD2_FEATURE_INCOMPAT_CSUM_V3); 1869 1857 1870 1858 sb->s_feature_compat |= cpu_to_be32(compat); 1871 1859 sb->s_feature_ro_compat |= cpu_to_be32(ro); ··· 2179 2165 */ 2180 2166 size_t journal_tag_bytes(journal_t *journal) 2181 2167 { 2182 - journal_block_tag_t tag; 2183 - size_t x = 0; 2168 + size_t sz; 2169 + 2170 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) 2171 + return sizeof(journal_block_tag3_t); 2172 + 2173 + sz = sizeof(journal_block_tag_t); 2184 2174 2185 2175 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 2186 - x += sizeof(tag.t_checksum); 2176 + sz += sizeof(__u16); 2187 2177 2188 2178 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 2189 - return x + JBD2_TAG_SIZE64; 2179 + return sz; 
2190 2180 else 2191 - return x + JBD2_TAG_SIZE32; 2181 + return sz - sizeof(__u32); 2192 2182 } 2193 2183 2194 2184 /*
+15 -11
fs/jbd2/recovery.c
··· 181 181 __be32 provided; 182 182 __u32 calculated; 183 183 184 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 184 + if (!jbd2_journal_has_csum_v2or3(j)) 185 185 return 1; 186 186 187 187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - ··· 205 205 int nr = 0, size = journal->j_blocksize; 206 206 int tag_bytes = journal_tag_bytes(journal); 207 207 208 - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 208 + if (jbd2_journal_has_csum_v2or3(journal)) 209 209 size -= sizeof(struct jbd2_journal_block_tail); 210 210 211 211 tagp = &bh->b_data[sizeof(journal_header_t)]; ··· 338 338 return err; 339 339 } 340 340 341 - static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) 341 + static inline unsigned long long read_tag_block(journal_t *journal, 342 + journal_block_tag_t *tag) 342 343 { 343 344 unsigned long long block = be32_to_cpu(tag->t_blocknr); 344 - if (tag_bytes > JBD2_TAG_SIZE32) 345 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 345 346 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 346 347 return block; 347 348 } ··· 385 384 __be32 provided; 386 385 __u32 calculated; 387 386 388 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 387 + if (!jbd2_journal_has_csum_v2or3(j)) 389 388 return 1; 390 389 391 390 h = buf; ··· 400 399 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 401 400 void *buf, __u32 sequence) 402 401 { 402 + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; 403 403 __u32 csum32; 404 404 __be32 seq; 405 405 406 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 406 + if (!jbd2_journal_has_csum_v2or3(j)) 407 407 return 1; 408 408 409 409 seq = cpu_to_be32(sequence); 410 410 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 411 411 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 412 412 413 - return tag->t_checksum == 
cpu_to_be16(csum32); 413 + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) 414 + return tag3->t_checksum == cpu_to_be32(csum32); 415 + else 416 + return tag->t_checksum == cpu_to_be16(csum32); 414 417 } 415 418 416 419 static int do_one_pass(journal_t *journal, ··· 518 513 switch(blocktype) { 519 514 case JBD2_DESCRIPTOR_BLOCK: 520 515 /* Verify checksum first */ 521 - if (JBD2_HAS_INCOMPAT_FEATURE(journal, 522 - JBD2_FEATURE_INCOMPAT_CSUM_V2)) 516 + if (jbd2_journal_has_csum_v2or3(journal)) 523 517 descr_csum_size = 524 518 sizeof(struct jbd2_journal_block_tail); 525 519 if (descr_csum_size > 0 && ··· 579 575 unsigned long long blocknr; 580 576 581 577 J_ASSERT(obh != NULL); 582 - blocknr = read_tag_block(tag_bytes, 578 + blocknr = read_tag_block(journal, 583 579 tag); 584 580 585 581 /* If the block has been ··· 818 814 __be32 provided; 819 815 __u32 calculated; 820 816 821 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 817 + if (!jbd2_journal_has_csum_v2or3(j)) 822 818 return 1; 823 819 824 820 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
+3 -3
fs/jbd2/revoke.c
··· 91 91 #include <linux/list.h> 92 92 #include <linux/init.h> 93 93 #include <linux/bio.h> 94 - #endif 95 94 #include <linux/log2.h> 95 + #endif 96 96 97 97 static struct kmem_cache *jbd2_revoke_record_cache; 98 98 static struct kmem_cache *jbd2_revoke_table_cache; ··· 597 597 offset = *offsetp; 598 598 599 599 /* Do we need to leave space at the end for a checksum? */ 600 - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 600 + if (jbd2_journal_has_csum_v2or3(journal)) 601 601 csum_size = sizeof(struct jbd2_journal_revoke_tail); 602 602 603 603 /* Make sure we have a descriptor with space left for the record */ ··· 644 644 struct jbd2_journal_revoke_tail *tail; 645 645 __u32 csum; 646 646 647 - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 647 + if (!jbd2_journal_has_csum_v2or3(j)) 648 648 return; 649 649 650 650 tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
+25 -5
include/linux/jbd2.h
··· 159 159 * journal_block_tag (in the descriptor). The other h_chksum* fields are 160 160 * not used. 161 161 * 162 - * Checksum v1 and v2 are mutually exclusive features. 162 + * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses 163 + * journal_block_tag3_t to store a full 32-bit checksum. Everything else 164 + * is the same as v2. 165 + * 166 + * Checksum v1, v2, and v3 are mutually exclusive features. 163 167 */ 164 168 struct commit_header { 165 169 __be32 h_magic; ··· 183 179 * raw struct shouldn't be used for pointer math or sizeof() - use 184 180 * journal_tag_bytes(journal) instead to compute this. 185 181 */ 182 + typedef struct journal_block_tag3_s 183 + { 184 + __be32 t_blocknr; /* The on-disk block number */ 185 + __be32 t_flags; /* See below */ 186 + __be32 t_blocknr_high; /* most-significant high 32bits. */ 187 + __be32 t_checksum; /* crc32c(uuid+seq+block) */ 188 + } journal_block_tag3_t; 189 + 186 190 typedef struct journal_block_tag_s 187 191 { 188 192 __be32 t_blocknr; /* The on-disk block number */ ··· 198 186 __be16 t_flags; /* See below */ 199 187 __be32 t_blocknr_high; /* most-significant high 32bits. 
*/ 200 188 } journal_block_tag_t; 201 - 202 - #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) 203 - #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) 204 189 205 190 /* Tail of descriptor block, for checksumming */ 206 191 struct jbd2_journal_block_tail { ··· 293 284 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 294 285 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 295 286 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 287 + #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 296 288 297 289 /* Features known to this kernel version: */ 298 290 #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM ··· 301 291 #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ 302 292 JBD2_FEATURE_INCOMPAT_64BIT | \ 303 293 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ 304 - JBD2_FEATURE_INCOMPAT_CSUM_V2) 294 + JBD2_FEATURE_INCOMPAT_CSUM_V2 | \ 295 + JBD2_FEATURE_INCOMPAT_CSUM_V3) 305 296 306 297 #ifdef __KERNEL__ 307 298 ··· 1306 1295 1307 1296 extern int jbd2_journal_blocks_per_page(struct inode *inode); 1308 1297 extern size_t journal_tag_bytes(journal_t *journal); 1298 + 1299 + static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) 1300 + { 1301 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) || 1302 + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) 1303 + return 1; 1304 + 1305 + return 0; 1306 + } 1309 1307 1310 1308 /* 1311 1309 * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for