Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: improve checksum error messages

new helpers:
- bch2_csum_to_text()
- bch2_csum_err_msg()

standardize our checksum error messages a bit, and print out the
checksums a bit more nicely.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+78 -29
+12 -6
fs/bcachefs/btree_io.c
··· 1042 1042 1043 1043 nonce = btree_nonce(i, b->written << 9); 1044 1044 1045 - csum_bad = bch2_crc_cmp(b->data->csum, 1046 - csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data)); 1045 + struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); 1046 + csum_bad = bch2_crc_cmp(b->data->csum, csum); 1047 1047 if (csum_bad) 1048 1048 bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); 1049 1049 ··· 1051 1051 -BCH_ERR_btree_node_read_err_want_retry, 1052 1052 c, ca, b, i, 1053 1053 bset_bad_csum, 1054 - "invalid checksum"); 1054 + "%s", 1055 + (printbuf_reset(&buf), 1056 + bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), 1057 + buf.buf)); 1055 1058 1056 1059 ret = bset_encrypt(c, i, b->written << 9); 1057 1060 if (bch2_fs_fatal_err_on(ret, c, ··· 1083 1080 "unknown checksum type %llu", BSET_CSUM_TYPE(i)); 1084 1081 1085 1082 nonce = btree_nonce(i, b->written << 9); 1086 - csum_bad = bch2_crc_cmp(bne->csum, 1087 - csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne)); 1083 + struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); 1084 + csum_bad = bch2_crc_cmp(bne->csum, csum); 1088 1085 if (csum_bad) 1089 1086 bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); 1090 1087 ··· 1092 1089 -BCH_ERR_btree_node_read_err_want_retry, 1093 1090 c, ca, b, i, 1094 1091 bset_bad_csum, 1095 - "invalid checksum"); 1092 + "%s", 1093 + (printbuf_reset(&buf), 1094 + bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), 1095 + buf.buf)); 1096 1096 1097 1097 ret = bset_encrypt(c, i, b->written << 9); 1098 1098 if (bch2_fs_fatal_err_on(ret, c,
+23
fs/bcachefs/checksum.h
··· 45 45 bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\ 46 46 }) 47 47 48 + static inline void bch2_csum_to_text(struct printbuf *out, 49 + enum bch_csum_type type, 50 + struct bch_csum csum) 51 + { 52 + const u8 *p = (u8 *) &csum; 53 + unsigned bytes = type < BCH_CSUM_NR ? bch_crc_bytes[type] : 16; 54 + 55 + for (unsigned i = 0; i < bytes; i++) 56 + prt_hex_byte(out, p[i]); 57 + } 58 + 59 + static inline void bch2_csum_err_msg(struct printbuf *out, 60 + enum bch_csum_type type, 61 + struct bch_csum expected, 62 + struct bch_csum got) 63 + { 64 + prt_printf(out, "checksum error: got "); 65 + bch2_csum_to_text(out, type, got); 66 + prt_str(out, " should be "); 67 + bch2_csum_to_text(out, type, expected); 68 + prt_printf(out, " type %s", bch2_csum_types[type]); 69 + } 70 + 48 71 int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); 49 72 int bch2_request_key(struct bch_sb *, struct bch_key *); 50 73 #ifndef __KERNEL__
+8 -3
fs/bcachefs/io_read.c
··· 642 642 goto out; 643 643 } 644 644 645 + struct printbuf buf = PRINTBUF; 646 + buf.atomic++; 647 + prt_str(&buf, "data "); 648 + bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); 649 + 645 650 bch_err_inum_offset_ratelimited(ca, 646 651 rbio->read_pos.inode, 647 652 rbio->read_pos.offset << 9, 648 - "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", 649 - rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, 650 - csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); 653 + "data %s", buf.buf); 654 + printbuf_exit(&buf); 655 + 651 656 bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); 652 657 bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); 653 658 goto out;
+31 -14
fs/bcachefs/journal_io.c
··· 27 27 }}; 28 28 } 29 29 30 - static bool jset_csum_good(struct bch_fs *c, struct jset *j) 30 + static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *csum) 31 31 { 32 - return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) && 33 - !bch2_crc_cmp(j->csum, 34 - csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j)); 32 + if (!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j))) { 33 + *csum = (struct bch_csum) {}; 34 + return false; 35 + } 36 + 37 + *csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j); 38 + return !bch2_crc_cmp(j->csum, *csum); 35 39 } 36 40 37 41 static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) ··· 938 934 u64 offset = bucket_to_sector(ca, ja->buckets[bucket]), 939 935 end = offset + ca->mi.bucket_size; 940 936 bool saw_bad = false, csum_good; 937 + struct printbuf err = PRINTBUF; 941 938 int ret = 0; 942 939 943 940 pr_debug("reading %u", bucket); ··· 971 966 * found on a different device, and missing or 972 967 * no journal entries will be handled later 973 968 */ 974 - return 0; 969 + goto out; 975 970 } 976 971 977 972 j = buf->data; ··· 988 983 ret = journal_read_buf_realloc(buf, 989 984 vstruct_bytes(j)); 990 985 if (ret) 991 - return ret; 986 + goto err; 992 987 } 993 988 goto reread; 994 989 case JOURNAL_ENTRY_NONE: 995 990 if (!saw_bad) 996 - return 0; 991 + goto out; 997 992 /* 998 993 * On checksum error we don't really trust the size 999 994 * field of the journal entry we read, so try reading ··· 1002 997 sectors = block_sectors(c); 1003 998 goto next_block; 1004 999 default: 1005 - return ret; 1000 + goto err; 1006 1001 } 1007 1002 1008 1003 /* ··· 1012 1007 * bucket: 1013 1008 */ 1014 1009 if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) 1015 - return 0; 1010 + goto out; 1016 1011 1017 1012 ja->bucket_seq[bucket] = le64_to_cpu(j->seq); 1018 1013 1019 - csum_good = jset_csum_good(c, j); 1014 + enum bch_csum_type csum_type = JSET_CSUM_TYPE(j); 1015 + struct bch_csum csum; 1016 + csum_good = jset_csum_good(c, j, &csum); 1017 + 1020 1018 if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum, 1021 - "journal checksum error")) 1019 + "%s", 1020 + (printbuf_reset(&err), 1021 + prt_str(&err, "journal "), 1022 + bch2_csum_err_msg(&err, csum_type, j->csum, csum), 1023 + err.buf))) 1022 1024 saw_bad = true; 1023 1025 1024 1026 ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), 1025 1027 j->encrypted_start, 1026 1028 vstruct_end(j) - (void *) j->encrypted_start); 1027 1029 bch2_fs_fatal_err_on(ret, c, 1028 - "error decrypting journal entry: %i", ret); 1030 + "error decrypting journal entry: %s", 1031 + bch2_err_str(ret)); 1029 1032 1030 1033 mutex_lock(&jlist->lock); 1031 1034 ret = journal_entry_add(c, ca, (struct journal_ptr) { ··· 1052 1039 case JOURNAL_ENTRY_ADD_OUT_OF_RANGE: 1053 1040 break; 1054 1041 default: 1055 - return ret; 1042 + goto err; 1056 1043 } 1057 1044 next_block: 1058 1045 pr_debug("next"); ··· 1061 1048 j = ((void *) j) + (sectors << 9); 1062 1049 } 1063 1050 1064 - return 0; 1051 + out: 1052 + ret = 0; 1053 + err: 1054 + printbuf_exit(&err); 1055 + return ret; 1065 1056 } 1066 1057 1067 1058 static CLOSURE_CALLBACK(bch2_journal_read_device)
+4 -6
fs/bcachefs/super-io.c
··· 612 612 613 613 static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err) 614 614 { 615 - struct bch_csum csum; 616 615 size_t bytes; 617 616 int ret; 618 617 reread: ··· 652 653 goto reread; 653 654 } 654 655 655 - if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) { 656 + enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb); 657 + if (csum_type >= BCH_CSUM_NR) { 656 658 prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); 657 659 return -BCH_ERR_invalid_sb_csum_type; 658 660 } 659 661 660 662 /* XXX: verify MACs */ 661 - csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), 662 - null_nonce(), sb->sb); 663 - 663 + struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb); 664 664 if (bch2_crc_cmp(csum, sb->sb->csum)) { 665 - prt_printf(err, "bad checksum"); 665 + bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum); 666 666 return -BCH_ERR_invalid_sb_csum; 667 667 } 668 668