bcachefs: improve checksum error messages

new helpers:
 - bch2_csum_to_text()
 - bch2_csum_err_msg()

standardize our checksum error messages a bit, and print out the
checksums a bit more nicely.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-01-05 11:59:03 -05:00
parent 2d02bfb01b
commit 4819b66e29
5 changed files with 78 additions and 29 deletions

View File

@ -1042,8 +1042,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
nonce = btree_nonce(i, b->written << 9);
csum_bad = bch2_crc_cmp(b->data->csum,
csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data));
struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
csum_bad = bch2_crc_cmp(b->data->csum, csum);
if (csum_bad)
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
@ -1051,7 +1051,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
-BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i,
bset_bad_csum,
"invalid checksum");
"%s",
(printbuf_reset(&buf),
bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
buf.buf));
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
@ -1080,8 +1083,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
"unknown checksum type %llu", BSET_CSUM_TYPE(i));
nonce = btree_nonce(i, b->written << 9);
csum_bad = bch2_crc_cmp(bne->csum,
csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne));
struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
csum_bad = bch2_crc_cmp(bne->csum, csum);
if (csum_bad)
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
@ -1089,7 +1092,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
-BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i,
bset_bad_csum,
"invalid checksum");
"%s",
(printbuf_reset(&buf),
bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
buf.buf));
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,

View File

@ -45,6 +45,29 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\
})
static inline void bch2_csum_to_text(struct printbuf *out,
enum bch_csum_type type,
struct bch_csum csum)
{
const u8 *p = (u8 *) &csum;
unsigned bytes = type < BCH_CSUM_NR ? bch_crc_bytes[type] : 16;
for (unsigned i = 0; i < bytes; i++)
prt_hex_byte(out, p[i]);
}
static inline void bch2_csum_err_msg(struct printbuf *out,
enum bch_csum_type type,
struct bch_csum expected,
struct bch_csum got)
{
prt_printf(out, "checksum error: got ");
bch2_csum_to_text(out, type, got);
prt_str(out, " should be ");
bch2_csum_to_text(out, type, expected);
prt_printf(out, " type %s", bch2_csum_types[type]);
}
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
int bch2_request_key(struct bch_sb *, struct bch_key *);
#ifndef __KERNEL__

View File

@ -642,12 +642,17 @@ static void __bch2_read_endio(struct work_struct *work)
goto out;
}
struct printbuf buf = PRINTBUF;
buf.atomic++;
prt_str(&buf, "data ");
bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
bch_err_inum_offset_ratelimited(ca,
rbio->read_pos.inode,
rbio->read_pos.offset << 9,
"data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
"data %s", buf.buf);
printbuf_exit(&buf);
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
goto out;

View File

@ -27,11 +27,15 @@ static struct nonce journal_nonce(const struct jset *jset)
}};
}
static bool jset_csum_good(struct bch_fs *c, struct jset *j)
static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *csum)
{
return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) &&
!bch2_crc_cmp(j->csum,
csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j));
if (!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j))) {
*csum = (struct bch_csum) {};
return false;
}
*csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
return !bch2_crc_cmp(j->csum, *csum);
}
static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
@ -934,6 +938,7 @@ static int journal_read_bucket(struct bch_dev *ca,
u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
end = offset + ca->mi.bucket_size;
bool saw_bad = false, csum_good;
struct printbuf err = PRINTBUF;
int ret = 0;
pr_debug("reading %u", bucket);
@ -966,7 +971,7 @@ static int journal_read_bucket(struct bch_dev *ca,
* found on a different device, and missing or
* no journal entries will be handled later
*/
return 0;
goto out;
}
j = buf->data;
@ -983,12 +988,12 @@ static int journal_read_bucket(struct bch_dev *ca,
ret = journal_read_buf_realloc(buf,
vstruct_bytes(j));
if (ret)
return ret;
goto err;
}
goto reread;
case JOURNAL_ENTRY_NONE:
if (!saw_bad)
return 0;
goto out;
/*
* On checksum error we don't really trust the size
* field of the journal entry we read, so try reading
@ -997,7 +1002,7 @@ static int journal_read_bucket(struct bch_dev *ca,
sectors = block_sectors(c);
goto next_block;
default:
return ret;
goto err;
}
/*
@ -1007,20 +1012,28 @@ static int journal_read_bucket(struct bch_dev *ca,
* bucket:
*/
if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
return 0;
goto out;
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
csum_good = jset_csum_good(c, j);
enum bch_csum_type csum_type = JSET_CSUM_TYPE(j);
struct bch_csum csum;
csum_good = jset_csum_good(c, j, &csum);
if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum,
"journal checksum error"))
"%s",
(printbuf_reset(&err),
prt_str(&err, "journal "),
bch2_csum_err_msg(&err, csum_type, j->csum, csum),
err.buf)))
saw_bad = true;
ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
j->encrypted_start,
vstruct_end(j) - (void *) j->encrypted_start);
bch2_fs_fatal_err_on(ret, c,
"error decrypting journal entry: %i", ret);
"error decrypting journal entry: %s",
bch2_err_str(ret));
mutex_lock(&jlist->lock);
ret = journal_entry_add(c, ca, (struct journal_ptr) {
@ -1039,7 +1052,7 @@ static int journal_read_bucket(struct bch_dev *ca,
case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
break;
default:
return ret;
goto err;
}
next_block:
pr_debug("next");
@ -1048,7 +1061,11 @@ static int journal_read_bucket(struct bch_dev *ca,
j = ((void *) j) + (sectors << 9);
}
return 0;
out:
ret = 0;
err:
printbuf_exit(&err);
return ret;
}
static CLOSURE_CALLBACK(bch2_journal_read_device)

View File

@ -612,7 +612,6 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
{
struct bch_csum csum;
size_t bytes;
int ret;
reread:
@ -653,17 +652,16 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf
goto reread;
}
if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) {
enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
if (csum_type >= BCH_CSUM_NR) {
prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
return -BCH_ERR_invalid_sb_csum_type;
}
/* XXX: verify MACs */
csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
null_nonce(), sb->sb);
struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
if (bch2_crc_cmp(csum, sb->sb->csum)) {
prt_printf(err, "bad checksum");
bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
return -BCH_ERR_invalid_sb_csum;
}