bcachefs: Fsck for need_discard & freespace btrees

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2022-02-17 03:11:39 -05:00 committed by Kent Overstreet
parent caece7fe3f
commit 5add07d56a
5 changed files with 442 additions and 1 deletion

View File

@ -580,6 +580,332 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
return 0; return 0;
} }
/*
 * Cross-check one alloc key against the need_discard and freespace btrees.
 *
 * Peeks the next key from @alloc_iter and derives, from its bucket state,
 * the key type each of the two btrees should hold for that bucket:
 * KEY_TYPE_set in need_discard when the state is BUCKET_need_discard,
 * KEY_TYPE_set in freespace when the state is BUCKET_free, and type 0
 * otherwise.  A slot whose type disagrees is reported through fsck_err_on()
 * and, when that evaluates true, a replacement key of the expected type is
 * queued on @trans.  Nothing is committed in this function.
 *
 * Returns 0 if the iterator has no more keys or the key was handled,
 * otherwise the first nonzero value produced by an iterator peek, the
 * transaction allocation, or bch2_trans_update().
 *
 * NOTE(review): fsck_err_on() presumably may also assign ret and jump to the
 * fsck_err label; its definition is not visible from here - confirm.
 */
static int bch2_check_alloc_key(struct btree_trans *trans,
				struct btree_iter *alloc_iter)
{
	struct bch_fs *c = trans->c;
	struct btree_iter discard_iter, freespace_iter;
	struct bch_alloc_v4 a;
	unsigned discard_key_type, freespace_key_type;
	struct bkey_s_c alloc_k, k;
	struct printbuf buf = PRINTBUF;
	int ret;

	alloc_k = bch2_btree_iter_peek(alloc_iter);
	if (!alloc_k.k)
		return 0;

	ret = bkey_err(alloc_k);
	if (ret)
		return ret;

	bch2_alloc_to_v4(alloc_k, &a);

	/* What each index btree ought to contain for this bucket. */
	discard_key_type = bucket_state(a) == BUCKET_need_discard
		? KEY_TYPE_set : 0;
	freespace_key_type = bucket_state(a) == BUCKET_free
		? KEY_TYPE_set : 0;

	/*
	 * Both iterators are initialised before the first jump to the exit
	 * labels, so the cleanup below can release them unconditionally.
	 */
	bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard,
			     alloc_k.k->p, 0);
	bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace,
			     alloc_freespace_pos(alloc_k.k->p, a), 0);

	k = bch2_btree_iter_peek_slot(&discard_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (fsck_err_on(k.k->type != discard_key_type, c,
			"incorrect key in need_discard btree (got %s should be %s)\n"
			" %s",
			bch2_bkey_types[k.k->type],
			bch2_bkey_types[discard_key_type],
			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		struct bkey_i *update =
			bch2_trans_kmalloc(trans, sizeof(*update));

		ret = PTR_ERR_OR_ZERO(update);
		if (ret)
			goto err;

		bkey_init(&update->k);
		update->k.type = discard_key_type;
		update->k.p = discard_iter.pos;

		ret = bch2_trans_update(trans, &discard_iter, update, 0);
		if (ret)
			goto err;
	}

	k = bch2_btree_iter_peek_slot(&freespace_iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (fsck_err_on(k.k->type != freespace_key_type, c,
			"incorrect key in freespace btree (got %s should be %s)\n"
			" %s",
			bch2_bkey_types[k.k->type],
			bch2_bkey_types[freespace_key_type],
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		struct bkey_i *update =
			bch2_trans_kmalloc(trans, sizeof(*update));

		ret = PTR_ERR_OR_ZERO(update);
		if (ret)
			goto err;

		bkey_init(&update->k);
		update->k.type = freespace_key_type;
		update->k.p = freespace_iter.pos;
		/* Unlike the need_discard update, this key is given size 1. */
		bch2_key_resize(&update->k, 1);

		ret = bch2_trans_update(trans, &freespace_iter, update, 0);
		if (ret)
			goto err;
	}
err:
fsck_err:
	bch2_trans_iter_exit(trans, &freespace_iter);
	bch2_trans_iter_exit(trans, &discard_iter);
	printbuf_exit(&buf);
	return ret;
}
/*
 * Report whether @pos refers to a bucket that exists: pos.inode must index a
 * populated entry of c->devs below c->sb.nr_devices, and pos.offset must lie
 * in [first_bucket, nbuckets) of that device.
 */
static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
{
	struct bch_dev *dev;

	if (pos.inode >= c->sb.nr_devices)
		return false;
	if (!c->devs[pos.inode])
		return false;

	dev = bch_dev_bkey_exists(c, pos.inode);

	if (pos.offset < dev->mi.first_bucket)
		return false;

	return pos.offset < dev->mi.nbuckets;
}
/*
 * Validate one freespace btree key against the alloc btree.
 *
 * Peeks the next key from @freespace_iter, splits the iterator position's
 * offset into a bucket number (low 56 bits) and generation bits (high 8
 * bits), and checks that the bucket exists, that the matching alloc key is
 * in state BUCKET_free, and that its freespace genbits equal the ones
 * encoded in the position.  When a check fails and fsck_err_on() evaluates
 * true, a replacement key is queued at the freespace position and the
 * transaction is committed from inside this function.
 *
 * @initial is not referenced in this function.
 *
 * Returns 1 when the iterator has no more keys, 0 when the key was handled,
 * otherwise the nonzero value from the failing peek, allocation, update or
 * commit.
 *
 * NOTE(review): fsck_err_on() presumably may also assign ret and jump to the
 * fsck_err label; its definition is not visible from here - confirm.
 */
static int bch2_check_freespace_key(struct btree_trans *trans,
struct btree_iter *freespace_iter,
bool initial)
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter;
struct bkey_s_c k, freespace_k;
struct bch_alloc_v4 a;
u64 genbits;
struct bpos pos;
struct bkey_i *update;
struct printbuf buf = PRINTBUF;
int ret;
freespace_k = bch2_btree_iter_peek(freespace_iter);
/* No key left: the positive return lets the caller's loop stop. */
if (!freespace_k.k)
return 1;
ret = bkey_err(freespace_k);
if (ret)
return ret;
/* Low 56 bits of the offset are the bucket, the top 8 bits are genbits. */
pos = freespace_iter->pos;
pos.offset &= ~(~0ULL << 56);
genbits = freespace_iter->pos.offset & (~0ULL << 56);
/* Initialised before any jump to the exit labels, which release it. */
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, pos, 0);
/*
 * NOTE(review): if fsck_err_on() evaluates false here, execution falls
 * through to the alloc lookup for a position that failed the existence
 * check - confirm that is intended.
 */
if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
"%llu:%llu set in freespace btree but device or bucket does not exist",
pos.inode, pos.offset))
goto delete;
k = bch2_btree_iter_peek_slot(&alloc_iter);
ret = bkey_err(k);
if (ret)
goto err;
bch2_alloc_to_v4(k, &a);
if (fsck_err_on(bucket_state(a) != BUCKET_free ||
genbits != alloc_freespace_genbits(a), c,
"%s\n incorrectly set in freespace index (free %u, genbits %llu should be %llu)",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
bucket_state(a) == BUCKET_free,
genbits >> 56, alloc_freespace_genbits(a) >> 56))
goto delete;
/* Single exit path shared by success, errors and the delete branch. */
out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &alloc_iter);
printbuf_exit(&buf);
return ret;
/* Reached only via goto: the label sits after the return above. */
delete:
update = bch2_trans_kmalloc(trans, sizeof(*update));
ret = PTR_ERR_OR_ZERO(update);
if (ret)
goto err;
/* Freshly initialised key of size 1 at the freespace position. */
bkey_init(&update->k);
update->k.p = freespace_iter->pos;
bch2_key_resize(&update->k, 1);
/* Commit only if queueing the update succeeded. */
ret = bch2_trans_update(trans, freespace_iter, update, 0) ?:
bch2_trans_commit(trans, NULL, NULL, 0);
goto out;
}
/*
 * Fsck pass over the allocation index btrees, in two phases:
 *
 *  1. walk the alloc btree and run bch2_check_alloc_key() on each key,
 *     skipping every device whose freespace_initialized flag is clear;
 *  2. if phase 1 finished without error, walk the freespace btree and run
 *     bch2_check_freespace_key() on each key until it returns nonzero.
 *
 * @initial is only forwarded to bch2_check_freespace_key().
 *
 * Returns a negative error code, or 0; positive values (such as the
 * end-of-btree indication from bch2_check_freespace_key()) are reported
 * as 0.
 */
int bch2_check_alloc_info(struct bch_fs *c, bool initial)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0, prev_dev = -1;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		/* Look the device up only when the key moves to a new one. */
		if (k.k->p.inode != prev_dev) {
			struct bch_dev *dev = bch_dev_bkey_exists(c, k.k->p.inode);

			if (!dev->mi.freespace_initialized) {
				/* Jump straight to the next device's first bucket. */
				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
			}

			prev_dev = k.k->p.inode;
		}

		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
			bch2_check_alloc_key(&trans, &iter));
		if (ret)
			break;
	}
	bch2_trans_iter_exit(&trans, &iter);

	if (!ret) {
		bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN,
				     BTREE_ITER_PREFETCH);

		for (;;) {
			ret = __bch2_trans_do(&trans, NULL, NULL, 0,
				bch2_check_freespace_key(&trans, &iter, initial));
			if (ret)
				break;

			bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
		}

		bch2_trans_iter_exit(&trans, &iter);
	}

	bch2_trans_exit(&trans);

	if (ret > 0)
		ret = 0;

	return ret;
}
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
struct btree_iter *alloc_iter)
{
struct bch_fs *c = trans->c;
struct btree_iter lru_iter;
struct bch_alloc_v4 a;
struct bkey_s_c alloc_k, k;
struct printbuf buf = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
int ret;
alloc_k = bch2_btree_iter_peek(alloc_iter);
if (!alloc_k.k)
return 0;
ret = bkey_err(alloc_k);
if (ret)
return ret;
bch2_alloc_to_v4(alloc_k, &a);
if (bucket_state(a) != BUCKET_cached)
return 0;
bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
POS(alloc_k.k->p.inode, a.io_time[READ]), 0);
k = bch2_btree_iter_peek_slot(&lru_iter);
ret = bkey_err(k);
if (ret)
goto err;
if (fsck_err_on(!a.io_time[READ], c,
"cached bucket with read_time 0\n"
" %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
fsck_err_on(k.k->type != KEY_TYPE_lru ||
le64_to_cpu(bkey_s_c_to_lru(k).v->idx) != alloc_k.k->p.offset, c,
"incorrect/missing lru entry\n"
" %s\n"
" %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
u64 read_time = a.io_time[READ];
if (!a.io_time[READ])
a.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
ret = bch2_lru_change(trans,
alloc_k.k->p.inode,
alloc_k.k->p.offset,
0, &a.io_time[READ]);
if (ret)
goto err;
if (a.io_time[READ] != read_time) {
struct bkey_i_alloc_v4 *a_mut =
bch2_alloc_to_v4_mut(trans, alloc_k);
ret = PTR_ERR_OR_ZERO(a_mut);
if (ret)
goto err;
a_mut->v.io_time[READ] = a.io_time[READ];
ret = bch2_trans_update(trans, alloc_iter,
&a_mut->k_i, BTREE_TRIGGER_NORUN);
if (ret)
goto err;
}
}
err:
fsck_err:
bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf2);
printbuf_exit(&buf);
return ret;
}
/*
 * Walk the whole alloc btree and run bch2_check_alloc_to_lru_ref() on each
 * key, each call wrapped in __bch2_trans_do() with BTREE_INSERT_NOFAIL and
 * BTREE_INSERT_LAZY_RW.  Stops at the first nonzero result.
 *
 * Returns a negative error code, or 0; positive results are reported as 0.
 */
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		ret = __bch2_trans_do(&trans, NULL, NULL,
				      BTREE_INSERT_NOFAIL|
				      BTREE_INSERT_LAZY_RW,
			bch2_check_alloc_to_lru_ref(&trans, &iter));
		if (ret)
			break;
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);

	if (ret > 0)
		ret = 0;

	return ret;
}
static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
struct bch_dev *ca, bool *discard_done) struct bch_dev *ca, bool *discard_done)
{ {

View File

@ -113,6 +113,8 @@ int bch2_alloc_read(struct bch_fs *, bool, bool);
int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c, int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c,
struct bkey_i *, unsigned); struct bkey_i *, unsigned);
int bch2_check_alloc_info(struct bch_fs *, bool);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_do_discards(struct bch_fs *); void bch2_do_discards(struct bch_fs *);
static inline bool should_invalidate_buckets(struct bch_dev *ca) static inline bool should_invalidate_buckets(struct bch_dev *ca)

View File

@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "alloc_background.h"
#include "btree_iter.h" #include "btree_iter.h"
#include "btree_update.h" #include "btree_update.h"
#include "error.h" #include "error.h"
#include "lru.h" #include "lru.h"
#include "recovery.h"
const char *bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k) const char *bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k)
{ {
@ -117,3 +119,85 @@ int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx,
return lru_delete(trans, id, idx, old_time) ?: return lru_delete(trans, id, idx, old_time) ?:
lru_set(trans, id, idx, new_time); lru_set(trans, id, idx, new_time);
} }
/*
 * Check that one lru btree key points at a matching alloc key.
 *
 * Peeks the next key from @lru_iter, reads the bucket index from its value
 * and looks up the alloc btree slot at POS(lru key inode, idx).  The entry is
 * considered wrong if that bucket is not in state BUCKET_cached or if its
 * READ io_time differs from the lru key's offset; in that case, when
 * fsck_err_on() evaluates true, a freshly initialised key is queued at the
 * lru position to replace the entry.  Nothing is committed here.
 *
 * @initial is not referenced in this function.
 *
 * Returns 0 if the iterator has no more keys or the key was handled,
 * otherwise the nonzero value from the failing step.
 *
 * NOTE(review): fsck_err_on() presumably may also assign ret and jump to the
 * fsck_err label; its definition is not visible from here - confirm.
 */
static int bch2_check_lru_key(struct btree_trans *trans,
			      struct btree_iter *lru_iter, bool initial)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter;
	struct bkey_s_c lru_k, alloc_k;
	struct bch_alloc_v4 a;
	struct printbuf lru_buf = PRINTBUF;
	struct printbuf alloc_buf = PRINTBUF;
	u64 bucket_idx;
	int ret;

	lru_k = bch2_btree_iter_peek(lru_iter);
	if (!lru_k.k)
		return 0;

	ret = bkey_err(lru_k);
	if (ret)
		return ret;

	bucket_idx = le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx);

	bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
			     POS(lru_k.k->p.inode, bucket_idx), 0);

	alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
	ret = bkey_err(alloc_k);
	if (ret)
		goto err;

	bch2_alloc_to_v4(alloc_k, &a);

	if (fsck_err_on(bucket_state(a) != BUCKET_cached ||
			a.io_time[READ] != lru_k.k->p.offset, c,
			"incorrect lru entry %s\n"
			" for %s",
			(bch2_bkey_val_to_text(&lru_buf, c, lru_k), lru_buf.buf),
			(bch2_bkey_val_to_text(&alloc_buf, c, alloc_k), alloc_buf.buf))) {
		struct bkey_i *replacement =
			bch2_trans_kmalloc(trans, sizeof(*replacement));

		ret = PTR_ERR_OR_ZERO(replacement);
		if (ret)
			goto err;

		/* Only the position is set; the type is left as bkey_init() made it. */
		bkey_init(&replacement->k);
		replacement->k.p = lru_iter->pos;

		ret = bch2_trans_update(trans, lru_iter, replacement, 0);
		if (ret)
			goto err;
	}
err:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&alloc_buf);
	printbuf_exit(&lru_buf);
	return ret;
}
/*
 * Walk the whole lru btree and run bch2_check_lru_key() on each key, each
 * call wrapped in __bch2_trans_do().  Stops at the first nonzero result.
 *
 * @initial is only forwarded to bch2_check_lru_key().
 *
 * Returns the last value of ret unchanged: 0 on success, otherwise whatever
 * nonzero value ended the walk.
 */
int bch2_check_lrus(struct bch_fs *c, bool initial)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
			bch2_check_lru_key(&trans, &iter, initial));
		if (ret)
			break;
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);

	return ret;
}

View File

@ -12,4 +12,6 @@ void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_lru_change(struct btree_trans *, u64, u64, u64, u64 *); int bch2_lru_change(struct btree_trans *, u64, u64, u64, u64 *);
int bch2_check_lrus(struct bch_fs *, bool);
#endif /* _BCACHEFS_LRU_H */ #endif /* _BCACHEFS_LRU_H */

View File

@ -16,6 +16,7 @@
#include "journal_io.h" #include "journal_io.h"
#include "journal_reclaim.h" #include "journal_reclaim.h"
#include "journal_seq_blacklist.h" #include "journal_seq_blacklist.h"
#include "lru.h"
#include "move.h" #include "move.h"
#include "quota.h" #include "quota.h"
#include "recovery.h" #include "recovery.h"
@ -1166,13 +1167,26 @@ int bch2_fs_recovery(struct bch_fs *c)
bool metadata_only = c->opts.norecovery; bool metadata_only = c->opts.norecovery;
bch_info(c, "checking allocations"); bch_info(c, "checking allocations");
err = "error in mark and sweep"; err = "error checking allocations";
ret = bch2_gc(c, true, metadata_only); ret = bch2_gc(c, true, metadata_only);
if (ret) if (ret)
goto err; goto err;
bch_verbose(c, "done checking allocations"); bch_verbose(c, "done checking allocations");
} }
if (c->opts.fsck) {
bch_info(c, "checking need_discard and freespace btrees");
err = "error checking need_discard and freespace btrees";
ret = bch2_check_alloc_info(c, true);
if (ret)
goto err;
ret = bch2_check_lrus(c, true);
if (ret)
goto err;
bch_verbose(c, "done checking need_discard and freespace btrees");
}
bch2_stripes_heap_start(c); bch2_stripes_heap_start(c);
clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
@ -1202,6 +1216,19 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret) if (ret)
goto err; goto err;
if (c->opts.fsck) {
bch_info(c, "checking alloc to lru refs");
err = "error checking alloc to lru refs";
ret = bch2_check_alloc_to_lru_refs(c);
if (ret)
goto err;
ret = bch2_check_lrus(c, true);
if (ret)
goto err;
bch_verbose(c, "done checking alloc to lru refs");
}
if (c->sb.version < bcachefs_metadata_version_snapshot_2) { if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
bch2_fs_lazy_rw(c); bch2_fs_lazy_rw(c);