From f2b542ba42a8b35d9dc43f5eab9791fea76bfd3a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 11 Dec 2022 19:14:30 -0500 Subject: [PATCH] bcachefs: Go RW before check_alloc_info() It's possible to do btree updates before going RW by adding them to the list of updates for journal replay to do, but this is limited by what fits in RAM. This patch switches the second alloc info phase to run after going RW - btree_gc has already ensured the alloc btree itself is correct - and tweaks the allocation path to deal with the potential small inconsistencies. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 32 +++++++++++++++++---------- fs/bcachefs/alloc_foreground.c | 40 ++++++++++++++++++++++------------ fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/recovery.c | 17 +++++++++------ 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 1db0b6253661..f75d05beaf31 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -583,6 +583,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, goto err; if (ca->mi.freespace_initialized && + test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags) && bch2_trans_inconsistent_on(old.k->type != old_type, trans, "incorrect key when %s %s btree (got %s should be %s)\n" " for %s", @@ -1028,21 +1029,28 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, goto write; } - if (bch2_trans_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, trans, - "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" - "%s", - a->v.journal_seq, - c->journal.flushed_seq_ondisk, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -EIO; + if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { + if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + bch2_trans_inconsistent(trans, + "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" + "%s", + a->v.journal_seq, + c->journal.flushed_seq_ondisk, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + } goto out; } - if (bch2_trans_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, trans, - "bucket incorrectly set in need_discard btree\n" - "%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -EIO; + if (a->v.data_type != BCH_DATA_need_discard) { + if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + bch2_trans_inconsistent(trans, + "bucket incorrectly set in need_discard btree\n" + "%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + } + goto out; } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 534dbf197d58..ba14cfe06515 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -316,7 +316,24 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc a = bch2_alloc_to_v4(k, &a_convert); - if (genbits != (alloc_freespace_genbits(*a) >> 56)) { + if (a->data_type != BCH_DATA_free) { + if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + ob = NULL; + goto err; + } + + prt_printf(&buf, "non free bucket in freespace btree\n" + " freespace key "); + bch2_bkey_val_to_text(&buf, c, freespace_k); + prt_printf(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, k); + bch2_trans_inconsistent(trans, "%s", buf.buf); + ob = ERR_PTR(-EIO); + goto err; + } + + if (genbits != (alloc_freespace_genbits(*a) >> 56) && + test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" " freespace key ", genbits, alloc_freespace_genbits(*a) >> 56); @@ -329,17 +346,6 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc } - if (a->data_type != BCH_DATA_free) { - prt_printf(&buf, "non free bucket in freespace btree\n" - " freespace key "); - bch2_bkey_val_to_text(&buf, c, freespace_k); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, k); - bch2_trans_inconsistent(trans, "%s", buf.buf); - ob = ERR_PTR(-EIO); - goto err; - } - ob = __try_alloc_bucket(c, ca, b, reserve, a, s, cl); if (!ob) iter.path->preserve = false; @@ -505,6 +511,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct open_bucket *ob = NULL; + bool freespace = READ_ONCE(ca->mi.freespace_initialized); u64 avail; struct bucket_alloc_state s = { 0 }; bool waiting = false; @@ -543,13 +550,18 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, if (ob) return ob; } - - ob = likely(ca->mi.freespace_initialized) +alloc: + ob = likely(freespace) ? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl) : bch2_bucket_alloc_early(trans, ca, reserve, &s, cl); if (s.skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); + + if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + freespace = false; + goto alloc; + } err: if (!ob) ob = ERR_PTR(-BCH_ERR_no_buckets_found); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 56bc58a7bfcf..ad3bf019487e 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -549,6 +549,7 @@ enum { /* fsck passes: */ BCH_FS_TOPOLOGY_REPAIR_DONE, BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ + BCH_FS_CHECK_ALLOC_DONE, BCH_FS_CHECK_LRUS_DONE, BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, BCH_FS_FSCK_DONE, diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index ebdf9f754e08..61890755d335 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1260,13 +1260,6 @@ int bch2_fs_recovery(struct bch_fs *c) set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); - bch_info(c, "checking need_discard and freespace btrees"); - err = "error checking need_discard and freespace btrees"; - ret = bch2_check_alloc_info(c); - if (ret) - goto err; - bch_verbose(c, "done checking need_discard and freespace btrees"); - if (c->sb.version < bcachefs_metadata_version_snapshot_2) { err = "error creating root snapshot node"; ret = bch2_fs_initialize_subvolumes(c); @@ -1291,6 +1284,15 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.verbose || !c->sb.clean) bch_info(c, "journal replay done"); + bch_info(c, "checking need_discard and freespace btrees"); + err = "error checking need_discard and freespace btrees"; + ret = bch2_check_alloc_info(c); + if (ret) + goto err; + bch_verbose(c, "done checking need_discard and freespace btrees"); + + set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags); + bch_info(c, "checking lrus"); err = "error checking lrus"; ret = bch2_check_lrus(c); @@ -1308,6 +1310,7 @@ int bch2_fs_recovery(struct bch_fs *c) set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); } else { set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags); set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); set_bit(BCH_FS_FSCK_DONE, &c->flags);