linux-next/fs/bcachefs/recovery_passes.c

// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_gc.h"
#include "btree_node_scan.h"
#include "disk_accounting.h"
#include "ec.h"
#include "fsck.h"
#include "inode.h"
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super.h"
#include "super-io.h"

const char * const bch2_recovery_passes[] = {
#define x(_fn, ...)	#_fn,
	BCH_RECOVERY_PASSES()
#undef x
	NULL
};

/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
static int bch2_recovery_pass_empty(struct bch_fs *c)
{
	return 0;
}

static int bch2_set_may_go_rw(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	/*
	 * After we go RW, the journal keys buffer can't be modified (except for
	 * setting journal_key->overwritten: it will be accessed by multiple
	 * threads
	 */
	move_gap(keys, keys->nr);

	set_bit(BCH_FS_may_go_rw, &c->flags);

	if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
		return bch2_fs_read_write_early(c);
	return 0;
}

struct recovery_pass_fn {
	int		(*fn)(struct bch_fs *);
	unsigned	when;
};

static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

static const u8 passes_to_stable_map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
	BCH_RECOVERY_PASSES()
#undef x
};

static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
{
	return passes_to_stable_map[pass];
}

u64 bch2_recovery_passes_to_stable(u64 v)
{
	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(passes_to_stable_map[i]);
	return ret;
}

u64 bch2_recovery_passes_from_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}

/*
 * For when we need to rewind recovery passes and run a pass we skipped:
 */
static int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
					     enum bch_recovery_pass pass)
{
	if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns))
		return -BCH_ERR_not_in_recovery;

	if (c->recovery_passes_complete & BIT_ULL(pass))
		return 0;

	bool print = !(c->opts.recovery_passes & BIT_ULL(pass));

	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
	    c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
		if (print)
			bch_info(c, "need recovery pass %s (%u), but already rw",
				 bch2_recovery_passes[pass], pass);
		return -BCH_ERR_cannot_rewind_recovery;
	}

	if (print)
		bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
			 bch2_recovery_passes[pass], pass,
			 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

	c->opts.recovery_passes |= BIT_ULL(pass);

	if (c->curr_recovery_pass > pass) {
		c->next_recovery_pass = pass;
		c->recovery_passes_complete &= (1ULL << pass) >> 1;
		return -BCH_ERR_restart_recovery;
	} else {
		return 0;
	}
}

int bch2_run_explicit_recovery_pass(struct bch_fs *c,
				    enum bch_recovery_pass pass)
{
	unsigned long flags;
	spin_lock_irqsave(&c->recovery_pass_lock, flags);
	int ret = __bch2_run_explicit_recovery_pass(c, pass);
	spin_unlock_irqrestore(&c->recovery_pass_lock, flags);
	return ret;
}

int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
					       enum bch_recovery_pass pass)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
	__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);

	return bch2_run_explicit_recovery_pass(c, pass);
}

int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
					       enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (!test_bit_le64(s, ext->recovery_passes_required)) {
		__set_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);

	return bch2_run_explicit_recovery_pass(c, pass);
}

static void bch2_clear_recovery_pass_required(struct bch_fs *c,
					      enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (test_bit_le64(s, ext->recovery_passes_required)) {
		__clear_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);
}

u64 bch2_fsck_recovery_passes(void)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
		if (recovery_pass_fns[i].when & PASS_FSCK)
			ret |= BIT_ULL(i);
	return ret;
}

static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;

	if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
		return false;
	if (c->opts.recovery_passes & BIT_ULL(pass))
		return true;
	if ((p->when & PASS_FSCK) && c->opts.fsck)
		return true;
	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
		return true;
	if (p->when & PASS_ALWAYS)
		return true;
	return false;
}

static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;
	int ret;

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
			   bch2_recovery_passes[pass]);
	ret = p->fn(c);
	if (ret)
		return ret;
	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_CONT " done\n");

	return 0;
}

int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	down_read(&c->state_lock);

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		ret = bch2_run_recovery_pass(c, i);
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
			i = c->curr_recovery_pass;
			continue;
		}
		if (ret)
			break;
	}

	up_read(&c->state_lock);

	return ret;
}

int bch2_run_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	/*
	 * We can't allow set_may_go_rw to be excluded; that would cause us to
	 * use the journal replay keys for updates where it's not expected.
	 */
	c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
		c->next_recovery_pass = c->curr_recovery_pass + 1;

		spin_lock_irq(&c->recovery_pass_lock);
		unsigned pass = c->curr_recovery_pass;

		if (c->opts.recovery_pass_last &&
		    c->curr_recovery_pass > c->opts.recovery_pass_last) {
			spin_unlock_irq(&c->recovery_pass_lock);
			break;
		}

		if (!should_run_recovery_pass(c, pass)) {
			c->curr_recovery_pass++;
			c->recovery_pass_done = max(c->recovery_pass_done, pass);
			spin_unlock_irq(&c->recovery_pass_lock);
			continue;
		}
		spin_unlock_irq(&c->recovery_pass_lock);

		ret =   bch2_run_recovery_pass(c, pass) ?:
			bch2_journal_flush(&c->journal);

		if (!ret && !test_bit(BCH_FS_error, &c->flags))
			bch2_clear_recovery_pass_required(c, pass);

		spin_lock_irq(&c->recovery_pass_lock);
		if (c->next_recovery_pass < c->curr_recovery_pass) {
			/*
			 * bch2_run_explicit_recovery_pass() was called: we
			 * can't always catch -BCH_ERR_restart_recovery because
			 * it may have been called from another thread (btree
			 * node read completion)
			 */
			ret = 0;
			c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
		} else {
			c->recovery_passes_complete |= BIT_ULL(pass);
			c->recovery_pass_done = max(c->recovery_pass_done, pass);
		}
		c->curr_recovery_pass = c->next_recovery_pass;
		spin_unlock_irq(&c->recovery_pass_lock);
	}

	return ret;
}