// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_gc.h"
#include "btree_node_scan.h"
#include "disk_accounting.h"
#include "ec.h"
#include "fsck.h"
#include "inode.h"
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super.h"
#include "super-io.h"

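/*
 * Table of pass names, generated by the BCH_RECOVERY_PASSES() x-macro: each
 * x(fn, ...) entry contributes its stringified name, so the array maps pass
 * number -> name. For illustration (hypothetical entry shape), an entry
 * x(scan_for_btree_nodes, ...) would expand to "scan_for_btree_nodes",
 */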
const char * const bch2_recovery_passes[] = {
#define x(_fn, ...)	#_fn,
	BCH_RECOVERY_PASSES()
#undef x
	NULL
};

/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
static int bch2_recovery_pass_empty(struct bch_fs *c)
{
	return 0;
}

static int bch2_set_may_go_rw(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	/*
	 * After we go RW, the journal keys buffer can't be modified (except
	 * for setting journal_key->overwritten): it will be accessed by
	 * multiple threads.
	 */
	move_gap(keys, keys->nr);

	set_bit(BCH_FS_may_go_rw, &c->flags);

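	/*
	 * Presumably (inferred from the condition below, not stated in the
	 * original): go read-write early whenever there is work that requires
	 * writing, i.e. journal keys to replay, fsck or explicit recovery
	 * passes requested, or an unclean shutdown.
	 */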
	if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
		return bch2_fs_read_write_early(c);
	return 0;
}

struct recovery_pass_fn {
	int (*fn)(struct bch_fs *);
	unsigned when;
};

static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

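/*
 * Pass numbering as stored in the superblock ("stable") is decoupled from the
 * in-memory enum order, presumably so passes can be reordered in code without
 * invalidating on-disk state; the maps below translate passes and pass
 * bitmasks between the two numberings.
 */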
static const u8 passes_to_stable_map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
	BCH_RECOVERY_PASSES()
#undef x
};

static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
{
	return passes_to_stable_map[pass];
}

u64 bch2_recovery_passes_to_stable(u64 v)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(passes_to_stable_map[i]);
	return ret;
}

u64 bch2_recovery_passes_from_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}

/*
 * For when we need to rewind recovery passes and run a pass we skipped:
 */
static int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
					     enum bch_recovery_pass pass)
{
	if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns))
		return -BCH_ERR_not_in_recovery;

	if (c->recovery_passes_complete & BIT_ULL(pass))
		return 0;

	bool print = !(c->opts.recovery_passes & BIT_ULL(pass));

	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
	    c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
		if (print)
			bch_info(c, "need recovery pass %s (%u), but already rw",
				 bch2_recovery_passes[pass], pass);
		return -BCH_ERR_cannot_rewind_recovery;
	}

	if (print)
		bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
			 bch2_recovery_passes[pass], pass,
			 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

	c->opts.recovery_passes |= BIT_ULL(pass);

	if (c->curr_recovery_pass > pass) {
		c->next_recovery_pass = pass;
		/* @pass and everything after it will rerun; only earlier passes stay complete: */
		c->recovery_passes_complete &= ~(~0ULL << pass);
		return -BCH_ERR_restart_recovery;
	} else {
		return 0;
	}
}

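/*
 * IRQ-safe wrapper around __bch2_run_explicit_recovery_pass(); the irqsave
 * spinlock suggests this may be called from interrupt context (e.g. btree
 * node read completion, per the comment in bch2_run_recovery_passes()).
 */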
int bch2_run_explicit_recovery_pass(struct bch_fs *c,
				    enum bch_recovery_pass pass)
{
	unsigned long flags;
	spin_lock_irqsave(&c->recovery_pass_lock, flags);
	int ret = __bch2_run_explicit_recovery_pass(c, pass);
	spin_unlock_irqrestore(&c->recovery_pass_lock, flags);
	return ret;
}

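/*
 * Persistent variants: additionally set the pass's bit in the superblock's
 * recovery_passes_required field, so the request survives until the pass has
 * run cleanly and bch2_clear_recovery_pass_required() drops it again.
 */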
int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c,
						      enum bch_recovery_pass pass)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
	__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);

	return bch2_run_explicit_recovery_pass(c, pass);
}

int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
					       enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (!test_bit_le64(s, ext->recovery_passes_required)) {
		__set_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);

	return bch2_run_explicit_recovery_pass(c, pass);
}

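/*
 * Inverse of the persistent variants above: called from
 * bch2_run_recovery_passes() once a pass has run without error, so it isn't
 * requested again on the next mount.
 */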
static void bch2_clear_recovery_pass_required(struct bch_fs *c,
					      enum bch_recovery_pass pass)
{
	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (test_bit_le64(s, ext->recovery_passes_required)) {
		__clear_bit_le64(s, ext->recovery_passes_required);
		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);
}

u64 bch2_fsck_recovery_passes(void)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
		if (recovery_pass_fns[i].when & PASS_FSCK)
			ret |= BIT_ULL(i);
	return ret;
}

static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;

	if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
		return false;
	if (c->opts.recovery_passes & BIT_ULL(pass))
		return true;
	if ((p->when & PASS_FSCK) && c->opts.fsck)
		return true;
	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
		return true;
	if (p->when & PASS_ALWAYS)
		return true;
	return false;
}

static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;
	int ret;

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
			   bch2_recovery_passes[pass]);
	ret = p->fn(c);
	if (ret)
		return ret;
	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_CONT " done\n");

	return 0;
}

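/*
 * Run the passes flagged PASS_ONLINE against a mounted filesystem
 * (presumably for online fsck and similar runtime repair):
 */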
int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	down_read(&c->state_lock);

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		ret = bch2_run_recovery_pass(c, i);
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
			/* Rewound by bch2_run_explicit_recovery_pass(): */
			i = c->curr_recovery_pass;
			continue;
		}
		if (ret)
			break;
	}

	up_read(&c->state_lock);

	return ret;
}

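/*
 * The main recovery loop, run at mount time: curr_recovery_pass and
 * next_recovery_pass are protected by recovery_pass_lock so that
 * bch2_run_explicit_recovery_pass() can rewind us from another context while
 * a pass is running.
 */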
int bch2_run_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	/*
	 * We can't allow set_may_go_rw to be excluded; that would cause us to
	 * use the journal replay keys for updates where it's not expected.
	 */
	c->opts.recovery_passes_exclude &= ~BIT_ULL(BCH_RECOVERY_PASS_set_may_go_rw);

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
		c->next_recovery_pass = c->curr_recovery_pass + 1;

		spin_lock_irq(&c->recovery_pass_lock);
		unsigned pass = c->curr_recovery_pass;

		if (c->opts.recovery_pass_last &&
		    c->curr_recovery_pass > c->opts.recovery_pass_last) {
			spin_unlock_irq(&c->recovery_pass_lock);
			break;
		}

		if (!should_run_recovery_pass(c, pass)) {
			c->curr_recovery_pass++;
			c->recovery_pass_done = max(c->recovery_pass_done, pass);
			spin_unlock_irq(&c->recovery_pass_lock);
			continue;
		}
		spin_unlock_irq(&c->recovery_pass_lock);

		ret = bch2_run_recovery_pass(c, pass) ?:
			bch2_journal_flush(&c->journal);

		if (!ret && !test_bit(BCH_FS_error, &c->flags))
			bch2_clear_recovery_pass_required(c, pass);

		spin_lock_irq(&c->recovery_pass_lock);
		if (c->next_recovery_pass < c->curr_recovery_pass) {
			/*
			 * bch2_run_explicit_recovery_pass() was called: we
			 * can't always catch -BCH_ERR_restart_recovery because
			 * it may have been called from another thread (btree
			 * node read completion)
			 */
			ret = 0;
			c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
		} else {
			c->recovery_passes_complete |= BIT_ULL(pass);
			c->recovery_pass_done = max(c->recovery_pass_done, pass);
		}
		c->curr_recovery_pass = c->next_recovery_pass;
		spin_unlock_irq(&c->recovery_pass_lock);
	}

	return ret;
}