bcachefs: bcachefs_metadata_version_inode_depth

This adds a new inode field, bi_depth, for directory inodes: this allows
us to make the check_directory_structure pass much more efficient.

Currently, to ensure the filesystem is fully connect and has no loops,
for every directory we follow backpointers until we find the root. But
by adding a depth counter, it sufficies to only check the parent of each
directory, and check that the parent's bi_depth is smaller.

(fsck doesn't require that bi_depth = parent->bi_depth + 1; if a rename
causes bi_depth off, but the chain to the root is still strictly
decreasing, then the algorithm still works and there's no need for fsck
to fixup the bi_depth fields).

We've already checked backpointers, so we know that every directory
(excluding the root)has a valid parent: if bi_depth is always
decreasing, every chain must terminate, and terminate at the root
directory.

bi_depth will not necessarily be correct when fsck runs, due to
directory renames - we can't change bi_depth on every child directory
when renaming a directory. That's ok; fsck will silently fix the
bi_depth field as needed, and future fsck runs will be much faster.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-08-02 20:27:38 -04:00
parent 7fa06b998c
commit b4f1b7e26c
5 changed files with 106 additions and 23 deletions

View File

@ -681,7 +681,8 @@ struct bch_sb_field_ext {
x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
x(reflink_p_may_update_opts, BCH_VERSION(1, 16))
x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
x(inode_depth, BCH_VERSION(1, 17))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,

View File

@ -170,6 +170,10 @@ int bch2_create_trans(struct btree_trans *trans,
new_inode->bi_dir_offset = dir_offset;
}
if (S_ISDIR(mode) &&
!new_inode->bi_subvol)
new_inode->bi_depth = dir_u->bi_depth + 1;
inode_iter.flags &= ~BTREE_ITER_all_snapshots;
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
@ -510,6 +514,15 @@ int bch2_rename_trans(struct btree_trans *trans,
dst_dir_u->bi_nlink++;
}
if (S_ISDIR(src_inode_u->bi_mode) &&
!src_inode_u->bi_subvol)
src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
if (mode == BCH_RENAME_EXCHANGE &&
S_ISDIR(dst_inode_u->bi_mode) &&
!dst_inode_u->bi_subvol)
dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
dst_dir_u->bi_nlink--;
src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;

View File

@ -2597,6 +2597,48 @@ struct pathbuf_entry {
typedef DARRAY(struct pathbuf_entry) pathbuf;
static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
u32 new_depth)
{
struct btree_iter iter;
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0, p->inum, p->snapshot), 0);
struct bch_inode_unpacked inode;
int ret = bkey_err(k) ?:
!bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode
: bch2_inode_unpack(k, &inode);
if (ret)
goto err;
if (inode.bi_depth != new_depth) {
inode.bi_depth = new_depth;
ret = __bch2_fsck_write_inode(trans, &inode) ?:
bch2_trans_commit(trans, NULL, NULL, 0);
}
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
{
u32 restart_count = trans->restart_count;
int ret = 0;
darray_for_each_reverse(*path, i) {
ret = nested_lockrestart_do(trans,
bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
bch_err_fn(trans->c, ret);
if (ret)
break;
new_bi_depth++;
}
return ret ?: trans_was_restarted(trans, restart_count);
}
static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
{
darray_for_each(*p, i)
@ -2606,24 +2648,22 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
return false;
}
static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter = {};
pathbuf path = {};
struct printbuf buf = PRINTBUF;
u32 snapshot = inode_k.k->p.snapshot;
bool redo_bi_depth = false;
u32 min_bi_depth = U32_MAX;
int ret = 0;
p->nr = 0;
struct bch_inode_unpacked inode;
ret = bch2_inode_unpack(inode_k, &inode);
if (ret)
return ret;
if (!S_ISDIR(inode.bi_mode))
return 0;
while (!inode.bi_subvol) {
struct btree_iter dirent_iter;
struct bkey_s_c_dirent d;
@ -2632,7 +2672,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
ret = bkey_err(d.s_c);
if (ret && !bch2_err_matches(ret, ENOENT))
break;
goto out;
if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
bch2_trans_iter_exit(trans, &dirent_iter);
@ -2647,7 +2687,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
bch2_trans_iter_exit(trans, &dirent_iter);
ret = darray_push(p, ((struct pathbuf_entry) {
ret = darray_push(&path, ((struct pathbuf_entry) {
.inum = inode.bi_inum,
.snapshot = snapshot,
}));
@ -2659,22 +2699,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
bch2_trans_iter_exit(trans, &inode_iter);
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
SPOS(0, inode.bi_dir, snapshot), 0);
struct bch_inode_unpacked parent_inode;
ret = bkey_err(inode_k) ?:
!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
: bch2_inode_unpack(inode_k, &inode);
: bch2_inode_unpack(inode_k, &parent_inode);
if (ret) {
/* Should have been caught in dirents pass */
bch_err_msg(c, ret, "error looking up parent directory");
break;
goto out;
}
snapshot = inode_k.k->p.snapshot;
min_bi_depth = parent_inode.bi_depth;
if (path_is_dup(p, inode.bi_inum, snapshot)) {
if (parent_inode.bi_depth < inode.bi_depth &&
min_bi_depth < U16_MAX)
break;
inode = parent_inode;
snapshot = inode_k.k->p.snapshot;
redo_bi_depth = true;
if (path_is_dup(&path, inode.bi_inum, snapshot)) {
/* XXX print path */
bch_err(c, "directory structure loop");
darray_for_each(*p, i)
darray_for_each(path, i)
pr_err("%llu:%u", i->inum, i->snapshot);
pr_err("%llu:%u", inode.bi_inum, snapshot);
@ -2687,12 +2737,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
}
break;
goto out;
}
}
if (inode.bi_subvol)
min_bi_depth = 0;
if (redo_bi_depth)
ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
darray_exit(&path);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
@ -2704,24 +2762,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
*/
int bch2_check_directory_structure(struct bch_fs *c)
{
pathbuf path = { 0, };
int ret;
ret = bch2_trans_run(c,
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_intent|
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
if (!bkey_is_inode(k.k))
if (!S_ISDIR(bkey_inode_mode(k)))
continue;
if (bch2_inode_flags(k) & BCH_INODE_unlinked)
continue;
check_path(trans, &path, k);
check_path_loop(trans, k);
})));
darray_exit(&path);
bch_err_fn(c, ret);
return ret;

View File

@ -219,6 +219,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k)
}
}
static inline unsigned bkey_inode_mode(struct bkey_s_c k)
{
switch (k.k->type) {
case KEY_TYPE_inode:
return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode);
case KEY_TYPE_inode_v2:
return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode);
case KEY_TYPE_inode_v3:
return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v);
default:
return 0;
}
}
/* i_nlink: */
static inline unsigned nlink_bias(umode_t mode)

View File

@ -101,7 +101,8 @@ struct bch_inode_generation {
x(bi_dir_offset, 64) \
x(bi_subvol, 32) \
x(bi_parent_subvol, 32) \
x(bi_nocow, 8)
x(bi_nocow, 8) \
x(bi_depth, 32)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \