mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
bcachefs: bcachefs_metadata_version_inode_depth
This adds a new inode field, bi_depth, for directory inodes: this allows us to make the check_directory_structure pass much more efficient. Currently, to ensure the filesystem is fully connect and has no loops, for every directory we follow backpointers until we find the root. But by adding a depth counter, it sufficies to only check the parent of each directory, and check that the parent's bi_depth is smaller. (fsck doesn't require that bi_depth = parent->bi_depth + 1; if a rename causes bi_depth off, but the chain to the root is still strictly decreasing, then the algorithm still works and there's no need for fsck to fixup the bi_depth fields). We've already checked backpointers, so we know that every directory (excluding the root)has a valid parent: if bi_depth is always decreasing, every chain must terminate, and terminate at the root directory. bi_depth will not necessarily be correct when fsck runs, due to directory renames - we can't change bi_depth on every child directory when renaming a directory. That's ok; fsck will silently fix the bi_depth field as needed, and future fsck runs will be much faster. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
7fa06b998c
commit
b4f1b7e26c
@ -681,7 +681,8 @@ struct bch_sb_field_ext {
|
||||
x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \
|
||||
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
|
||||
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
|
||||
x(reflink_p_may_update_opts, BCH_VERSION(1, 16))
|
||||
x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
|
||||
x(inode_depth, BCH_VERSION(1, 17))
|
||||
|
||||
enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_min = 9,
|
||||
|
@ -170,6 +170,10 @@ int bch2_create_trans(struct btree_trans *trans,
|
||||
new_inode->bi_dir_offset = dir_offset;
|
||||
}
|
||||
|
||||
if (S_ISDIR(mode) &&
|
||||
!new_inode->bi_subvol)
|
||||
new_inode->bi_depth = dir_u->bi_depth + 1;
|
||||
|
||||
inode_iter.flags &= ~BTREE_ITER_all_snapshots;
|
||||
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
|
||||
|
||||
@ -510,6 +514,15 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
dst_dir_u->bi_nlink++;
|
||||
}
|
||||
|
||||
if (S_ISDIR(src_inode_u->bi_mode) &&
|
||||
!src_inode_u->bi_subvol)
|
||||
src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
|
||||
|
||||
if (mode == BCH_RENAME_EXCHANGE &&
|
||||
S_ISDIR(dst_inode_u->bi_mode) &&
|
||||
!dst_inode_u->bi_subvol)
|
||||
dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
|
||||
|
||||
if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
|
||||
dst_dir_u->bi_nlink--;
|
||||
src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
|
||||
|
@ -2597,6 +2597,48 @@ struct pathbuf_entry {
|
||||
|
||||
typedef DARRAY(struct pathbuf_entry) pathbuf;
|
||||
|
||||
static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
|
||||
u32 new_depth)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
|
||||
SPOS(0, p->inum, p->snapshot), 0);
|
||||
|
||||
struct bch_inode_unpacked inode;
|
||||
int ret = bkey_err(k) ?:
|
||||
!bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode
|
||||
: bch2_inode_unpack(k, &inode);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (inode.bi_depth != new_depth) {
|
||||
inode.bi_depth = new_depth;
|
||||
ret = __bch2_fsck_write_inode(trans, &inode) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, 0);
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
|
||||
{
|
||||
u32 restart_count = trans->restart_count;
|
||||
int ret = 0;
|
||||
|
||||
darray_for_each_reverse(*path, i) {
|
||||
ret = nested_lockrestart_do(trans,
|
||||
bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
|
||||
bch_err_fn(trans->c, ret);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
new_bi_depth++;
|
||||
}
|
||||
|
||||
return ret ?: trans_was_restarted(trans, restart_count);
|
||||
}
|
||||
|
||||
static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
|
||||
{
|
||||
darray_for_each(*p, i)
|
||||
@ -2606,24 +2648,22 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
|
||||
return false;
|
||||
}
|
||||
|
||||
static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
|
||||
static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter inode_iter = {};
|
||||
pathbuf path = {};
|
||||
struct printbuf buf = PRINTBUF;
|
||||
u32 snapshot = inode_k.k->p.snapshot;
|
||||
bool redo_bi_depth = false;
|
||||
u32 min_bi_depth = U32_MAX;
|
||||
int ret = 0;
|
||||
|
||||
p->nr = 0;
|
||||
|
||||
struct bch_inode_unpacked inode;
|
||||
ret = bch2_inode_unpack(inode_k, &inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!S_ISDIR(inode.bi_mode))
|
||||
return 0;
|
||||
|
||||
while (!inode.bi_subvol) {
|
||||
struct btree_iter dirent_iter;
|
||||
struct bkey_s_c_dirent d;
|
||||
@ -2632,7 +2672,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
||||
d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
|
||||
ret = bkey_err(d.s_c);
|
||||
if (ret && !bch2_err_matches(ret, ENOENT))
|
||||
break;
|
||||
goto out;
|
||||
|
||||
if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
|
||||
bch2_trans_iter_exit(trans, &dirent_iter);
|
||||
@ -2647,7 +2687,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
||||
|
||||
bch2_trans_iter_exit(trans, &dirent_iter);
|
||||
|
||||
ret = darray_push(p, ((struct pathbuf_entry) {
|
||||
ret = darray_push(&path, ((struct pathbuf_entry) {
|
||||
.inum = inode.bi_inum,
|
||||
.snapshot = snapshot,
|
||||
}));
|
||||
@ -2659,22 +2699,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
||||
bch2_trans_iter_exit(trans, &inode_iter);
|
||||
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
|
||||
SPOS(0, inode.bi_dir, snapshot), 0);
|
||||
|
||||
struct bch_inode_unpacked parent_inode;
|
||||
ret = bkey_err(inode_k) ?:
|
||||
!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
|
||||
: bch2_inode_unpack(inode_k, &inode);
|
||||
: bch2_inode_unpack(inode_k, &parent_inode);
|
||||
if (ret) {
|
||||
/* Should have been caught in dirents pass */
|
||||
bch_err_msg(c, ret, "error looking up parent directory");
|
||||
break;
|
||||
goto out;
|
||||
}
|
||||
|
||||
snapshot = inode_k.k->p.snapshot;
|
||||
min_bi_depth = parent_inode.bi_depth;
|
||||
|
||||
if (path_is_dup(p, inode.bi_inum, snapshot)) {
|
||||
if (parent_inode.bi_depth < inode.bi_depth &&
|
||||
min_bi_depth < U16_MAX)
|
||||
break;
|
||||
|
||||
inode = parent_inode;
|
||||
snapshot = inode_k.k->p.snapshot;
|
||||
redo_bi_depth = true;
|
||||
|
||||
if (path_is_dup(&path, inode.bi_inum, snapshot)) {
|
||||
/* XXX print path */
|
||||
bch_err(c, "directory structure loop");
|
||||
|
||||
darray_for_each(*p, i)
|
||||
darray_for_each(path, i)
|
||||
pr_err("%llu:%u", i->inum, i->snapshot);
|
||||
pr_err("%llu:%u", inode.bi_inum, snapshot);
|
||||
|
||||
@ -2687,12 +2737,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
||||
ret = reattach_inode(trans, &inode);
|
||||
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
|
||||
}
|
||||
break;
|
||||
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (inode.bi_subvol)
|
||||
min_bi_depth = 0;
|
||||
|
||||
if (redo_bi_depth)
|
||||
ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
|
||||
out:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &inode_iter);
|
||||
darray_exit(&path);
|
||||
printbuf_exit(&buf);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
@ -2704,24 +2762,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
|
||||
*/
|
||||
int bch2_check_directory_structure(struct bch_fs *c)
|
||||
{
|
||||
pathbuf path = { 0, };
|
||||
int ret;
|
||||
|
||||
ret = bch2_trans_run(c,
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
|
||||
BTREE_ITER_intent|
|
||||
BTREE_ITER_prefetch|
|
||||
BTREE_ITER_all_snapshots, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
|
||||
if (!bkey_is_inode(k.k))
|
||||
if (!S_ISDIR(bkey_inode_mode(k)))
|
||||
continue;
|
||||
|
||||
if (bch2_inode_flags(k) & BCH_INODE_unlinked)
|
||||
continue;
|
||||
|
||||
check_path(trans, &path, k);
|
||||
check_path_loop(trans, k);
|
||||
})));
|
||||
darray_exit(&path);
|
||||
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
|
@ -219,6 +219,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned bkey_inode_mode(struct bkey_s_c k)
|
||||
{
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_inode:
|
||||
return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode);
|
||||
case KEY_TYPE_inode_v2:
|
||||
return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode);
|
||||
case KEY_TYPE_inode_v3:
|
||||
return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* i_nlink: */
|
||||
|
||||
static inline unsigned nlink_bias(umode_t mode)
|
||||
|
@ -101,7 +101,8 @@ struct bch_inode_generation {
|
||||
x(bi_dir_offset, 64) \
|
||||
x(bi_subvol, 32) \
|
||||
x(bi_parent_subvol, 32) \
|
||||
x(bi_nocow, 8)
|
||||
x(bi_nocow, 8) \
|
||||
x(bi_depth, 32)
|
||||
|
||||
/* subset of BCH_INODE_FIELDS */
|
||||
#define BCH_INODE_OPTS() \
|
||||
|
Loading…
Reference in New Issue
Block a user