bcachefs: Fix restart handling in for_each_btree_key()

Code that uses for_each_btree_key often wants transaction restarts to be
handled locally and not returned. Originally, we wouldn't return
transaction restarts if there was a single iterator in the transaction -
the reasoning being if there weren't other iterators being invalidated,
and the current iterator was being advanced/retraversed, there weren't
any locks or iterators we were required to preserve.

But with the btree_path conversion that approach doesn't work anymore -
even when we're using for_each_btree_key() with a single iterator there
will still be two paths in the transaction, since we now always preserve
the path at the pos the iterator was initialized at - the reason being
that on restart we often restart from the same place.

And it turns out there's now a lot of for_each_btree_key() uses that _do
not_ want transaction restarts handled locally, and should be returning
them.

This patch splits out for_each_btree_key_norestart() and
for_each_btree_key_continue_norestart(), and converts existing users as
appropriate. for_each_btree_key(), for_each_btree_key_continue(), and
for_each_btree_node() now handle transaction restarts themselves by
calling bch2_trans_begin() when necessary - and the old hack to not
return transaction restarts when there's a single path in the
transaction has been deleted.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2021-10-21 12:05:21 -04:00 committed by Kent Overstreet
parent d17bc1739c
commit e5fa91d7ac
11 changed files with 101 additions and 83 deletions

View File

@ -1041,8 +1041,6 @@ LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags[0], 15, 20)
LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags[0], 20, 28) LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags[0], 20, 28)
LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags[0], 28, 30) LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags[0], 28, 30)
#define BCH_TIER_MAX 4U
#if 0 #if 0
LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20); LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40); LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);

View File

@ -1511,19 +1511,11 @@ static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long);
int __must_check bch2_btree_path_traverse(struct btree_trans *trans, int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
struct btree_path *path, unsigned flags) struct btree_path *path, unsigned flags)
{ {
int ret;
if (path->uptodate < BTREE_ITER_NEED_RELOCK) if (path->uptodate < BTREE_ITER_NEED_RELOCK)
return 0; return 0;
ret = bch2_trans_cond_resched(trans) ?: return bch2_trans_cond_resched(trans) ?:
btree_path_traverse_one(trans, path, flags, _RET_IP_); btree_path_traverse_one(trans, path, flags, _RET_IP_);
if (unlikely(ret) && hweight64(trans->paths_allocated) == 1) {
ret = __btree_path_traverse_all(trans, ret, _RET_IP_);
BUG_ON(ret == -EINTR);
}
return ret;
} }
static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst, static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
@ -1937,10 +1929,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
if (!btree_path_node(path, path->level)) if (!btree_path_node(path, path->level))
goto out; goto out;
ret = bch2_trans_cond_resched(trans);
if (ret)
goto err;
btree_node_unlock(path, path->level); btree_node_unlock(path, path->level);
path->l[path->level].b = BTREE_ITER_NO_NODE_UP; path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
path->level++; path->level++;

View File

@ -284,57 +284,6 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans)
} }
} }
#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \
_locks_want, _depth, _flags, _b, _ret) \
for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \
_start, _locks_want, _depth, _flags), \
_b = bch2_btree_iter_peek_node(&(_iter)); \
!((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \
(_b) = bch2_btree_iter_next_node(&(_iter)))
#define for_each_btree_node(_trans, _iter, _btree_id, _start, \
_flags, _b, _ret) \
__for_each_btree_node(_trans, _iter, _btree_id, _start, \
0, 0, _flags, _b, _ret)
static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
unsigned flags)
{
return flags & BTREE_ITER_SLOTS
? bch2_btree_iter_peek_slot(iter)
: bch2_btree_iter_peek(iter);
}
static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
unsigned flags)
{
return flags & BTREE_ITER_SLOTS
? bch2_btree_iter_next_slot(iter)
: bch2_btree_iter_next(iter);
}
static inline int bkey_err(struct bkey_s_c k)
{
return PTR_ERR_OR_ZERO(k.k);
}
#define for_each_btree_key(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)), \
(_k) = __bch2_btree_iter_peek(&(_iter), _flags); \
!((_ret) = bkey_err(_k)) && (_k).k; \
(_k) = __bch2_btree_iter_next(&(_iter), _flags))
#define for_each_btree_key_continue(_iter, _flags, _k, _ret) \
for ((_k) = __bch2_btree_iter_peek(&(_iter), _flags); \
!((_ret) = bkey_err(_k)) && (_k).k; \
(_k) = __bch2_btree_iter_next(&(_iter), _flags))
/* new multiple iterator interface: */
void bch2_dump_trans_paths_updates(struct btree_trans *);
void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *, void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *,
unsigned, struct bpos, unsigned); unsigned, struct bpos, unsigned);
@ -350,6 +299,89 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter)
void *bch2_trans_kmalloc(struct btree_trans *, size_t); void *bch2_trans_kmalloc(struct btree_trans *, size_t);
void bch2_trans_begin(struct btree_trans *); void bch2_trans_begin(struct btree_trans *);
static inline struct btree *
__btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree *b;
while (b = bch2_btree_iter_peek_node(iter),
PTR_ERR_OR_ZERO(b) == -EINTR)
bch2_trans_begin(trans);
return b;
}
#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \
_locks_want, _depth, _flags, _b, _ret) \
for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \
_start, _locks_want, _depth, _flags); \
(_b) = __btree_iter_peek_node_and_restart((_trans), &(_iter)),\
!((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \
(_b) = bch2_btree_iter_next_node(&(_iter)))
#define for_each_btree_node(_trans, _iter, _btree_id, _start, \
_flags, _b, _ret) \
__for_each_btree_node(_trans, _iter, _btree_id, _start, \
0, 0, _flags, _b, _ret)
static inline int bkey_err(struct bkey_s_c k)
{
return PTR_ERR_OR_ZERO(k.k);
}
static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
unsigned flags)
{
return flags & BTREE_ITER_SLOTS
? bch2_btree_iter_peek_slot(iter)
: bch2_btree_iter_peek(iter);
}
static inline struct bkey_s_c
__bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
struct btree_iter *iter, unsigned flags)
{
struct bkey_s_c k;
while (k = __bch2_btree_iter_peek(iter, flags),
bkey_err(k) == -EINTR)
bch2_trans_begin(trans);
return k;
}
#define for_each_btree_key(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
(_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
(_k) = __bch2_btree_iter_peek(&(_iter), _flags), \
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret) \
for (; \
(_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
for (; \
(_k) = __bch2_btree_iter_peek(&(_iter), _flags), \
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
/* new multiple iterator interface: */
void bch2_dump_trans_paths_updates(struct btree_trans *);
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
void bch2_trans_exit(struct btree_trans *); void bch2_trans_exit(struct btree_trans *);

View File

@ -1216,7 +1216,7 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
pos.snapshot++; pos.snapshot++;
for_each_btree_key(trans, iter, btree_id, pos, for_each_btree_key_norestart(trans, iter, btree_id, pos,
BTREE_ITER_ALL_SNAPSHOTS, k, ret) { BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (bkey_cmp(k.k->p, pos)) if (bkey_cmp(k.k->p, pos))
break; break;

View File

@ -432,7 +432,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
if (ret) if (ret)
return ret; return ret;
for_each_btree_key(trans, iter, BTREE_ID_dirents, for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents,
SPOS(dir.inum, 0, snapshot), 0, k, ret) { SPOS(dir.inum, 0, snapshot), 0, k, ret) {
if (k.k->p.inode > dir.inum) if (k.k->p.inode > dir.inum)
break; break;
@ -464,7 +464,7 @@ retry:
if (ret) if (ret)
goto err; goto err;
for_each_btree_key(&trans, iter, BTREE_ID_dirents, for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents,
SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) { SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) {
if (k.k->p.inode > inum.inum) if (k.k->p.inode > inum.inum)
break; break;

View File

@ -61,7 +61,7 @@ static int count_iters_for_insert(struct btree_trans *trans,
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c r_k; struct bkey_s_c r_k;
for_each_btree_key(trans, iter, for_each_btree_key_norestart(trans, iter,
BTREE_ID_reflink, POS(0, idx + offset), BTREE_ID_reflink, POS(0, idx + offset),
BTREE_ITER_SLOTS, r_k, ret2) { BTREE_ITER_SLOTS, r_k, ret2) {
if (bkey_cmp(bkey_start_pos(r_k.k), if (bkey_cmp(bkey_start_pos(r_k.k),
@ -120,7 +120,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
bch2_trans_copy_iter(&copy, iter); bch2_trans_copy_iter(&copy, iter);
for_each_btree_key_continue(copy, 0, k, ret) { for_each_btree_key_continue_norestart(copy, 0, k, ret) {
unsigned offset = 0; unsigned offset = 0;
if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0) if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)

View File

@ -1815,7 +1815,7 @@ retry:
if (err) if (err)
goto err; goto err;
for_each_btree_key(&trans, iter, BTREE_ID_extents, for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
SPOS(inum.inum, offset, snapshot), SPOS(inum.inum, offset, snapshot),
BTREE_ITER_SLOTS, k, err) { BTREE_ITER_SLOTS, k, err) {
if (bkey_cmp(bkey_start_pos(k.k), POS(inum.inum, end)) >= 0) if (bkey_cmp(bkey_start_pos(k.k), POS(inum.inum, end)) >= 0)
@ -2208,7 +2208,7 @@ retry:
if (ret) if (ret)
goto err; goto err;
for_each_btree_key(&trans, iter, BTREE_ID_extents, start, 0, k, ret) { for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, start, 0, k, ret) {
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break; break;
@ -3111,7 +3111,7 @@ retry:
if (ret) if (ret)
goto err; goto err;
for_each_btree_key(&trans, iter, BTREE_ID_extents, for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot), 0, k, ret) { SPOS(inode->v.i_ino, offset >> 9, snapshot), 0, k, ret) {
if (k.k->p.inode != inode->v.i_ino) { if (k.k->p.inode != inode->v.i_ino) {
break; break;
@ -3218,7 +3218,7 @@ retry:
if (ret) if (ret)
goto err; goto err;
for_each_btree_key(&trans, iter, BTREE_ID_extents, for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot), SPOS(inode->v.i_ino, offset >> 9, snapshot),
BTREE_ITER_SLOTS, k, ret) { BTREE_ITER_SLOTS, k, ret) {
if (k.k->p.inode != inode->v.i_ino) { if (k.k->p.inode != inode->v.i_ino) {

View File

@ -216,7 +216,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
bch2_trans_copy_iter(&iter, extent_iter); bch2_trans_copy_iter(&iter, extent_iter);
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) { for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, old, ret) {
s64 sectors = min(new->k.p.offset, old.k->p.offset) - s64 sectors = min(new->k.p.offset, old.k->p.offset) -
max(bkey_start_offset(&new->k), max(bkey_start_offset(&new->k),
bkey_start_offset(old.k)); bkey_start_offset(old.k));

View File

@ -131,7 +131,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
if (orig->k.type == KEY_TYPE_inline_data) if (orig->k.type == KEY_TYPE_inline_data)
bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data); bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data);
for_each_btree_key(trans, reflink_iter, BTREE_ID_reflink, for_each_btree_key_norestart(trans, reflink_iter, BTREE_ID_reflink,
POS(0, c->reflink_hint), POS(0, c->reflink_hint),
BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) { BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
if (reflink_iter.pos.inode) { if (reflink_iter.pos.inode) {
@ -194,7 +194,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
for_each_btree_key_continue(*iter, 0, k, ret) { for_each_btree_key_continue_norestart(*iter, 0, k, ret) {
if (bkey_cmp(iter->pos, end) >= 0) if (bkey_cmp(iter->pos, end) >= 0)
break; break;

View File

@ -156,7 +156,7 @@ bch2_hash_lookup(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
for_each_btree_key(trans, *iter, desc.btree_id, for_each_btree_key_norestart(trans, *iter, desc.btree_id,
SPOS(inum.inum, desc.hash_key(info, key), snapshot), SPOS(inum.inum, desc.hash_key(info, key), snapshot),
BTREE_ITER_SLOTS|flags, k, ret) { BTREE_ITER_SLOTS|flags, k, ret) {
if (iter->pos.inode != inum.inum) if (iter->pos.inode != inum.inum)
@ -192,7 +192,7 @@ bch2_hash_hole(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
for_each_btree_key(trans, *iter, desc.btree_id, for_each_btree_key_norestart(trans, *iter, desc.btree_id,
SPOS(inum.inum, desc.hash_key(info, key), snapshot), SPOS(inum.inum, desc.hash_key(info, key), snapshot),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
if (iter->pos.inode != inum.inum) if (iter->pos.inode != inum.inum)
@ -220,7 +220,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
bch2_btree_iter_advance(&iter); bch2_btree_iter_advance(&iter);
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) { for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, k, ret) {
if (k.k->type != desc.key_type && if (k.k->type != desc.key_type &&
k.k->type != KEY_TYPE_hash_whiteout) k.k->type != KEY_TYPE_hash_whiteout)
break; break;
@ -253,7 +253,7 @@ int bch2_hash_set(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
for_each_btree_key(trans, iter, desc.btree_id, for_each_btree_key_norestart(trans, iter, desc.btree_id,
SPOS(inum.inum, SPOS(inum.inum,
desc.hash_bkey(info, bkey_i_to_s_c(insert)), desc.hash_bkey(info, bkey_i_to_s_c(insert)),
snapshot), snapshot),

View File

@ -295,7 +295,7 @@ retry:
if (ret) if (ret)
goto err; goto err;
for_each_btree_key(&trans, iter, BTREE_ID_xattrs, for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs,
SPOS(inum, offset, snapshot), 0, k, ret) { SPOS(inum, offset, snapshot), 0, k, ret) {
BUG_ON(k.k->p.inode < inum); BUG_ON(k.k->p.inode < inum);