bcachefs: All held locks must be in a btree path

With the new deadlock cycle detector, it's critical that all held locks
be marked in a btree_path, because that's what the cycle detector
traverses - any locks that aren't correctly marked will cause deadlocks.

This changes the btree_path to allocate some btree_paths for the new
nodes, since until the final update is done we otherwise don't have a
path referencing them.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2022-09-16 14:42:38 -04:00
parent 367d72dd5f
commit e4215d0fec
3 changed files with 84 additions and 66 deletions

View File

@ -619,61 +619,6 @@ static inline bool btree_path_advance_to_pos(struct btree_path *path,
return true;
}
/*
* Verify that iterator for parent node points to child node:
*/
static void btree_path_verify_new_node(struct btree_trans *trans,
struct btree_path *path, struct btree *b)
{
struct bch_fs *c = trans->c;
struct btree_path_level *l;
unsigned plevel;
bool parent_locked;
struct bkey_packed *k;
if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
return;
if (trans->journal_replay_not_finished)
return;
plevel = b->c.level + 1;
if (!btree_path_node(path, plevel))
return;
parent_locked = btree_node_locked(path, plevel);
if (!bch2_btree_node_relock(trans, path, plevel))
return;
l = &path->l[plevel];
k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
if (!k ||
bkey_deleted(k) ||
bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
struct printbuf buf3 = PRINTBUF;
struct printbuf buf4 = PRINTBUF;
struct bkey uk = bkey_unpack_key(b, k);
bch2_dump_btree_node(c, l->b);
bch2_bpos_to_text(&buf1, path->pos);
bch2_bkey_to_text(&buf2, &uk);
bch2_bpos_to_text(&buf3, b->data->min_key);
bch2_bpos_to_text(&buf3, b->data->max_key);
panic("parent iter doesn't point to new node:\n"
"iter pos %s %s\n"
"iter key %s\n"
"new node %s-%s\n",
bch2_btree_ids[path->btree_id],
buf1.buf, buf2.buf, buf3.buf, buf4.buf);
}
if (!parent_locked)
btree_node_unlock(trans, path, plevel);
}
static inline void __btree_path_level_init(struct btree_path *path,
unsigned level)
{
@ -689,14 +634,12 @@ static inline void __btree_path_level_init(struct btree_path *path,
bch2_btree_node_iter_peek(&l->iter, l->b);
}
static inline void btree_path_level_init(struct btree_trans *trans,
struct btree_path *path,
struct btree *b)
inline void bch2_btree_path_level_init(struct btree_trans *trans,
struct btree_path *path,
struct btree *b)
{
BUG_ON(path->cached);
btree_path_verify_new_node(trans, path, b);
EBUG_ON(!btree_path_pos_in_node(path, b));
EBUG_ON(b->c.lock.state.seq & 1);
@ -728,7 +671,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
mark_btree_node_locked(trans, path, b->c.level, (enum six_lock_type) t);
}
btree_path_level_init(trans, path, b);
bch2_btree_path_level_init(trans, path, b);
}
}
@ -809,7 +752,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
path->l[i].b = NULL;
mark_btree_node_locked(trans, path, path->level, lock_type);
btree_path_level_init(trans, path, b);
bch2_btree_path_level_init(trans, path, b);
return 0;
}
@ -982,7 +925,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
mark_btree_node_locked(trans, path, level, lock_type);
path->level = level;
btree_path_level_init(trans, path, b);
bch2_btree_path_level_init(trans, path, b);
bch2_btree_path_verify_locks(path);
err:

View File

@ -179,6 +179,9 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bke
struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,
struct btree_iter *, struct bpos);
inline void bch2_btree_path_level_init(struct btree_trans *,
struct btree_path *, struct btree *);
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_trans_verify_paths(struct btree_trans *);
void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,

View File

@ -28,6 +28,22 @@ static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
struct keylist *, unsigned);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
enum btree_id btree_id,
unsigned level,
struct bpos pos)
{
struct btree_path *path;
path = bch2_path_get(trans, btree_id, pos, level + 1, level,
BTREE_ITER_NOPRESERVE|
BTREE_ITER_INTENT);
path = bch2_btree_path_make_mut(trans, path, true);
bch2_btree_path_downgrade(trans, path);
__bch2_btree_path_unlock(trans, path);
return path;
}
/* Debug code: */
/*
@ -618,7 +634,10 @@ static void btree_update_nodes_written(struct btree_update *as)
"error %i in btree_update_nodes_written()", ret);
err:
if (as->b) {
struct btree_path *path;
b = as->b;
path = get_unlocked_mut_path(&trans, as->btree_id, b->c.level, b->key.k.p);
/*
* @b is the node we did the final insert into:
*
@ -632,7 +651,12 @@ static void btree_update_nodes_written(struct btree_update *as)
*/
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write);
mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
path->l[b->c.level].lock_seq = b->c.lock.state.seq;
path->l[b->c.level].b = b;
bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
@ -666,10 +690,13 @@ static void btree_update_nodes_written(struct btree_update *as)
}
mutex_unlock(&c->btree_interior_update_lock);
mark_btree_node_locked_noreset(path, b->c.level, SIX_LOCK_intent);
six_unlock_write(&b->c.lock);
btree_node_write_if_need(c, b, SIX_LOCK_intent);
six_unlock_intent(&b->c.lock);
btree_node_unlock(&trans, path, b->c.level);
bch2_path_put(&trans, path, true);
}
bch2_journal_pin_drop(&c->journal, &as->journal);
@ -1428,6 +1455,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
struct bch_fs *c = as->c;
struct btree *parent = btree_node_parent(path, b);
struct btree *n1, *n2 = NULL, *n3 = NULL;
struct btree_path *path1 = NULL, *path2 = NULL;
u64 start_time = local_clock();
BUG_ON(!parent && (b != btree_node_root(c, b)));
@ -1450,6 +1478,16 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
six_unlock_write(&n2->c.lock);
six_unlock_write(&n1->c.lock);
path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
bch2_btree_path_level_init(trans, path1, n1);
path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p);
six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, path2, n2->c.level, SIX_LOCK_intent);
bch2_btree_path_level_init(trans, path2, n2);
bch2_btree_update_add_new_node(as, n1);
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
@ -1467,6 +1505,12 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
/* Depth increases, make a new root */
n3 = __btree_root_alloc(as, trans, b->c.level + 1);
path2->locks_want++;
BUG_ON(btree_node_locked(path2, n3->c.level));
six_lock_increment(&n3->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, path2, n3->c.level, SIX_LOCK_intent);
bch2_btree_path_level_init(trans, path2, n3);
n3->sib_u64s[0] = U16_MAX;
n3->sib_u64s[1] = U16_MAX;
@ -1480,6 +1524,11 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_build_aux_trees(n1);
six_unlock_write(&n1->c.lock);
path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
bch2_btree_path_level_init(trans, path1, n1);
bch2_btree_update_add_new_node(as, n1);
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
@ -1526,6 +1575,15 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
six_unlock_intent(&n2->c.lock);
six_unlock_intent(&n1->c.lock);
if (path2) {
__bch2_btree_path_unlock(trans, path2);
bch2_path_put(trans, path2, true);
}
if (path1) {
__bch2_btree_path_unlock(trans, path1);
bch2_path_put(trans, path1, true);
}
bch2_trans_verify_locks(trans);
bch2_time_stats_update(&c->times[n2
@ -1642,7 +1700,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
enum btree_node_sibling sib)
{
struct bch_fs *c = trans->c;
struct btree_path *sib_path = NULL;
struct btree_path *sib_path = NULL, *new_path = NULL;
struct btree_update *as;
struct bkey_format_state new_s;
struct bkey_format new_f;
@ -1766,6 +1824,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_btree_build_aux_trees(n);
six_unlock_write(&n->c.lock);
new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
bch2_btree_path_level_init(trans, new_path, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bkey_init(&delete.k);
@ -1795,6 +1858,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
out:
err:
if (new_path)
bch2_path_put(trans, new_path, true);
bch2_path_put(trans, sib_path, true);
bch2_trans_verify_locks(trans);
return ret;
@ -1809,6 +1874,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_path *new_path = NULL;
struct btree *n, *parent;
struct btree_update *as;
int ret;
@ -1830,6 +1896,11 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
bch2_btree_build_aux_trees(n);
six_unlock_write(&n->c.lock);
new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
bch2_btree_path_level_init(trans, new_path, n);
trace_and_count(c, btree_node_rewrite, c, b);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
@ -1850,6 +1921,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as, trans);
bch2_path_put(trans, new_path, true);
out:
bch2_btree_path_downgrade(trans, iter->path);
return ret;