bcachefs: Fix a deadlock

Waiting on a btree node write with btree locks held can deadlock if the
write errors: the write error path has to do a btree update to drop
the pointer to the replica that errored.

The interior update path has to wait on in flight btree writes before
freeing nodes on disk. Previously, this was done in
bch2_btree_interior_update_will_free_node(), and could deadlock; now, we
just stash a pointer to the node and do it in
btree_update_nodes_written(), just prior to the transactional part of
the update.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-09-10 23:33:08 -04:00
parent 9f2772c454
commit ee7570546e
3 changed files with 27 additions and 7 deletions

View File

@ -1727,6 +1727,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
return;
if (old & (1 << BTREE_NODE_write_in_flight)) {
/*
* XXX waiting on btree writes with btree locks held -
* this can deadlock if we hit the write error path
*/
btree_node_wait_on_io(b);
continue;
}

View File

@ -550,6 +550,22 @@ static void btree_update_nodes_written(struct btree_update *as)
BUG_ON(!journal_pin_active(&as->journal));
/*
* Wait for any in flight writes to finish before we free the old nodes
* on disk:
*/
for (i = 0; i < as->nr_old_nodes; i++) {
struct btree *old = as->old_nodes[i];
__le64 seq;
six_lock_read(&old->c.lock, NULL, NULL);
seq = old->data ? old->data->keys.seq : 0;
six_unlock_read(&old->c.lock);
if (seq == as->old_nodes_seq[i])
btree_node_wait_on_io(old);
}
/*
* We did an update to a parent node where the pointers we added pointed
* to child nodes that weren't written yet: now, the child nodes have
@ -889,13 +905,9 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
btree_update_will_delete_key(as, &b->key);
/*
* XXX: Waiting on io with btree node locks held, we don't want to be
* doing this. We can't have btree writes happening after the space has
* been freed, but we really only need to block before
* btree_update_nodes_written_trans() happens.
*/
btree_node_wait_on_io(b);
as->old_nodes[as->nr_old_nodes] = b;
as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq;
as->nr_old_nodes++;
}
void bch2_btree_update_done(struct btree_update *as)

View File

@ -92,6 +92,10 @@ struct btree_update {
struct btree *new_nodes[BTREE_UPDATE_NODES_MAX];
unsigned nr_new_nodes;
struct btree *old_nodes[BTREE_UPDATE_NODES_MAX];
__le64 old_nodes_seq[BTREE_UPDATE_NODES_MAX];
unsigned nr_old_nodes;
open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX *
BCH_REPLICAS_MAX];
open_bucket_idx_t nr_open_buckets;