mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 14:50:19 +00:00
bcachefs: Run insert triggers before overwrite triggers
Currently, btree triggers are run in natural key order, which presents a problem for fallocate in INSERT_RANGE mode: since we're moving existing extents to higher offsets, the trigger for deleting the old extent runs before the trigger that adds the new extent, potentially leading to indirect extents being deleted that shouldn't be when the delete causes the refcount to hit 0. This changes the order we run triggers so that for a given btree, we run all insert triggers before overwrite triggers, nicely sidestepping this issue. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
parent
c714614bd0
commit
f0c3f88b35
@ -338,7 +338,8 @@ struct btree_insert_entry {
|
||||
enum btree_id btree_id:8;
|
||||
u8 level;
|
||||
bool cached:1;
|
||||
bool trans_triggers_run:1;
|
||||
bool insert_trigger_run:1;
|
||||
bool overwrite_trigger_run:1;
|
||||
struct bkey_i *k;
|
||||
struct btree_path *path;
|
||||
unsigned long ip_allocated;
|
||||
|
@ -816,10 +816,112 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
|
||||
{
|
||||
struct bkey _deleted = KEY(0, 0, 0);
|
||||
struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL };
|
||||
struct bkey_s_c old;
|
||||
struct bkey unpacked;
|
||||
struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates;
|
||||
bool trans_trigger_run;
|
||||
unsigned btree_id = 0;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
*
|
||||
* For a given btree, this algorithm runs insert triggers before
|
||||
* overwrite triggers: this is so that when extents are being moved
|
||||
* (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before
|
||||
* they are re-added.
|
||||
*/
|
||||
for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
|
||||
while (btree_id_start < trans->updates + trans->nr_updates &&
|
||||
btree_id_start->btree_id < btree_id)
|
||||
btree_id_start++;
|
||||
|
||||
/*
|
||||
* Running triggers will append more updates to the list of updates as
|
||||
* we're walking it:
|
||||
*/
|
||||
do {
|
||||
trans_trigger_run = false;
|
||||
|
||||
for (i = btree_id_start;
|
||||
i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
|
||||
i++) {
|
||||
if (i->insert_trigger_run ||
|
||||
(i->flags & BTREE_TRIGGER_NORUN) ||
|
||||
!(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
|
||||
continue;
|
||||
|
||||
BUG_ON(i->overwrite_trigger_run);
|
||||
|
||||
i->insert_trigger_run = true;
|
||||
trans_trigger_run = true;
|
||||
|
||||
old = bch2_btree_path_peek_slot(i->path, &unpacked);
|
||||
_deleted.p = i->path->pos;
|
||||
|
||||
if (old.k->type == i->k->k.type &&
|
||||
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
|
||||
i->overwrite_trigger_run = true;
|
||||
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(i->k),
|
||||
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|i->flags);
|
||||
} else {
|
||||
ret = bch2_trans_mark_key(trans, deleted, bkey_i_to_s_c(i->k),
|
||||
BTREE_TRIGGER_INSERT|i->flags);
|
||||
}
|
||||
|
||||
if (ret == -EINTR)
|
||||
trace_trans_restart_mark(trans->ip, _RET_IP_,
|
||||
i->btree_id, &i->path->pos);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
} while (trans_trigger_run);
|
||||
|
||||
do {
|
||||
trans_trigger_run = false;
|
||||
|
||||
for (i = btree_id_start;
|
||||
i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
|
||||
i++) {
|
||||
if (i->overwrite_trigger_run ||
|
||||
(i->flags & BTREE_TRIGGER_NORUN) ||
|
||||
!(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
|
||||
continue;
|
||||
|
||||
BUG_ON(!i->insert_trigger_run);
|
||||
|
||||
i->overwrite_trigger_run = true;
|
||||
trans_trigger_run = true;
|
||||
|
||||
old = bch2_btree_path_peek_slot(i->path, &unpacked);
|
||||
_deleted.p = i->path->pos;
|
||||
|
||||
ret = bch2_trans_mark_key(trans, old, deleted,
|
||||
BTREE_TRIGGER_OVERWRITE|i->flags);
|
||||
|
||||
if (ret == -EINTR)
|
||||
trace_trans_restart_mark(trans->ip, _RET_IP_,
|
||||
i->btree_id, &i->path->pos);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
} while (trans_trigger_run);
|
||||
}
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) &&
|
||||
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
|
||||
(!i->insert_trigger_run || !i->overwrite_trigger_run));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __bch2_trans_commit(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *i = NULL;
|
||||
bool trans_trigger_run;
|
||||
unsigned u64s;
|
||||
int ret = 0;
|
||||
|
||||
@ -854,30 +956,9 @@ int __bch2_trans_commit(struct btree_trans *trans)
|
||||
i->btree_id, i->k->k.p);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Running triggers will append more updates to the list of updates as
|
||||
* we're walking it:
|
||||
*/
|
||||
do {
|
||||
trans_trigger_run = false;
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
|
||||
!i->trans_triggers_run) {
|
||||
i->trans_triggers_run = true;
|
||||
trans_trigger_run = true;
|
||||
|
||||
ret = bch2_trans_mark_update(trans, i->path,
|
||||
i->k, i->flags);
|
||||
if (unlikely(ret)) {
|
||||
if (ret == -EINTR)
|
||||
trace_trans_restart_mark(trans->ip, _RET_IP_,
|
||||
i->btree_id, &i->path->pos);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (trans_trigger_run);
|
||||
ret = bch2_trans_commit_run_triggers(trans);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
BUG_ON(!i->path->should_be_locked);
|
||||
@ -1297,7 +1378,7 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
|
||||
|
||||
if (i < trans->updates + trans->nr_updates &&
|
||||
!btree_insert_entry_cmp(&n, i)) {
|
||||
BUG_ON(i->trans_triggers_run);
|
||||
BUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);
|
||||
|
||||
/*
|
||||
* This is a hack to ensure that inode creates update the btree,
|
||||
|
@ -1882,41 +1882,6 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_trans_mark_update(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct bkey_i *new,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bkey _deleted = KEY(0, 0, 0);
|
||||
struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL };
|
||||
struct bkey_s_c old;
|
||||
struct bkey unpacked;
|
||||
int ret;
|
||||
|
||||
_deleted.p = path->pos;
|
||||
|
||||
if (unlikely(flags & BTREE_TRIGGER_NORUN))
|
||||
return 0;
|
||||
|
||||
if (!btree_node_type_needs_gc(path->btree_id))
|
||||
return 0;
|
||||
|
||||
old = bch2_btree_path_peek_slot(path, &unpacked);
|
||||
|
||||
if (old.k->type == new->k.type &&
|
||||
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
|
||||
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
|
||||
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
|
||||
} else {
|
||||
ret = bch2_trans_mark_key(trans, deleted, bkey_i_to_s_c(new),
|
||||
BTREE_TRIGGER_INSERT|flags) ?:
|
||||
bch2_trans_mark_key(trans, old, deleted,
|
||||
BTREE_TRIGGER_OVERWRITE|flags);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
|
||||
struct bch_dev *ca, size_t b,
|
||||
enum bch_data_type type,
|
||||
|
@ -233,8 +233,6 @@ int bch2_mark_update(struct btree_trans *, struct btree_path *,
|
||||
|
||||
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
|
||||
struct bkey_s_c, unsigned);
|
||||
int bch2_trans_mark_update(struct btree_trans *, struct btree_path *,
|
||||
struct bkey_i *, unsigned);
|
||||
void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
|
||||
|
||||
int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
|
||||
|
Loading…
x
Reference in New Issue
Block a user