mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-18 06:15:12 +00:00
bcachefs: Deadlock cycle detector
We've outgrown our own deadlock avoidance strategy. The btree iterator API provides an interface where the user doesn't need to concern themselves with lock ordering - different btree iterators can be traversed in any order. Without special care, this will lead to deadlocks. Our previous strategy was to define a lock ordering internally, and whenever we attempt to take a lock and trylock() fails, we'd check if the current btree transaction is holding any locks that cause a lock ordering violation. If so, we'd issue a transaction restart, and then bch2_trans_begin() would re-traverse all previously used iterators, but in the correct order. That approach had some issues, though. - Sometimes we'd issue transaction restarts unnecessarily, when no deadlock would have actually occured. Lock ordering restarts have become our primary cause of transaction restarts, on some workloads totally 20% of actual transaction commits. - To avoid deadlock or livelock, we'd often have to take intent locks when we only wanted a read lock: with the lock ordering approach, it is actually illegal to hold _any_ read lock while blocking on an intent lock, and this has been causing us unnecessary lock contention. - It was getting fragile - the various lock ordering rules are not trivial, and we'd been seeing occasional livelock issues related to this machinery. So, since bcachefs is already a relational database masquerading as a filesystem, we're stealing the next traditional database technique and switching to a cycle detector for avoiding deadlocks. When we block taking a btree lock, after adding ourself to the waitlist but before sleeping, we do a DFS of btree transactions waiting on other btree transactions, starting with the current transaction and walking our held locks, and transactions blocking on our held locks. If we find a cycle, we emit a transaction restart. Occasionally (e.g. the btree split path) we can not allow the lock() operation to fail, so if necessary we'll tell another transaction that it has to fail. Result: trans_restart_would_deadlock events are reduced by a factor of 10 to 100, and we'll be able to delete a whole bunch of grotty, fragile code. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
parent
62448afee7
commit
33bd5d0686
@ -1400,7 +1400,8 @@ struct bch_sb_field_disk_groups {
|
|||||||
x(trans_restart_key_cache_upgrade, 70) \
|
x(trans_restart_key_cache_upgrade, 70) \
|
||||||
x(trans_traverse_all, 71) \
|
x(trans_traverse_all, 71) \
|
||||||
x(transaction_commit, 72) \
|
x(transaction_commit, 72) \
|
||||||
x(write_super, 73)
|
x(write_super, 73) \
|
||||||
|
x(trans_restart_would_deadlock_recursion_limit, 74)
|
||||||
|
|
||||||
enum bch_persistent_counters {
|
enum bch_persistent_counters {
|
||||||
#define x(t, n, ...) BCH_COUNTER_##t,
|
#define x(t, n, ...) BCH_COUNTER_##t,
|
||||||
|
@ -2848,9 +2848,10 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
|
|||||||
trans->fn = fn;
|
trans->fn = fn;
|
||||||
trans->last_begin_time = ktime_get_ns();
|
trans->last_begin_time = ktime_get_ns();
|
||||||
trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn);
|
trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn);
|
||||||
trans->task = current;
|
trans->locking_wait.task = current;
|
||||||
trans->journal_replay_not_finished =
|
trans->journal_replay_not_finished =
|
||||||
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
|
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
|
||||||
|
closure_init_stack(&trans->ref);
|
||||||
|
|
||||||
bch2_trans_alloc_paths(trans, c);
|
bch2_trans_alloc_paths(trans, c);
|
||||||
|
|
||||||
@ -2877,7 +2878,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
|
|||||||
|
|
||||||
mutex_lock(&c->btree_trans_lock);
|
mutex_lock(&c->btree_trans_lock);
|
||||||
list_for_each_entry(pos, &c->btree_trans_list, list) {
|
list_for_each_entry(pos, &c->btree_trans_list, list) {
|
||||||
if (trans->task->pid < pos->task->pid) {
|
if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) {
|
||||||
list_add_tail(&trans->list, &pos->list);
|
list_add_tail(&trans->list, &pos->list);
|
||||||
goto list_add_done;
|
goto list_add_done;
|
||||||
}
|
}
|
||||||
@ -2919,6 +2920,8 @@ void bch2_trans_exit(struct btree_trans *trans)
|
|||||||
|
|
||||||
bch2_trans_unlock(trans);
|
bch2_trans_unlock(trans);
|
||||||
|
|
||||||
|
closure_sync(&trans->ref);
|
||||||
|
|
||||||
if (s)
|
if (s)
|
||||||
s->max_mem = max(s->max_mem, trans->mem_max);
|
s->max_mem = max(s->max_mem, trans->mem_max);
|
||||||
|
|
||||||
@ -2997,7 +3000,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
|
|||||||
static char lock_types[] = { 'r', 'i', 'w' };
|
static char lock_types[] = { 'r', 'i', 'w' };
|
||||||
unsigned l;
|
unsigned l;
|
||||||
|
|
||||||
prt_printf(out, "%i %s\n", trans->task->pid, trans->fn);
|
prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn);
|
||||||
|
|
||||||
trans_for_each_path(trans, path) {
|
trans_for_each_path(trans, path) {
|
||||||
if (!path->nodes_locked)
|
if (!path->nodes_locked)
|
||||||
@ -3029,7 +3032,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
|
|||||||
trans->locking_path_idx,
|
trans->locking_path_idx,
|
||||||
path->cached ? 'c' : 'b',
|
path->cached ? 'c' : 'b',
|
||||||
trans->locking_level,
|
trans->locking_level,
|
||||||
lock_types[trans->locking_lock_type],
|
lock_types[trans->locking_wait.lock_want],
|
||||||
bch2_btree_ids[trans->locking_btree_id]);
|
bch2_btree_ids[trans->locking_btree_id]);
|
||||||
bch2_bpos_to_text(out, trans->locking_pos);
|
bch2_bpos_to_text(out, trans->locking_pos);
|
||||||
|
|
||||||
|
@ -81,11 +81,14 @@ __trans_next_path(struct btree_trans *trans, unsigned idx)
|
|||||||
return &trans->paths[idx];
|
return &trans->paths[idx];
|
||||||
}
|
}
|
||||||
|
|
||||||
#define trans_for_each_path(_trans, _path) \
|
#define trans_for_each_path_from(_trans, _path, _start) \
|
||||||
for (_path = __trans_next_path((_trans), 0); \
|
for (_path = __trans_next_path((_trans), _start); \
|
||||||
(_path); \
|
(_path); \
|
||||||
_path = __trans_next_path((_trans), (_path)->idx + 1))
|
_path = __trans_next_path((_trans), (_path)->idx + 1))
|
||||||
|
|
||||||
|
#define trans_for_each_path(_trans, _path) \
|
||||||
|
trans_for_each_path_from(_trans, _path, 0)
|
||||||
|
|
||||||
static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path)
|
static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path)
|
||||||
{
|
{
|
||||||
unsigned idx = path ? path->sorted_idx + 1 : 0;
|
unsigned idx = path ? path->sorted_idx + 1 : 0;
|
||||||
|
@ -52,10 +52,248 @@ void bch2_btree_node_unlock_write(struct btree_trans *trans,
|
|||||||
|
|
||||||
/* lock */
|
/* lock */
|
||||||
|
|
||||||
void __bch2_btree_node_lock_write(struct btree_trans *trans,
|
/*
|
||||||
struct btree_bkey_cached_common *b)
|
* @trans wants to lock @b with type @type
|
||||||
|
*/
|
||||||
|
struct trans_waiting_for_lock {
|
||||||
|
struct btree_trans *trans;
|
||||||
|
struct btree_bkey_cached_common *node_want;
|
||||||
|
enum six_lock_type lock_want;
|
||||||
|
|
||||||
|
/* for iterating over held locks :*/
|
||||||
|
u8 path_idx;
|
||||||
|
u8 level;
|
||||||
|
u64 lock_start_time;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct lock_graph {
|
||||||
|
struct trans_waiting_for_lock g[8];
|
||||||
|
unsigned nr;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void lock_graph_pop(struct lock_graph *g)
|
||||||
|
{
|
||||||
|
closure_put(&g->g[--g->nr].trans->ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (i == g->g) {
|
||||||
|
ret = btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||||
|
} else {
|
||||||
|
i->trans->lock_must_abort = true;
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = g->g + 1; i < g->g + g->nr; i++)
|
||||||
|
wake_up_process(i->trans->locking_wait.task);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static noinline int break_cycle(struct lock_graph *g)
|
||||||
|
{
|
||||||
|
struct trans_waiting_for_lock *i;
|
||||||
|
|
||||||
|
for (i = g->g; i < g->g + g->nr; i++) {
|
||||||
|
if (i->trans->lock_may_not_fail ||
|
||||||
|
i->trans->locking_wait.lock_want == SIX_LOCK_write)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
return abort_lock(g, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = g->g; i < g->g + g->nr; i++) {
|
||||||
|
if (i->trans->lock_may_not_fail ||
|
||||||
|
!i->trans->in_traverse_all)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
return abort_lock(g, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = g->g; i < g->g + g->nr; i++) {
|
||||||
|
if (i->trans->lock_may_not_fail)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
return abort_lock(g, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
|
static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans)
|
||||||
|
{
|
||||||
|
struct btree_trans *orig_trans = g->g->trans;
|
||||||
|
struct trans_waiting_for_lock *i;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
for (i = g->g; i < g->g + g->nr; i++) {
|
||||||
|
if (i->trans->locking != i->node_want)
|
||||||
|
while (g->g + g->nr >= i) {
|
||||||
|
lock_graph_pop(g);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i->trans == trans) {
|
||||||
|
ret = break_cycle(g);
|
||||||
|
if (ret)
|
||||||
|
goto deadlock;
|
||||||
|
/*
|
||||||
|
* If we didn't abort (instead telling another
|
||||||
|
* transaction to abort), keep checking:
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (g->nr == ARRAY_SIZE(g->g)) {
|
||||||
|
if (orig_trans->lock_may_not_fail)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
trace_and_count(trans->c, trans_restart_would_deadlock_recursion_limit, trans, _RET_IP_);
|
||||||
|
ret = btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
|
||||||
|
goto deadlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
closure_get(&trans->ref);
|
||||||
|
|
||||||
|
g->g[g->nr++] = (struct trans_waiting_for_lock) {
|
||||||
|
.trans = trans,
|
||||||
|
.node_want = trans->locking,
|
||||||
|
.lock_want = trans->locking_wait.lock_want,
|
||||||
|
};
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
deadlock:
|
||||||
|
while (g->nr)
|
||||||
|
lock_graph_pop(g);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
static void print_cycle(struct printbuf *out, struct lock_graph *g)
|
||||||
|
{
|
||||||
|
struct trans_waiting_for_lock *i;
|
||||||
|
|
||||||
|
prt_str(out, "Found lock cycle:");
|
||||||
|
prt_newline(out);
|
||||||
|
|
||||||
|
for (i = g->g; i < g->g + g->nr; i++)
|
||||||
|
bch2_btree_trans_to_text(out, i->trans);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static noinline void lock_graph_remove_non_waiters(struct lock_graph *g)
|
||||||
|
{
|
||||||
|
struct trans_waiting_for_lock *i;
|
||||||
|
|
||||||
|
for (i = g->g + 1; i < g->g + g->nr; i++)
|
||||||
|
if (i->trans->locking != i->node_want ||
|
||||||
|
i->trans->locking_wait.start_time != i[-1].lock_start_time) {
|
||||||
|
while (g->g + g->nr >= i)
|
||||||
|
lock_graph_pop(g);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
|
||||||
|
{
|
||||||
|
return t1 + t2 > 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int check_for_deadlock(struct btree_trans *trans)
|
||||||
|
{
|
||||||
|
struct lock_graph g;
|
||||||
|
struct trans_waiting_for_lock *top;
|
||||||
|
struct btree_bkey_cached_common *b;
|
||||||
|
struct btree_path *path;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (trans->lock_must_abort)
|
||||||
|
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||||
|
|
||||||
|
g.nr = 0;
|
||||||
|
ret = lock_graph_descend(&g, trans);
|
||||||
|
BUG_ON(ret);
|
||||||
|
next:
|
||||||
|
if (!g.nr)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
top = &g.g[g.nr - 1];
|
||||||
|
|
||||||
|
trans_for_each_path_from(top->trans, path, top->path_idx) {
|
||||||
|
if (!path->nodes_locked)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (top->path_idx != path->idx) {
|
||||||
|
top->path_idx = path->idx;
|
||||||
|
top->level = 0;
|
||||||
|
top->lock_start_time = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (;
|
||||||
|
top->level < BTREE_MAX_DEPTH;
|
||||||
|
top->level++, top->lock_start_time = 0) {
|
||||||
|
int lock_held = btree_node_locked_type(path, top->level);
|
||||||
|
|
||||||
|
if (lock_held == BTREE_NODE_UNLOCKED)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
b = &READ_ONCE(path->l[top->level].b)->c;
|
||||||
|
|
||||||
|
if (unlikely(IS_ERR_OR_NULL(b))) {
|
||||||
|
lock_graph_remove_non_waiters(&g);
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (list_empty_careful(&b->lock.wait_list))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
raw_spin_lock(&b->lock.wait_lock);
|
||||||
|
list_for_each_entry(trans, &b->lock.wait_list, locking_wait.list) {
|
||||||
|
BUG_ON(b != trans->locking);
|
||||||
|
|
||||||
|
if (top->lock_start_time &&
|
||||||
|
time_after_eq64(top->lock_start_time, trans->locking_wait.start_time))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
top->lock_start_time = trans->locking_wait.start_time;
|
||||||
|
|
||||||
|
/* Don't check for self deadlock: */
|
||||||
|
if (trans == top->trans ||
|
||||||
|
!lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ret = lock_graph_descend(&g, trans);
|
||||||
|
raw_spin_unlock(&b->lock.wait_lock);
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
return ret < 0 ? ret : 0;
|
||||||
|
goto next;
|
||||||
|
|
||||||
|
}
|
||||||
|
raw_spin_unlock(&b->lock.wait_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lock_graph_pop(&g);
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
|
||||||
|
{
|
||||||
|
struct btree_trans *trans = p;
|
||||||
|
|
||||||
|
return check_for_deadlock(trans);
|
||||||
|
}
|
||||||
|
|
||||||
|
int __bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||||
|
struct btree_bkey_cached_common *b,
|
||||||
|
bool lock_may_not_fail)
|
||||||
{
|
{
|
||||||
int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
|
int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
|
||||||
|
int ret;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Must drop our read locks before calling six_lock_write() -
|
* Must drop our read locks before calling six_lock_write() -
|
||||||
@ -64,8 +302,10 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans,
|
|||||||
* locked:
|
* locked:
|
||||||
*/
|
*/
|
||||||
six_lock_readers_add(&b->lock, -readers);
|
six_lock_readers_add(&b->lock, -readers);
|
||||||
btree_node_lock_nopath_nofail(trans, b, SIX_LOCK_write);
|
ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write, lock_may_not_fail);
|
||||||
six_lock_readers_add(&b->lock, readers);
|
six_lock_readers_add(&b->lock, readers);
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool path_has_read_locks(struct btree_path *path)
|
static inline bool path_has_read_locks(struct btree_path *path)
|
||||||
|
@ -183,22 +183,41 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat
|
|||||||
void bch2_btree_node_unlock_write(struct btree_trans *,
|
void bch2_btree_node_unlock_write(struct btree_trans *,
|
||||||
struct btree_path *, struct btree *);
|
struct btree_path *, struct btree *);
|
||||||
|
|
||||||
|
int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
|
||||||
|
|
||||||
/* lock: */
|
/* lock: */
|
||||||
|
|
||||||
|
static inline int __btree_node_lock_nopath(struct btree_trans *trans,
|
||||||
|
struct btree_bkey_cached_common *b,
|
||||||
|
enum six_lock_type type,
|
||||||
|
bool lock_may_not_fail)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
trans->lock_may_not_fail = lock_may_not_fail;
|
||||||
|
trans->locking = b;
|
||||||
|
trans->lock_must_abort = false;
|
||||||
|
|
||||||
|
ret = six_lock_type_waiter(&b->lock, type, &trans->locking_wait,
|
||||||
|
bch2_six_check_for_deadlock, trans);
|
||||||
|
WRITE_ONCE(trans->locking, NULL);
|
||||||
|
WRITE_ONCE(trans->locking_wait.start_time, 0);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static inline int __must_check
|
static inline int __must_check
|
||||||
btree_node_lock_nopath(struct btree_trans *trans,
|
btree_node_lock_nopath(struct btree_trans *trans,
|
||||||
struct btree_bkey_cached_common *b,
|
struct btree_bkey_cached_common *b,
|
||||||
enum six_lock_type type)
|
enum six_lock_type type)
|
||||||
{
|
{
|
||||||
six_lock_type(&b->lock, type, NULL, NULL);
|
return __btree_node_lock_nopath(trans, b, type, false);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
|
static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
|
||||||
struct btree_bkey_cached_common *b,
|
struct btree_bkey_cached_common *b,
|
||||||
enum six_lock_type type)
|
enum six_lock_type type)
|
||||||
{
|
{
|
||||||
int ret = btree_node_lock_nopath(trans, b, type);
|
int ret = __btree_node_lock_nopath(trans, b, type, true);
|
||||||
|
|
||||||
BUG_ON(ret);
|
BUG_ON(ret);
|
||||||
}
|
}
|
||||||
@ -210,8 +229,6 @@ static inline int btree_node_lock_type(struct btree_trans *trans,
|
|||||||
enum six_lock_type type,
|
enum six_lock_type type,
|
||||||
six_lock_should_sleep_fn should_sleep_fn, void *p)
|
six_lock_should_sleep_fn should_sleep_fn, void *p)
|
||||||
{
|
{
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (six_trylock_type(&b->lock, type))
|
if (six_trylock_type(&b->lock, type))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -219,11 +236,10 @@ static inline int btree_node_lock_type(struct btree_trans *trans,
|
|||||||
trans->locking_pos = pos;
|
trans->locking_pos = pos;
|
||||||
trans->locking_btree_id = path->btree_id;
|
trans->locking_btree_id = path->btree_id;
|
||||||
trans->locking_level = level;
|
trans->locking_level = level;
|
||||||
trans->locking_lock_type = type;
|
trans->lock_may_not_fail = false;
|
||||||
trans->locking = b;
|
trans->locking = b;
|
||||||
ret = six_lock_type(&b->lock, type, should_sleep_fn, p);
|
return six_lock_type_waiter(&b->lock, type, &trans->locking_wait,
|
||||||
trans->locking = NULL;
|
bch2_six_check_for_deadlock, trans);
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -279,12 +295,15 @@ static inline int btree_node_lock(struct btree_trans *trans,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __bch2_btree_node_lock_write(struct btree_trans *, struct btree_bkey_cached_common *);
|
int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_bkey_cached_common *, bool);
|
||||||
|
|
||||||
static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
static inline int __btree_node_lock_write(struct btree_trans *trans,
|
||||||
struct btree_path *path,
|
struct btree_path *path,
|
||||||
struct btree_bkey_cached_common *b)
|
struct btree_bkey_cached_common *b,
|
||||||
|
bool lock_may_not_fail)
|
||||||
{
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
EBUG_ON(&path->l[b->level].b->c != b);
|
EBUG_ON(&path->l[b->level].b->c != b);
|
||||||
EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq);
|
EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq);
|
||||||
EBUG_ON(!btree_node_intent_locked(path, b->level));
|
EBUG_ON(!btree_node_intent_locked(path, b->level));
|
||||||
@ -296,8 +315,21 @@ static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
|||||||
*/
|
*/
|
||||||
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_write);
|
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_write);
|
||||||
|
|
||||||
if (unlikely(!six_trylock_write(&b->lock)))
|
ret = likely(six_trylock_write(&b->lock))
|
||||||
__bch2_btree_node_lock_write(trans, b);
|
? 0
|
||||||
|
: __bch2_btree_node_lock_write(trans, b, lock_may_not_fail);
|
||||||
|
if (ret)
|
||||||
|
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_intent);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
||||||
|
struct btree_path *path,
|
||||||
|
struct btree_bkey_cached_common *b)
|
||||||
|
{
|
||||||
|
int ret = __btree_node_lock_write(trans, path, b, true);
|
||||||
|
BUG_ON(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __must_check
|
static inline int __must_check
|
||||||
@ -305,8 +337,7 @@ bch2_btree_node_lock_write(struct btree_trans *trans,
|
|||||||
struct btree_path *path,
|
struct btree_path *path,
|
||||||
struct btree_bkey_cached_common *b)
|
struct btree_bkey_cached_common *b)
|
||||||
{
|
{
|
||||||
bch2_btree_node_lock_write_nofail(trans, path, b);
|
return __btree_node_lock_write(trans, path, b, false);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* relock: */
|
/* relock: */
|
||||||
|
@ -387,15 +387,19 @@ struct btree_trans_commit_hook {
|
|||||||
struct btree_trans {
|
struct btree_trans {
|
||||||
struct bch_fs *c;
|
struct bch_fs *c;
|
||||||
const char *fn;
|
const char *fn;
|
||||||
|
struct closure ref;
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
u64 last_begin_time;
|
u64 last_begin_time;
|
||||||
struct btree_bkey_cached_common *locking;
|
|
||||||
unsigned locking_path_idx;
|
unsigned locking_path_idx;
|
||||||
struct bpos locking_pos;
|
struct bpos locking_pos;
|
||||||
u8 locking_btree_id;
|
u8 locking_btree_id;
|
||||||
u8 locking_level;
|
u8 locking_level;
|
||||||
u8 locking_lock_type;
|
u8 lock_may_not_fail;
|
||||||
struct task_struct *task;
|
u8 lock_must_abort;
|
||||||
|
struct btree_bkey_cached_common *locking;
|
||||||
|
struct six_lock_waiter locking_wait;
|
||||||
|
|
||||||
int srcu_idx;
|
int srcu_idx;
|
||||||
|
|
||||||
u8 fn_idx;
|
u8 fn_idx;
|
||||||
|
@ -534,7 +534,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
|
|||||||
|
|
||||||
mutex_lock(&c->btree_trans_lock);
|
mutex_lock(&c->btree_trans_lock);
|
||||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||||
if (trans->task->pid <= i->iter)
|
if (trans->locking_wait.task->pid <= i->iter)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
ret = flush_buf(i);
|
ret = flush_buf(i);
|
||||||
@ -546,11 +546,11 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
|
|||||||
prt_printf(&i->buf, "backtrace:");
|
prt_printf(&i->buf, "backtrace:");
|
||||||
prt_newline(&i->buf);
|
prt_newline(&i->buf);
|
||||||
printbuf_indent_add(&i->buf, 2);
|
printbuf_indent_add(&i->buf, 2);
|
||||||
prt_backtrace(&i->buf, trans->task);
|
prt_backtrace(&i->buf, trans->locking_wait.task);
|
||||||
printbuf_indent_sub(&i->buf, 2);
|
printbuf_indent_sub(&i->buf, 2);
|
||||||
prt_newline(&i->buf);
|
prt_newline(&i->buf);
|
||||||
|
|
||||||
i->iter = trans->task->pid;
|
i->iter = trans->locking_wait.task->pid;
|
||||||
}
|
}
|
||||||
mutex_unlock(&c->btree_trans_lock);
|
mutex_unlock(&c->btree_trans_lock);
|
||||||
|
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
x(BCH_ERR_transaction_restart, transaction_restart_in_traverse_all) \
|
x(BCH_ERR_transaction_restart, transaction_restart_in_traverse_all) \
|
||||||
x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock) \
|
x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock) \
|
||||||
x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\
|
x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\
|
||||||
|
x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\
|
||||||
x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \
|
x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \
|
||||||
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \
|
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \
|
||||||
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \
|
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \
|
||||||
|
@ -1065,6 +1065,12 @@ TRACE_EVENT(trans_restart_would_deadlock,
|
|||||||
__entry->want_pos_snapshot)
|
__entry->want_pos_snapshot)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit,
|
||||||
|
TP_PROTO(struct btree_trans *trans,
|
||||||
|
unsigned long caller_ip),
|
||||||
|
TP_ARGS(trans, caller_ip)
|
||||||
|
);
|
||||||
|
|
||||||
TRACE_EVENT(trans_restart_would_deadlock_write,
|
TRACE_EVENT(trans_restart_would_deadlock_write,
|
||||||
TP_PROTO(struct btree_trans *trans),
|
TP_PROTO(struct btree_trans *trans),
|
||||||
TP_ARGS(trans),
|
TP_ARGS(trans),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user