mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-08 14:23:19 +00:00
bcachefs: New locking functions
In the future, with the new deadlock cycle detector, we won't be using bare six_lock_* anymore: lock wait entries will all be embedded in btree_trans, and we will need a btree_trans context whenever locking a btree node.

This patch plumbs a btree_trans to the few places that need it, and adds two new locking functions:
- btree_node_lock_nopath, which may fail, returning a transaction restart, and
- btree_node_lock_nopath_nofail, to be used in places where we know we cannot deadlock (i.e. because we're holding no other locks).

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
parent
546180874a
commit
ca7d8fcabf
@ -959,12 +959,13 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
|
||||
return b;
|
||||
}
|
||||
|
||||
struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
|
||||
struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
|
||||
const struct bkey_i *k,
|
||||
enum btree_id btree_id,
|
||||
unsigned level,
|
||||
bool nofill)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
struct bset_tree *t;
|
||||
@ -998,9 +999,14 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
|
||||
goto out;
|
||||
} else {
|
||||
lock_node:
|
||||
ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
|
||||
if (ret)
|
||||
goto retry;
|
||||
ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
|
||||
if (unlikely(ret)) {
|
||||
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
|
||||
goto retry;
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ERR_PTR(ret);
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
|
||||
b->c.btree_id != btree_id ||
|
||||
@ -1062,8 +1068,9 @@ int bch2_btree_node_prefetch(struct bch_fs *c,
|
||||
return PTR_ERR_OR_ZERO(b);
|
||||
}
|
||||
|
||||
void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
|
||||
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
|
||||
@ -1079,8 +1086,8 @@ void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
|
||||
__bch2_btree_node_wait_on_read(b);
|
||||
__bch2_btree_node_wait_on_write(b);
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
|
||||
if (btree_node_dirty(b)) {
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
|
@ -26,13 +26,13 @@ struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
|
||||
const struct bkey_i *, unsigned,
|
||||
enum six_lock_type, unsigned long);
|
||||
|
||||
struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
|
||||
struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *,
|
||||
enum btree_id, unsigned, bool);
|
||||
|
||||
int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *,
|
||||
const struct bkey_i *, enum btree_id, unsigned);
|
||||
|
||||
void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);
|
||||
void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
|
||||
|
||||
void bch2_fs_btree_cache_exit(struct bch_fs *);
|
||||
int bch2_fs_btree_cache_init(struct bch_fs *);
|
||||
|
@ -165,10 +165,11 @@ static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_btree_node_update_key_early(struct bch_fs *c,
|
||||
static void bch2_btree_node_update_key_early(struct btree_trans *trans,
|
||||
enum btree_id btree, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_i *new)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree *b;
|
||||
struct bkey_buf tmp;
|
||||
int ret;
|
||||
@ -176,7 +177,7 @@ static void bch2_btree_node_update_key_early(struct bch_fs *c,
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
bch2_bkey_buf_reassemble(&tmp, c, old);
|
||||
|
||||
b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true);
|
||||
b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
|
||||
if (!IS_ERR_OR_NULL(b)) {
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
|
||||
@ -352,8 +353,9 @@ static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_and_journal_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf prev_k, cur_k;
|
||||
@ -378,7 +380,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
bch2_bkey_buf_reassemble(&cur_k, c, k);
|
||||
|
||||
cur = bch2_btree_node_get_noiter(c, cur_k.k,
|
||||
cur = bch2_btree_node_get_noiter(trans, cur_k.k,
|
||||
b->c.btree_id, b->c.level - 1,
|
||||
false);
|
||||
ret = PTR_ERR_OR_ZERO(cur);
|
||||
@ -392,7 +394,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
bch2_btree_ids[b->c.btree_id],
|
||||
b->c.level - 1,
|
||||
buf.buf)) {
|
||||
bch2_btree_node_evict(c, cur_k.k);
|
||||
bch2_btree_node_evict(trans, cur_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
b->c.level, cur_k.k->k.p);
|
||||
cur = NULL;
|
||||
@ -411,7 +413,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
|
||||
if (ret == DROP_THIS_NODE) {
|
||||
six_unlock_read(&cur->c.lock);
|
||||
bch2_btree_node_evict(c, cur_k.k);
|
||||
bch2_btree_node_evict(trans, cur_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
b->c.level, cur_k.k->k.p);
|
||||
cur = NULL;
|
||||
@ -425,7 +427,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
prev = NULL;
|
||||
|
||||
if (ret == DROP_PREV_NODE) {
|
||||
bch2_btree_node_evict(c, prev_k.k);
|
||||
bch2_btree_node_evict(trans, prev_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
b->c.level, prev_k.k->k.p);
|
||||
if (ret)
|
||||
@ -465,7 +467,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
bch2_bkey_buf_reassemble(&cur_k, c, k);
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
|
||||
cur = bch2_btree_node_get_noiter(c, cur_k.k,
|
||||
cur = bch2_btree_node_get_noiter(trans, cur_k.k,
|
||||
b->c.btree_id, b->c.level - 1,
|
||||
false);
|
||||
ret = PTR_ERR_OR_ZERO(cur);
|
||||
@ -476,12 +478,12 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_repair_topology_recurse(c, cur);
|
||||
ret = bch2_btree_repair_topology_recurse(trans, cur);
|
||||
six_unlock_read(&cur->c.lock);
|
||||
cur = NULL;
|
||||
|
||||
if (ret == DROP_THIS_NODE) {
|
||||
bch2_btree_node_evict(c, cur_k.k);
|
||||
bch2_btree_node_evict(trans, cur_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
b->c.level, cur_k.k->k.p);
|
||||
dropped_children = true;
|
||||
@ -522,17 +524,20 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
|
||||
static int bch2_repair_topology(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree *b;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR && !ret; i++) {
|
||||
b = c->btree_roots[i].b;
|
||||
if (btree_node_fake(b))
|
||||
continue;
|
||||
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
ret = bch2_btree_repair_topology_recurse(c, b);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
ret = bch2_btree_repair_topology_recurse(&trans, b);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
if (ret == DROP_THIS_NODE) {
|
||||
@ -541,13 +546,16 @@ static int bch2_repair_topology(struct bch_fs *c)
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id,
|
||||
unsigned level, bool is_root,
|
||||
struct bkey_s_c *k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p = { 0 };
|
||||
@ -747,7 +755,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
}
|
||||
|
||||
if (level)
|
||||
bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new);
|
||||
bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
|
||||
|
||||
if (c->opts.verbose) {
|
||||
printbuf_reset(&buf);
|
||||
@ -788,7 +796,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
|
||||
BUG_ON(bch2_journal_seq_verify &&
|
||||
k->k->version.lo > atomic64_read(&c->journal.seq));
|
||||
|
||||
ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
|
||||
ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -941,7 +949,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
|
||||
bch2_bkey_buf_reassemble(&cur, c, k);
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
|
||||
child = bch2_btree_node_get_noiter(c, cur.k,
|
||||
child = bch2_btree_node_get_noiter(trans, cur.k,
|
||||
b->c.btree_id, b->c.level - 1,
|
||||
false);
|
||||
ret = PTR_ERR_OR_ZERO(child);
|
||||
|
@ -1652,9 +1652,15 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
|
||||
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
struct btree_trans trans;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
__btree_node_write_done(c, b);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
}
|
||||
|
||||
static void btree_node_write_work(struct work_struct *work)
|
||||
|
@ -95,25 +95,14 @@ static void bkey_cached_free(struct btree_key_cache *bc,
|
||||
six_unlock_intent(&ck->c.lock);
|
||||
}
|
||||
|
||||
static void bkey_cached_free_fast(struct btree_key_cache *bc,
|
||||
struct bkey_cached *ck)
|
||||
static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
|
||||
struct bkey_cached *ck)
|
||||
{
|
||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
|
||||
struct btree_key_cache_freelist *f;
|
||||
bool freed = false;
|
||||
|
||||
BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
|
||||
|
||||
ck->btree_trans_barrier_seq =
|
||||
start_poll_synchronize_srcu(&c->btree_trans_barrier);
|
||||
|
||||
list_del_init(&ck->list);
|
||||
atomic_long_inc(&bc->nr_freed);
|
||||
|
||||
kfree(ck->k);
|
||||
ck->k = NULL;
|
||||
ck->u64s = 0;
|
||||
|
||||
preempt_disable();
|
||||
f = this_cpu_ptr(bc->pcpu_freed);
|
||||
|
||||
@ -138,13 +127,32 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc,
|
||||
list_move_tail(&ck->list, &bc->freed);
|
||||
mutex_unlock(&bc->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void bkey_cached_free_fast(struct btree_key_cache *bc,
|
||||
struct bkey_cached *ck)
|
||||
{
|
||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
|
||||
|
||||
ck->btree_trans_barrier_seq =
|
||||
start_poll_synchronize_srcu(&c->btree_trans_barrier);
|
||||
|
||||
list_del_init(&ck->list);
|
||||
atomic_long_inc(&bc->nr_freed);
|
||||
|
||||
kfree(ck->k);
|
||||
ck->k = NULL;
|
||||
ck->u64s = 0;
|
||||
|
||||
bkey_cached_move_to_freelist(bc, ck);
|
||||
|
||||
six_unlock_write(&ck->c.lock);
|
||||
six_unlock_intent(&ck->c.lock);
|
||||
}
|
||||
|
||||
static struct bkey_cached *
|
||||
bkey_cached_alloc(struct btree_key_cache *c)
|
||||
bkey_cached_alloc(struct btree_trans *trans,
|
||||
struct btree_key_cache *c)
|
||||
{
|
||||
struct bkey_cached *ck = NULL;
|
||||
struct btree_key_cache_freelist *f;
|
||||
@ -173,8 +181,21 @@ bkey_cached_alloc(struct btree_key_cache *c)
|
||||
}
|
||||
|
||||
if (ck) {
|
||||
six_lock_intent(&ck->c.lock, NULL, NULL);
|
||||
six_lock_write(&ck->c.lock, NULL, NULL);
|
||||
int ret;
|
||||
|
||||
ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent);
|
||||
if (unlikely(ret)) {
|
||||
bkey_cached_move_to_freelist(c, ck);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_write);
|
||||
if (unlikely(ret)) {
|
||||
six_unlock_intent(&ck->c.lock);
|
||||
bkey_cached_move_to_freelist(c, ck);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return ck;
|
||||
}
|
||||
|
||||
@ -216,15 +237,18 @@ bkey_cached_reuse(struct btree_key_cache *c)
|
||||
}
|
||||
|
||||
static struct bkey_cached *
|
||||
btree_key_cache_create(struct bch_fs *c,
|
||||
btree_key_cache_create(struct btree_trans *trans,
|
||||
enum btree_id btree_id,
|
||||
struct bpos pos)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bkey_cached *ck;
|
||||
bool was_new = true;
|
||||
|
||||
ck = bkey_cached_alloc(bc);
|
||||
ck = bkey_cached_alloc(trans, bc);
|
||||
if (unlikely(IS_ERR(ck)))
|
||||
return ck;
|
||||
|
||||
if (unlikely(!ck)) {
|
||||
ck = bkey_cached_reuse(bc);
|
||||
@ -370,7 +394,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
|
||||
retry:
|
||||
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
|
||||
if (!ck) {
|
||||
ck = btree_key_cache_create(c, path->btree_id, path->pos);
|
||||
ck = btree_key_cache_create(trans, path->btree_id, path->pos);
|
||||
ret = PTR_ERR_OR_ZERO(ck);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -519,10 +543,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
evict:
|
||||
BUG_ON(!btree_node_intent_locked(c_iter.path, 0));
|
||||
|
||||
mark_btree_node_unlocked(c_iter.path, 0);
|
||||
c_iter.path->l[0].b = NULL;
|
||||
/*
|
||||
* XXX: holding a lock that is not marked in btree_trans, not
|
||||
* ideal:
|
||||
*/
|
||||
six_lock_increment(&ck->c.lock, SIX_LOCK_intent);
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
six_lock_write(&ck->c.lock, NULL, NULL);
|
||||
/* Will not fail because we are holding no other locks: */
|
||||
btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_write);
|
||||
|
||||
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
|
||||
clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
|
||||
@ -546,11 +575,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
|
||||
struct bkey_cached *ck =
|
||||
container_of(pin, struct bkey_cached, journal);
|
||||
struct bkey_cached_key key;
|
||||
struct btree_trans trans;
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
int ret = 0;
|
||||
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
six_lock_read(&ck->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read);
|
||||
key = ck->key;
|
||||
|
||||
if (ck->journal.seq != seq ||
|
||||
@ -567,12 +598,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
|
||||
}
|
||||
six_unlock_read(&ck->c.lock);
|
||||
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
ret = commit_do(&trans, NULL, NULL, 0,
|
||||
btree_key_cache_flush_pos(&trans, key, seq,
|
||||
BTREE_INSERT_JOURNAL_RECLAIM, false));
|
||||
unlock:
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -61,7 +61,7 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
|
||||
* locked:
|
||||
*/
|
||||
six_lock_readers_add(&b->c.lock, -readers);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
six_lock_readers_add(&b->c.lock, readers);
|
||||
}
|
||||
|
||||
|
@ -185,6 +185,24 @@ void bch2_btree_node_unlock_write(struct btree_trans *,
|
||||
|
||||
/* lock: */
|
||||
|
||||
static inline int __must_check
|
||||
btree_node_lock_nopath(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
six_lock_type(&b->lock, type, NULL, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
int ret = btree_node_lock_nopath(trans, b, type);
|
||||
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
static inline int btree_node_lock_type(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
|
@ -169,7 +169,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
|
||||
BUG_ON(path->l[b->c.level].b == b &&
|
||||
path->l[b->c.level].lock_seq == b->c.lock.state.seq);
|
||||
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
|
||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
__btree_node_free(c, b);
|
||||
@ -259,7 +259,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
|
||||
return b;
|
||||
}
|
||||
|
||||
static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level)
|
||||
static struct btree *bch2_btree_node_alloc(struct btree_update *as,
|
||||
struct btree_trans *trans,
|
||||
unsigned level)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct btree *b;
|
||||
@ -271,8 +273,8 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
||||
|
||||
b = p->b[--p->nr];
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
|
||||
set_btree_node_accessed(b);
|
||||
set_btree_node_dirty_acct(c, b);
|
||||
@ -323,12 +325,13 @@ static void btree_set_max(struct btree *b, struct bpos pos)
|
||||
}
|
||||
|
||||
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
struct btree_trans *trans,
|
||||
struct btree *b,
|
||||
struct bkey_format format)
|
||||
{
|
||||
struct btree *n;
|
||||
|
||||
n = bch2_btree_node_alloc(as, b->c.level);
|
||||
n = bch2_btree_node_alloc(as, trans, b->c.level);
|
||||
|
||||
SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
|
||||
|
||||
@ -347,6 +350,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
}
|
||||
|
||||
static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
struct btree_trans *trans,
|
||||
struct btree *b)
|
||||
{
|
||||
struct bkey_format new_f = bch2_btree_calc_format(b);
|
||||
@ -358,12 +362,13 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
if (!bch2_btree_node_format_fits(as->c, b, &new_f))
|
||||
new_f = b->format;
|
||||
|
||||
return __bch2_btree_node_alloc_replacement(as, b, new_f);
|
||||
return __bch2_btree_node_alloc_replacement(as, trans, b, new_f);
|
||||
}
|
||||
|
||||
static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
|
||||
static struct btree *__btree_root_alloc(struct btree_update *as,
|
||||
struct btree_trans *trans, unsigned level)
|
||||
{
|
||||
struct btree *b = bch2_btree_node_alloc(as, level);
|
||||
struct btree *b = bch2_btree_node_alloc(as, trans, level);
|
||||
|
||||
btree_set_min(b, POS_MIN);
|
||||
btree_set_max(b, SPOS_MAX);
|
||||
@ -378,7 +383,7 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
|
||||
return b;
|
||||
}
|
||||
|
||||
static void bch2_btree_reserve_put(struct btree_update *as)
|
||||
static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct prealloc_nodes *p;
|
||||
@ -405,8 +410,8 @@ static void bch2_btree_reserve_put(struct btree_update *as)
|
||||
|
||||
mutex_unlock(&c->btree_reserve_cache_lock);
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
__btree_node_free(c, b);
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
@ -460,7 +465,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
|
||||
|
||||
/* Asynchronous interior node update machinery */
|
||||
|
||||
static void bch2_btree_update_free(struct btree_update *as)
|
||||
static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
@ -473,7 +478,7 @@ static void bch2_btree_update_free(struct btree_update *as)
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
bch2_journal_pin_flush(&c->journal, &as->journal);
|
||||
bch2_disk_reservation_put(c, &as->disk_res);
|
||||
bch2_btree_reserve_put(as);
|
||||
bch2_btree_reserve_put(as, trans);
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
|
||||
as->start_time);
|
||||
@ -551,12 +556,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
|
||||
static void btree_update_nodes_written(struct btree_update *as)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct btree *b = as->b;
|
||||
struct btree *b;
|
||||
struct btree_trans trans;
|
||||
u64 journal_seq = 0;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 512);
|
||||
/*
|
||||
* If we're already in an error state, it might be because a btree node
|
||||
* was never written, and we might be trying to free that same btree
|
||||
@ -573,15 +579,16 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* on disk:
|
||||
*/
|
||||
for (i = 0; i < as->nr_old_nodes; i++) {
|
||||
struct btree *old = as->old_nodes[i];
|
||||
__le64 seq;
|
||||
|
||||
six_lock_read(&old->c.lock, NULL, NULL);
|
||||
seq = old->data ? old->data->keys.seq : 0;
|
||||
six_unlock_read(&old->c.lock);
|
||||
b = as->old_nodes[i];
|
||||
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
seq = b->data ? b->data->keys.seq : 0;
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
if (seq == as->old_nodes_seq[i])
|
||||
wait_on_bit_io(&old->flags, BTREE_NODE_write_in_flight_inner,
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
@ -598,19 +605,19 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* journal reclaim does btree updates when flushing bkey_cached entries,
|
||||
* which may require allocations as well.
|
||||
*/
|
||||
bch2_trans_init(&trans, c, 0, 512);
|
||||
ret = commit_do(&trans, &as->disk_res, &journal_seq,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM|
|
||||
JOURNAL_WATERMARK_reserved,
|
||||
btree_update_nodes_written_trans(&trans, as));
|
||||
bch2_trans_exit(&trans);
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM|
|
||||
JOURNAL_WATERMARK_reserved,
|
||||
btree_update_nodes_written_trans(&trans, as));
|
||||
bch2_trans_unlock(&trans);
|
||||
|
||||
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
|
||||
"error %i in btree_update_nodes_written()", ret);
|
||||
err:
|
||||
if (b) {
|
||||
if (as->b) {
|
||||
b = as->b;
|
||||
/*
|
||||
* @b is the node we did the final insert into:
|
||||
*
|
||||
@ -623,8 +630,8 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* we're in journal error state:
|
||||
*/
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write);
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
list_del(&as->write_blocked_list);
|
||||
@ -681,7 +688,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
for (i = 0; i < as->nr_new_nodes; i++) {
|
||||
b = as->new_nodes[i];
|
||||
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->c.lock);
|
||||
}
|
||||
@ -689,7 +696,8 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
for (i = 0; i < as->nr_open_buckets; i++)
|
||||
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
|
||||
|
||||
bch2_btree_update_free(as);
|
||||
bch2_btree_update_free(as, &trans);
|
||||
bch2_trans_exit(&trans);
|
||||
}
|
||||
|
||||
static void btree_interior_update_work(struct work_struct *work)
|
||||
@ -936,7 +944,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
as->nr_old_nodes++;
|
||||
}
|
||||
|
||||
static void bch2_btree_update_done(struct btree_update *as)
|
||||
static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
u64 start_time = as->start_time;
|
||||
@ -947,7 +955,7 @@ static void bch2_btree_update_done(struct btree_update *as)
|
||||
up_read(&as->c->gc_lock);
|
||||
as->took_gc_lock = false;
|
||||
|
||||
bch2_btree_reserve_put(as);
|
||||
bch2_btree_reserve_put(as, trans);
|
||||
|
||||
continue_at(&as->cl, btree_update_set_nodes_written,
|
||||
as->c->btree_interior_update_worker);
|
||||
@ -1102,7 +1110,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
bch2_trans_verify_not_restarted(trans, restart_count);
|
||||
return as;
|
||||
err:
|
||||
bch2_btree_update_free(as);
|
||||
bch2_btree_update_free(as, trans);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
@ -1254,6 +1262,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
|
||||
* node)
|
||||
*/
|
||||
static struct btree *__btree_split_node(struct btree_update *as,
|
||||
struct btree_trans *trans,
|
||||
struct btree *n1)
|
||||
{
|
||||
struct bkey_format_state s;
|
||||
@ -1263,7 +1272,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
|
||||
struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
|
||||
struct bpos n1_pos;
|
||||
|
||||
n2 = bch2_btree_node_alloc(as, n1->c.level);
|
||||
n2 = bch2_btree_node_alloc(as, trans, n1->c.level);
|
||||
|
||||
n2->data->max_key = n1->data->max_key;
|
||||
n2->data->format = n1->format;
|
||||
@ -1427,7 +1436,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
|
||||
n1 = bch2_btree_node_alloc_replacement(as, b);
|
||||
n1 = bch2_btree_node_alloc_replacement(as, trans, b);
|
||||
|
||||
if (keys)
|
||||
btree_split_insert_keys(as, trans, path, n1, keys);
|
||||
@ -1435,7 +1444,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) {
|
||||
trace_and_count(c, btree_node_split, c, b);
|
||||
|
||||
n2 = __btree_split_node(as, n1);
|
||||
n2 = __btree_split_node(as, trans, n1);
|
||||
|
||||
bch2_btree_build_aux_trees(n2);
|
||||
bch2_btree_build_aux_trees(n1);
|
||||
@ -1457,7 +1466,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
|
||||
if (!parent) {
|
||||
/* Depth increases, make a new root */
|
||||
n3 = __btree_root_alloc(as, b->c.level + 1);
|
||||
n3 = __btree_root_alloc(as, trans, b->c.level + 1);
|
||||
|
||||
n3->sib_u64s[0] = U16_MAX;
|
||||
n3->sib_u64s[1] = U16_MAX;
|
||||
@ -1622,7 +1631,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
|
||||
return PTR_ERR(as);
|
||||
|
||||
btree_split(as, trans, path, b, NULL, flags);
|
||||
bch2_btree_update_done(as);
|
||||
bch2_btree_update_done(as, trans);
|
||||
|
||||
for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
|
||||
ret = bch2_foreground_maybe_merge(trans, path, l, flags);
|
||||
@ -1741,7 +1750,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
bch2_btree_interior_update_will_free_node(as, m);
|
||||
|
||||
n = bch2_btree_node_alloc(as, b->c.level);
|
||||
n = bch2_btree_node_alloc(as, trans, b->c.level);
|
||||
|
||||
SET_BTREE_NODE_SEQ(n->data,
|
||||
max(BTREE_NODE_SEQ(b->data),
|
||||
@ -1788,7 +1797,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
|
||||
six_unlock_intent(&n->c.lock);
|
||||
|
||||
bch2_btree_update_done(as);
|
||||
bch2_btree_update_done(as, trans);
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
|
||||
out:
|
||||
@ -1822,7 +1831,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
|
||||
n = bch2_btree_node_alloc_replacement(as, b);
|
||||
n = bch2_btree_node_alloc_replacement(as, trans, b);
|
||||
bch2_btree_update_add_new_node(as, n);
|
||||
|
||||
bch2_btree_build_aux_trees(n);
|
||||
@ -1847,7 +1856,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
bch2_btree_node_free_inmem(trans, b);
|
||||
six_unlock_intent(&n->c.lock);
|
||||
|
||||
bch2_btree_update_done(as);
|
||||
bch2_btree_update_done(as, trans);
|
||||
out:
|
||||
bch2_btree_path_downgrade(trans, iter->path);
|
||||
return ret;
|
||||
|
@ -117,6 +117,7 @@ struct btree_update {
|
||||
};
|
||||
|
||||
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
|
||||
struct btree_trans *,
|
||||
struct btree *,
|
||||
struct bkey_format);
|
||||
|
||||
|
@ -169,10 +169,13 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct btree_write *w = container_of(pin, struct btree_write, journal);
|
||||
struct btree *b = container_of(w, struct btree, writes[i]);
|
||||
struct btree_trans trans;
|
||||
unsigned long old, new, v;
|
||||
unsigned idx = w - b->writes;
|
||||
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
v = READ_ONCE(b->flags);
|
||||
|
||||
do {
|
||||
@ -188,6 +191,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user