mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
bcachefs: More allocator startup improvements
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
b8adb83365
commit
d0cc3defba
@ -347,12 +347,14 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_alloc_write(struct bch_fs *c)
|
||||
int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
*wrote = false;
|
||||
|
||||
for_each_rw_member(ca, c, i) {
|
||||
struct btree_iter iter;
|
||||
struct bucket_array *buckets;
|
||||
@ -370,9 +372,14 @@ int bch2_alloc_write(struct bch_fs *c)
|
||||
if (!buckets->b[b].mark.dirty)
|
||||
continue;
|
||||
|
||||
ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL, 0);
|
||||
ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL,
|
||||
nowait
|
||||
? BTREE_INSERT_NOWAIT
|
||||
: 0);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
*wrote = true;
|
||||
}
|
||||
up_read(&ca->bucket_lock);
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
@ -1270,20 +1277,23 @@ static void flush_held_btree_writes(struct bch_fs *c)
|
||||
struct bucket_table *tbl;
|
||||
struct rhash_head *pos;
|
||||
struct btree *b;
|
||||
bool flush_updates;
|
||||
size_t i, nr_pending_updates;
|
||||
bool nodes_blocked;
|
||||
size_t i;
|
||||
struct closure cl;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
again:
|
||||
pr_debug("flushing dirty btree nodes");
|
||||
cond_resched();
|
||||
closure_wait(&c->btree_interior_update_wait, &cl);
|
||||
|
||||
flush_updates = false;
|
||||
nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
|
||||
nodes_blocked = false;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_cached_btree(b, c, tbl, i, pos)
|
||||
if (btree_node_dirty(b) && (!b->written || b->level)) {
|
||||
if (btree_node_need_write(b)) {
|
||||
if (btree_node_may_write(b)) {
|
||||
rcu_read_unlock();
|
||||
btree_node_lock_type(c, b, SIX_LOCK_read);
|
||||
@ -1291,7 +1301,7 @@ static void flush_held_btree_writes(struct bch_fs *c)
|
||||
six_unlock_read(&b->lock);
|
||||
goto again;
|
||||
} else {
|
||||
flush_updates = true;
|
||||
nodes_blocked = true;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
@ -1299,17 +1309,16 @@ static void flush_held_btree_writes(struct bch_fs *c)
|
||||
if (c->btree_roots_dirty)
|
||||
bch2_journal_meta(&c->journal);
|
||||
|
||||
/*
|
||||
* This is ugly, but it's needed to flush btree node writes
|
||||
* without spinning...
|
||||
*/
|
||||
if (flush_updates) {
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
bch2_btree_interior_updates_nr_pending(c) <
|
||||
nr_pending_updates);
|
||||
if (nodes_blocked) {
|
||||
closure_sync(&cl);
|
||||
goto again;
|
||||
}
|
||||
|
||||
closure_wake_up(&c->btree_interior_update_wait);
|
||||
closure_sync(&cl);
|
||||
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
}
|
||||
|
||||
static void allocator_start_issue_discards(struct bch_fs *c)
|
||||
@ -1331,13 +1340,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
|
||||
unsigned dev_iter;
|
||||
u64 journal_seq = 0;
|
||||
long bu;
|
||||
bool invalidating_data = false;
|
||||
int ret = 0;
|
||||
|
||||
if (test_alloc_startup(c)) {
|
||||
invalidating_data = true;
|
||||
if (test_alloc_startup(c))
|
||||
goto not_enough;
|
||||
}
|
||||
|
||||
/* Scan for buckets that are already invalidated: */
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
@ -1384,21 +1390,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
|
||||
not_enough:
|
||||
pr_debug("not enough empty buckets; scanning for reclaimable buckets");
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
find_reclaimable_buckets(c, ca);
|
||||
|
||||
while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
|
||||
(bu = next_alloc_bucket(ca)) >= 0) {
|
||||
invalidating_data |=
|
||||
bch2_invalidate_one_bucket(c, ca, bu, &journal_seq);
|
||||
|
||||
fifo_push(&ca->free[RESERVE_BTREE], bu);
|
||||
bucket_set_dirty(ca, bu);
|
||||
}
|
||||
}
|
||||
|
||||
pr_debug("done scanning for reclaimable buckets");
|
||||
|
||||
/*
|
||||
* We're moving buckets to freelists _before_ they've been marked as
|
||||
* invalidated on disk - we have to so that we can allocate new btree
|
||||
@ -1408,38 +1399,59 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
|
||||
* have cached data in them, which is live until they're marked as
|
||||
* invalidated on disk:
|
||||
*/
|
||||
if (invalidating_data) {
|
||||
pr_debug("invalidating existing data");
|
||||
set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
} else {
|
||||
pr_debug("issuing discards");
|
||||
allocator_start_issue_discards(c);
|
||||
set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
|
||||
while (1) {
|
||||
bool wrote = false;
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
find_reclaimable_buckets(c, ca);
|
||||
|
||||
while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
|
||||
(bu = next_alloc_bucket(ca)) >= 0) {
|
||||
bch2_invalidate_one_bucket(c, ca, bu,
|
||||
&journal_seq);
|
||||
|
||||
fifo_push(&ca->free[RESERVE_BTREE], bu);
|
||||
bucket_set_dirty(ca, bu);
|
||||
}
|
||||
}
|
||||
|
||||
pr_debug("done scanning for reclaimable buckets");
|
||||
|
||||
/*
|
||||
* XXX: it's possible for this to deadlock waiting on journal reclaim,
|
||||
* since we're holding btree writes. What then?
|
||||
*/
|
||||
ret = bch2_alloc_write(c, true, &wrote);
|
||||
|
||||
/*
|
||||
* If bch2_alloc_write() did anything, it may have used some
|
||||
* buckets, and we need the RESERVE_BTREE freelist full - so we
|
||||
* need to loop and scan again.
|
||||
* And if it errored, it may have been because there weren't
|
||||
* enough buckets, so just scan and loop again as long as it
|
||||
* made some progress:
|
||||
*/
|
||||
if (!wrote && ret)
|
||||
return ret;
|
||||
if (!wrote && !ret)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: it's possible for this to deadlock waiting on journal reclaim,
|
||||
* since we're holding btree writes. What then?
|
||||
*/
|
||||
ret = bch2_alloc_write(c);
|
||||
pr_debug("flushing journal");
|
||||
|
||||
ret = bch2_journal_flush(&c->journal);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (invalidating_data) {
|
||||
pr_debug("flushing journal");
|
||||
|
||||
ret = bch2_journal_flush_seq(&c->journal, journal_seq);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pr_debug("issuing discards");
|
||||
allocator_start_issue_discards(c);
|
||||
}
|
||||
pr_debug("issuing discards");
|
||||
allocator_start_issue_discards(c);
|
||||
|
||||
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
|
||||
|
||||
/* now flush dirty btree nodes: */
|
||||
if (invalidating_data)
|
||||
flush_held_btree_writes(c);
|
||||
flush_held_btree_writes(c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1448,6 +1460,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
bool wrote;
|
||||
int ret;
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
@ -1465,7 +1478,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
|
||||
}
|
||||
}
|
||||
|
||||
return bch2_alloc_write(c);
|
||||
return bch2_alloc_write(c, false, &wrote);
|
||||
}
|
||||
|
||||
void bch2_fs_allocator_background_init(struct bch_fs *c)
|
||||
|
@ -55,7 +55,7 @@ void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
|
||||
void bch2_dev_allocator_stop(struct bch_dev *);
|
||||
int bch2_dev_allocator_start(struct bch_dev *);
|
||||
|
||||
int bch2_alloc_write(struct bch_fs *);
|
||||
int bch2_alloc_write(struct bch_fs *, bool, bool *);
|
||||
int bch2_fs_allocator_start(struct bch_fs *);
|
||||
void bch2_fs_allocator_background_init(struct bch_fs *);
|
||||
|
||||
|
@ -171,6 +171,10 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
|
||||
if (!btree_node_may_write(b))
|
||||
goto out_unlock;
|
||||
|
||||
if (btree_node_dirty(b) &&
|
||||
test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
|
||||
goto out_unlock;
|
||||
|
||||
if (btree_node_dirty(b) ||
|
||||
btree_node_write_in_flight(b) ||
|
||||
btree_node_read_in_flight(b)) {
|
||||
|
@ -1330,8 +1330,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
if (!(old & (1 << BTREE_NODE_dirty)))
|
||||
return;
|
||||
|
||||
if (b->written &&
|
||||
!btree_node_may_write(b))
|
||||
if (!btree_node_may_write(b))
|
||||
return;
|
||||
|
||||
if (old & (1 << BTREE_NODE_write_in_flight)) {
|
||||
@ -1347,7 +1346,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
} while (cmpxchg_acquire(&b->flags, old, new) != old);
|
||||
|
||||
BUG_ON(btree_node_fake(b));
|
||||
BUG_ON(!list_empty(&b->write_blocked));
|
||||
BUG_ON((b->will_make_reachable != 0) != !b->written);
|
||||
|
||||
BUG_ON(b->written >= c->opts.btree_node_size);
|
||||
@ -1685,15 +1683,13 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
|
||||
unsigned long flags = READ_ONCE(b->flags);
|
||||
unsigned idx = (flags & (1 << BTREE_NODE_write_idx)) != 0;
|
||||
|
||||
if (//!(flags & (1 << BTREE_NODE_dirty)) &&
|
||||
!b->writes[0].wait.list.first &&
|
||||
!b->writes[1].wait.list.first &&
|
||||
!(b->will_make_reachable & 1))
|
||||
if (!(flags & (1 << BTREE_NODE_dirty)))
|
||||
continue;
|
||||
|
||||
pr_buf(&out, "%p d %u l %u w %u b %u r %u:%lu c %u p %u\n",
|
||||
pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu c %u p %u\n",
|
||||
b,
|
||||
(flags & (1 << BTREE_NODE_dirty)) != 0,
|
||||
(flags & (1 << BTREE_NODE_need_write)) != 0,
|
||||
b->level,
|
||||
b->written,
|
||||
!list_empty_careful(&b->write_blocked),
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define _BCACHEFS_BTREE_IO_H
|
||||
|
||||
#include "bset.h"
|
||||
#include "btree_locking.h"
|
||||
#include "extents.h"
|
||||
#include "io_types.h"
|
||||
|
||||
@ -48,7 +49,7 @@ static inline void btree_node_wait_on_io(struct btree *b)
|
||||
static inline bool btree_node_may_write(struct btree *b)
|
||||
{
|
||||
return list_empty_careful(&b->write_blocked) &&
|
||||
!b->will_make_reachable;
|
||||
(!b->written || !b->will_make_reachable);
|
||||
}
|
||||
|
||||
enum compact_mode {
|
||||
@ -100,42 +101,36 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
|
||||
void bch2_btree_node_write(struct bch_fs *, struct btree *,
|
||||
enum six_lock_type);
|
||||
|
||||
/*
|
||||
* btree_node_dirty() can be cleared with only a read lock,
|
||||
* and for bch2_btree_node_write_cond() we want to set need_write iff it's
|
||||
* still dirty:
|
||||
*/
|
||||
static inline void set_btree_node_need_write_if_dirty(struct btree *b)
|
||||
static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
unsigned long old, new, v = READ_ONCE(b->flags);
|
||||
while (b->written &&
|
||||
btree_node_need_write(b) &&
|
||||
btree_node_may_write(b)) {
|
||||
if (!btree_node_write_in_flight(b)) {
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_read);
|
||||
break;
|
||||
}
|
||||
|
||||
do {
|
||||
old = new = v;
|
||||
|
||||
if (!(old & (1 << BTREE_NODE_dirty)))
|
||||
return;
|
||||
|
||||
new |= (1 << BTREE_NODE_need_write);
|
||||
} while ((v = cmpxchg(&b->flags, old, new)) != old);
|
||||
six_unlock_read(&b->lock);
|
||||
btree_node_wait_on_io(b);
|
||||
btree_node_lock_type(c, b, SIX_LOCK_read);
|
||||
}
|
||||
}
|
||||
|
||||
#define bch2_btree_node_write_cond(_c, _b, cond) \
|
||||
do { \
|
||||
while ((_b)->written && btree_node_dirty(_b) && (cond)) { \
|
||||
if (!btree_node_may_write(_b)) { \
|
||||
set_btree_node_need_write_if_dirty(_b); \
|
||||
break; \
|
||||
} \
|
||||
unsigned long old, new, v = READ_ONCE((_b)->flags); \
|
||||
\
|
||||
if (!btree_node_write_in_flight(_b)) { \
|
||||
bch2_btree_node_write(_c, _b, SIX_LOCK_read); \
|
||||
break; \
|
||||
} \
|
||||
do { \
|
||||
old = new = v; \
|
||||
\
|
||||
six_unlock_read(&(_b)->lock); \
|
||||
btree_node_wait_on_io(_b); \
|
||||
btree_node_lock_type(c, b, SIX_LOCK_read); \
|
||||
} \
|
||||
if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \
|
||||
break; \
|
||||
\
|
||||
new |= (1 << BTREE_NODE_need_write); \
|
||||
} while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \
|
||||
\
|
||||
btree_node_write_if_need(_c, _b); \
|
||||
} while (0)
|
||||
|
||||
void bch2_btree_flush_all_reads(struct bch_fs *);
|
||||
|
@ -2,6 +2,7 @@
|
||||
#ifndef _BCACHEFS_BTREE_ITER_H
|
||||
#define _BCACHEFS_BTREE_ITER_H
|
||||
|
||||
#include "bset.h"
|
||||
#include "btree_types.h"
|
||||
|
||||
static inline void btree_iter_set_dirty(struct btree_iter *iter,
|
||||
|
@ -11,7 +11,6 @@
|
||||
*/
|
||||
|
||||
#include "btree_iter.h"
|
||||
#include "btree_io.h"
|
||||
#include "six.h"
|
||||
|
||||
/* matches six lock types */
|
||||
|
@ -367,6 +367,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
||||
|
||||
set_btree_node_accessed(b);
|
||||
set_btree_node_dirty(b);
|
||||
set_btree_node_need_write(b);
|
||||
|
||||
bch2_bset_init_first(b, &b->data->keys);
|
||||
memset(&b->nr, 0, sizeof(b->nr));
|
||||
@ -655,6 +656,12 @@ static void btree_update_nodes_written(struct closure *cl)
|
||||
closure_wait(&btree_current_write(b)->wait, cl);
|
||||
|
||||
list_del(&as->write_blocked_list);
|
||||
|
||||
/*
|
||||
* for flush_held_btree_writes() waiting on updates to flush or
|
||||
* nodes to be writeable:
|
||||
*/
|
||||
closure_wake_up(&c->btree_interior_update_wait);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
/*
|
||||
@ -958,6 +965,12 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
|
||||
list_del(&p->write_blocked_list);
|
||||
btree_update_reparent(as, p);
|
||||
|
||||
/*
|
||||
* for flush_held_btree_writes() waiting on updates to flush or
|
||||
* nodes to be writeable:
|
||||
*/
|
||||
closure_wake_up(&c->btree_interior_update_wait);
|
||||
}
|
||||
|
||||
clear_btree_node_dirty(b);
|
||||
|
@ -1038,7 +1038,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
size_t reserve_none = max_t(size_t, 1, nbuckets >> 9);
|
||||
size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7);
|
||||
size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
|
||||
btree_reserve);
|
||||
btree_reserve * 2);
|
||||
bool resize = ca->buckets[0] != NULL,
|
||||
start_copygc = ca->copygc_thread != NULL;
|
||||
int ret = -ENOMEM;
|
||||
|
@ -25,9 +25,6 @@
|
||||
#include "eytzinger.h"
|
||||
#include "util.h"
|
||||
|
||||
#define simple_strtoint(c, end, base) simple_strtol(c, end, base)
|
||||
#define simple_strtouint(c, end, base) simple_strtoul(c, end, base)
|
||||
|
||||
static const char si_units[] = "?kMGTPEZY";
|
||||
|
||||
static int __bch2_strtoh(const char *cp, u64 *res,
|
||||
|
Loading…
Reference in New Issue
Block a user