mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-16 02:14:58 +00:00
bcachefs: Rework btree node pinning
In backpointers fsck, we do a sequential scan of one btree and check references to another (extents <-> backpointers). Checking references generates random lookups, so we want to pin that btree in memory (or only a range of it, if it doesn't fit in RAM). Previously, this was done with a simple check in the shrinker - "if btree node is in range being pinned, don't free it" - but this generated OOMs, as our shrinker wasn't well behaved if there was less memory available than expected. Instead, we now have two different shrinkers and LRU lists; the second shrinker is for pinned nodes, with seeks set much higher than normal - so they can still be freed if necessary, but we'll prefer not to. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
91ddd71510
commit
7a51608d01
@ -752,10 +752,12 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
|||||||
s64 mem_may_pin = mem_may_pin_bytes(c);
|
s64 mem_may_pin = mem_may_pin_bytes(c);
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
bch2_btree_cache_unpin(c);
|
||||||
|
|
||||||
btree_interior_mask |= btree_leaf_mask;
|
btree_interior_mask |= btree_leaf_mask;
|
||||||
|
|
||||||
c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask;
|
c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask;
|
||||||
c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask;
|
c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask;
|
||||||
c->btree_cache.pinned_nodes_start = start;
|
c->btree_cache.pinned_nodes_start = start;
|
||||||
c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
|
c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
|
||||||
|
|
||||||
@ -777,6 +779,7 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
|||||||
BBPOS(btree, b->key.k.p);
|
BBPOS(btree, b->key.k.p);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
bch2_node_pin(c, b);
|
||||||
0;
|
0;
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@ -936,8 +939,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
|||||||
bch2_trans_put(trans);
|
bch2_trans_put(trans);
|
||||||
bch2_bkey_buf_exit(&s.last_flushed, c);
|
bch2_bkey_buf_exit(&s.last_flushed, c);
|
||||||
|
|
||||||
c->btree_cache.pinned_nodes_leaf_mask = 0;
|
bch2_btree_cache_unpin(c);
|
||||||
c->btree_cache.pinned_nodes_interior_mask = 0;
|
|
||||||
|
|
||||||
bch_err_fn(c, ret);
|
bch_err_fn(c, ret);
|
||||||
return ret;
|
return ret;
|
||||||
@ -1053,8 +1055,7 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
|
|||||||
}
|
}
|
||||||
bch2_trans_put(trans);
|
bch2_trans_put(trans);
|
||||||
|
|
||||||
c->btree_cache.pinned_nodes_leaf_mask = 0;
|
bch2_btree_cache_unpin(c);
|
||||||
c->btree_cache.pinned_nodes_interior_mask = 0;
|
|
||||||
|
|
||||||
bch_err_fn(c, ret);
|
bch_err_fn(c, ret);
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -47,9 +47,14 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
|
|||||||
c->btree_cache.nr_reserve = reserve;
|
c->btree_cache.nr_reserve = reserve;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline size_t btree_cache_can_free(struct btree_cache *bc)
|
static inline size_t btree_cache_can_free(struct btree_cache_list *list)
|
||||||
{
|
{
|
||||||
return max_t(int, 0, bc->nr_live + bc->nr_freeable - bc->nr_reserve);
|
struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
|
||||||
|
|
||||||
|
size_t can_free = list->nr;
|
||||||
|
if (!list->idx)
|
||||||
|
can_free = max_t(ssize_t, 0, can_free - bc->nr_reserve);
|
||||||
|
return can_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
|
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
|
||||||
@ -184,6 +189,51 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
|
|||||||
six_unlock_intent(&b->c.lock);
|
six_unlock_intent(&b->c.lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
|
||||||
|
{
|
||||||
|
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
|
||||||
|
|
||||||
|
u64 mask = bc->pinned_nodes_mask[!!b->c.level];
|
||||||
|
|
||||||
|
return ((mask & BIT_ULL(b->c.btree_id)) &&
|
||||||
|
bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
|
||||||
|
bbpos_cmp(bc->pinned_nodes_end, pos) >= 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bch2_node_pin(struct bch_fs *c, struct btree *b)
|
||||||
|
{
|
||||||
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
|
|
||||||
|
mutex_lock(&bc->lock);
|
||||||
|
BUG_ON(!__btree_node_pinned(bc, b));
|
||||||
|
if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
|
||||||
|
set_btree_node_pinned(b);
|
||||||
|
list_move(&b->list, &bc->live[1].list);
|
||||||
|
bc->live[0].nr--;
|
||||||
|
bc->live[1].nr++;
|
||||||
|
}
|
||||||
|
mutex_unlock(&bc->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bch2_btree_cache_unpin(struct bch_fs *c)
|
||||||
|
{
|
||||||
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
|
struct btree *b, *n;
|
||||||
|
|
||||||
|
mutex_lock(&bc->lock);
|
||||||
|
c->btree_cache.pinned_nodes_mask[0] = 0;
|
||||||
|
c->btree_cache.pinned_nodes_mask[1] = 0;
|
||||||
|
|
||||||
|
list_for_each_entry_safe(b, n, &bc->live[1].list, list) {
|
||||||
|
clear_btree_node_pinned(b);
|
||||||
|
list_move(&b->list, &bc->live[0].list);
|
||||||
|
bc->live[0].nr++;
|
||||||
|
bc->live[1].nr--;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&bc->lock);
|
||||||
|
}
|
||||||
|
|
||||||
/* Btree in memory cache - hash table */
|
/* Btree in memory cache - hash table */
|
||||||
|
|
||||||
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||||
@ -199,7 +249,7 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
|||||||
if (b->c.btree_id < BTREE_ID_NR)
|
if (b->c.btree_id < BTREE_ID_NR)
|
||||||
--bc->nr_by_btree[b->c.btree_id];
|
--bc->nr_by_btree[b->c.btree_id];
|
||||||
|
|
||||||
bc->nr_live--;
|
bc->live[btree_node_pinned(b)].nr--;
|
||||||
bc->nr_freeable++;
|
bc->nr_freeable++;
|
||||||
list_move(&b->list, &bc->freeable);
|
list_move(&b->list, &bc->freeable);
|
||||||
}
|
}
|
||||||
@ -216,9 +266,14 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
|||||||
|
|
||||||
if (b->c.btree_id < BTREE_ID_NR)
|
if (b->c.btree_id < BTREE_ID_NR)
|
||||||
bc->nr_by_btree[b->c.btree_id]++;
|
bc->nr_by_btree[b->c.btree_id]++;
|
||||||
bc->nr_live++;
|
|
||||||
|
bool p = __btree_node_pinned(bc, b);
|
||||||
|
mod_bit(BTREE_NODE_pinned, &b->flags, p);
|
||||||
|
|
||||||
|
list_move_tail(&b->list, &bc->live[p].list);
|
||||||
|
bc->live[p].nr++;
|
||||||
|
|
||||||
bc->nr_freeable--;
|
bc->nr_freeable--;
|
||||||
list_move_tail(&b->list, &bc->live);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,20 +338,6 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, b
|
|||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
lockdep_assert_held(&bc->lock);
|
lockdep_assert_held(&bc->lock);
|
||||||
|
|
||||||
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
|
|
||||||
|
|
||||||
u64 mask = b->c.level
|
|
||||||
? bc->pinned_nodes_interior_mask
|
|
||||||
: bc->pinned_nodes_leaf_mask;
|
|
||||||
|
|
||||||
if ((mask & BIT_ULL(b->c.btree_id)) &&
|
|
||||||
bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
|
|
||||||
bbpos_cmp(bc->pinned_nodes_end, pos) >= 0) {
|
|
||||||
BTREE_CACHE_NOT_FREED_INCREMENT(pinned);
|
|
||||||
return -BCH_ERR_ENOMEM_btree_node_reclaim;
|
|
||||||
}
|
|
||||||
|
|
||||||
wait_on_io:
|
wait_on_io:
|
||||||
if (b->flags & ((1U << BTREE_NODE_dirty)|
|
if (b->flags & ((1U << BTREE_NODE_dirty)|
|
||||||
(1U << BTREE_NODE_read_in_flight)|
|
(1U << BTREE_NODE_read_in_flight)|
|
||||||
@ -401,8 +442,9 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
|
|||||||
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
||||||
struct shrink_control *sc)
|
struct shrink_control *sc)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = shrink->private_data;
|
struct btree_cache_list *list = shrink->private_data;
|
||||||
struct btree_cache *bc = &c->btree_cache;
|
struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
|
||||||
|
struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
|
||||||
struct btree *b, *t;
|
struct btree *b, *t;
|
||||||
unsigned long nr = sc->nr_to_scan;
|
unsigned long nr = sc->nr_to_scan;
|
||||||
unsigned long can_free = 0;
|
unsigned long can_free = 0;
|
||||||
@ -410,8 +452,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
|||||||
unsigned long touched = 0;
|
unsigned long touched = 0;
|
||||||
unsigned i, flags;
|
unsigned i, flags;
|
||||||
unsigned long ret = SHRINK_STOP;
|
unsigned long ret = SHRINK_STOP;
|
||||||
bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >=
|
bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4;
|
||||||
(bc->nr_live + bc->nr_freeable) * 3 / 4;
|
|
||||||
|
|
||||||
if (bch2_btree_shrinker_disabled)
|
if (bch2_btree_shrinker_disabled)
|
||||||
return SHRINK_STOP;
|
return SHRINK_STOP;
|
||||||
@ -426,7 +467,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
|||||||
* succeed, so that inserting keys into the btree can always succeed and
|
* succeed, so that inserting keys into the btree can always succeed and
|
||||||
* IO can always make forward progress:
|
* IO can always make forward progress:
|
||||||
*/
|
*/
|
||||||
can_free = btree_cache_can_free(bc);
|
can_free = btree_cache_can_free(list);
|
||||||
nr = min_t(unsigned long, nr, can_free);
|
nr = min_t(unsigned long, nr, can_free);
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
@ -452,7 +493,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
restart:
|
restart:
|
||||||
list_for_each_entry_safe(b, t, &bc->live, list) {
|
list_for_each_entry_safe(b, t, &list->list, list) {
|
||||||
touched++;
|
touched++;
|
||||||
|
|
||||||
if (btree_node_accessed(b)) {
|
if (btree_node_accessed(b)) {
|
||||||
@ -476,7 +517,7 @@ restart:
|
|||||||
!btree_node_will_make_reachable(b) &&
|
!btree_node_will_make_reachable(b) &&
|
||||||
!btree_node_write_blocked(b) &&
|
!btree_node_write_blocked(b) &&
|
||||||
six_trylock_read(&b->c.lock)) {
|
six_trylock_read(&b->c.lock)) {
|
||||||
list_move(&bc->live, &b->list);
|
list_move(&list->list, &b->list);
|
||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
|
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
|
||||||
six_unlock_read(&b->c.lock);
|
six_unlock_read(&b->c.lock);
|
||||||
@ -490,8 +531,8 @@ restart:
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
out_rotate:
|
out_rotate:
|
||||||
if (&t->list != &bc->live)
|
if (&t->list != &list->list)
|
||||||
list_move_tail(&bc->live, &t->list);
|
list_move_tail(&list->list, &t->list);
|
||||||
out:
|
out:
|
||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
out_nounlock:
|
out_nounlock:
|
||||||
@ -504,40 +545,42 @@ out_nounlock:
|
|||||||
static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
|
static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
|
||||||
struct shrink_control *sc)
|
struct shrink_control *sc)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = shrink->private_data;
|
struct btree_cache_list *list = shrink->private_data;
|
||||||
struct btree_cache *bc = &c->btree_cache;
|
|
||||||
|
|
||||||
if (bch2_btree_shrinker_disabled)
|
if (bch2_btree_shrinker_disabled)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return btree_cache_can_free(bc);
|
return btree_cache_can_free(list);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
struct btree_cache *bc = &c->btree_cache;
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
struct btree *b, *t;
|
struct btree *b, *t;
|
||||||
unsigned i, flags;
|
unsigned long flags;
|
||||||
|
|
||||||
shrinker_free(bc->shrink);
|
shrinker_free(bc->live[1].shrink);
|
||||||
|
shrinker_free(bc->live[0].shrink);
|
||||||
|
|
||||||
/* vfree() can allocate memory: */
|
/* vfree() can allocate memory: */
|
||||||
flags = memalloc_nofs_save();
|
flags = memalloc_nofs_save();
|
||||||
mutex_lock(&bc->lock);
|
mutex_lock(&bc->lock);
|
||||||
|
|
||||||
if (c->verify_data)
|
if (c->verify_data)
|
||||||
list_move(&c->verify_data->list, &bc->live);
|
list_move(&c->verify_data->list, &bc->live[0].list);
|
||||||
|
|
||||||
kvfree(c->verify_ondisk);
|
kvfree(c->verify_ondisk);
|
||||||
|
|
||||||
for (i = 0; i < btree_id_nr_alive(c); i++) {
|
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
|
||||||
struct btree_root *r = bch2_btree_id_root(c, i);
|
struct btree_root *r = bch2_btree_id_root(c, i);
|
||||||
|
|
||||||
if (r->b)
|
if (r->b)
|
||||||
list_add(&r->b->list, &bc->live);
|
list_add(&r->b->list, &bc->live[0].list);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_for_each_entry_safe(b, t, &bc->live, list)
|
list_for_each_entry_safe(b, t, &bc->live[1].list, list)
|
||||||
|
bch2_btree_node_hash_remove(bc, b);
|
||||||
|
list_for_each_entry_safe(b, t, &bc->live[0].list, list)
|
||||||
bch2_btree_node_hash_remove(bc, b);
|
bch2_btree_node_hash_remove(bc, b);
|
||||||
|
|
||||||
list_for_each_entry_safe(b, t, &bc->freeable, list) {
|
list_for_each_entry_safe(b, t, &bc->freeable, list) {
|
||||||
@ -563,7 +606,8 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
|||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
|
||||||
BUG_ON(bc->nr_by_btree[i]);
|
BUG_ON(bc->nr_by_btree[i]);
|
||||||
BUG_ON(bc->nr_live);
|
BUG_ON(bc->live[0].nr);
|
||||||
|
BUG_ON(bc->live[1].nr);
|
||||||
BUG_ON(bc->nr_freeable);
|
BUG_ON(bc->nr_freeable);
|
||||||
|
|
||||||
if (bc->table_init_done)
|
if (bc->table_init_done)
|
||||||
@ -589,18 +633,28 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
|
|||||||
if (!__bch2_btree_node_mem_alloc(c))
|
if (!__bch2_btree_node_mem_alloc(c))
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
list_splice_init(&bc->live, &bc->freeable);
|
list_splice_init(&bc->live[0].list, &bc->freeable);
|
||||||
|
|
||||||
mutex_init(&c->verify_lock);
|
mutex_init(&c->verify_lock);
|
||||||
|
|
||||||
shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
|
shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
|
||||||
if (!shrink)
|
if (!shrink)
|
||||||
goto err;
|
goto err;
|
||||||
bc->shrink = shrink;
|
bc->live[0].shrink = shrink;
|
||||||
shrink->count_objects = bch2_btree_cache_count;
|
shrink->count_objects = bch2_btree_cache_count;
|
||||||
shrink->scan_objects = bch2_btree_cache_scan;
|
shrink->scan_objects = bch2_btree_cache_scan;
|
||||||
shrink->seeks = 4;
|
shrink->seeks = 2;
|
||||||
shrink->private_data = c;
|
shrink->private_data = &bc->live[0];
|
||||||
|
shrinker_register(shrink);
|
||||||
|
|
||||||
|
shrink = shrinker_alloc(0, "%s-btree_cache-pinned", c->name);
|
||||||
|
if (!shrink)
|
||||||
|
goto err;
|
||||||
|
bc->live[1].shrink = shrink;
|
||||||
|
shrink->count_objects = bch2_btree_cache_count;
|
||||||
|
shrink->scan_objects = bch2_btree_cache_scan;
|
||||||
|
shrink->seeks = 8;
|
||||||
|
shrink->private_data = &bc->live[1];
|
||||||
shrinker_register(shrink);
|
shrinker_register(shrink);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -611,7 +665,10 @@ err:
|
|||||||
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
|
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
|
||||||
{
|
{
|
||||||
mutex_init(&bc->lock);
|
mutex_init(&bc->lock);
|
||||||
INIT_LIST_HEAD(&bc->live);
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) {
|
||||||
|
bc->live[i].idx = i;
|
||||||
|
INIT_LIST_HEAD(&bc->live[i].list);
|
||||||
|
}
|
||||||
INIT_LIST_HEAD(&bc->freeable);
|
INIT_LIST_HEAD(&bc->freeable);
|
||||||
INIT_LIST_HEAD(&bc->freed_pcpu);
|
INIT_LIST_HEAD(&bc->freed_pcpu);
|
||||||
INIT_LIST_HEAD(&bc->freed_nonpcpu);
|
INIT_LIST_HEAD(&bc->freed_nonpcpu);
|
||||||
@ -673,14 +730,16 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
|
|||||||
struct btree_cache *bc = &c->btree_cache;
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
struct btree *b;
|
struct btree *b;
|
||||||
|
|
||||||
list_for_each_entry_reverse(b, &bc->live, list)
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
|
||||||
if (!btree_node_reclaim(c, b, false))
|
list_for_each_entry_reverse(b, &bc->live[i].list, list)
|
||||||
return b;
|
if (!btree_node_reclaim(c, b, false))
|
||||||
|
return b;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
list_for_each_entry_reverse(b, &bc->live, list)
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
|
||||||
if (!btree_node_write_and_reclaim(c, b))
|
list_for_each_entry_reverse(b, &bc->live[i].list, list)
|
||||||
return b;
|
if (!btree_node_write_and_reclaim(c, b))
|
||||||
|
return b;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Rare case: all nodes were intent-locked.
|
* Rare case: all nodes were intent-locked.
|
||||||
@ -1387,9 +1446,10 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
|
|||||||
if (!out->nr_tabstops)
|
if (!out->nr_tabstops)
|
||||||
printbuf_tabstop_push(out, 32);
|
printbuf_tabstop_push(out, 32);
|
||||||
|
|
||||||
prt_btree_cache_line(out, c, "nr_live:", bc->nr_live);
|
prt_btree_cache_line(out, c, "live:", bc->live[0].nr);
|
||||||
prt_btree_cache_line(out, c, "nr_freeable:", bc->nr_freeable);
|
prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr);
|
||||||
prt_btree_cache_line(out, c, "nr dirty:", atomic_long_read(&bc->nr_dirty));
|
prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable);
|
||||||
|
prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty));
|
||||||
prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
|
prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
|
||||||
prt_newline(out);
|
prt_newline(out);
|
||||||
|
|
||||||
|
@ -19,6 +19,9 @@ int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
|
|||||||
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
|
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
|
||||||
unsigned, enum btree_id);
|
unsigned, enum btree_id);
|
||||||
|
|
||||||
|
void bch2_node_pin(struct bch_fs *, struct btree *);
|
||||||
|
void bch2_btree_cache_unpin(struct bch_fs *);
|
||||||
|
|
||||||
void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsigned,
|
void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsigned,
|
||||||
struct bkey_s_c, struct bkey_i *);
|
struct bkey_s_c, struct bkey_i *);
|
||||||
|
|
||||||
|
@ -147,8 +147,7 @@ struct btree {
|
|||||||
x(noevict) \
|
x(noevict) \
|
||||||
x(write_blocked) \
|
x(write_blocked) \
|
||||||
x(will_make_reachable) \
|
x(will_make_reachable) \
|
||||||
x(access_bit) \
|
x(access_bit)
|
||||||
x(pinned) \
|
|
||||||
|
|
||||||
enum bch_btree_cache_not_freed_reasons {
|
enum bch_btree_cache_not_freed_reasons {
|
||||||
#define x(n) BCH_BTREE_CACHE_NOT_FREED_##n,
|
#define x(n) BCH_BTREE_CACHE_NOT_FREED_##n,
|
||||||
@ -157,6 +156,13 @@ enum bch_btree_cache_not_freed_reasons {
|
|||||||
BCH_BTREE_CACHE_NOT_FREED_REASONS_NR,
|
BCH_BTREE_CACHE_NOT_FREED_REASONS_NR,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct btree_cache_list {
|
||||||
|
unsigned idx;
|
||||||
|
struct shrinker *shrink;
|
||||||
|
struct list_head list;
|
||||||
|
size_t nr;
|
||||||
|
};
|
||||||
|
|
||||||
struct btree_cache {
|
struct btree_cache {
|
||||||
struct rhashtable table;
|
struct rhashtable table;
|
||||||
bool table_init_done;
|
bool table_init_done;
|
||||||
@ -174,12 +180,11 @@ struct btree_cache {
|
|||||||
* should never grow past ~2-3 nodes in practice.
|
* should never grow past ~2-3 nodes in practice.
|
||||||
*/
|
*/
|
||||||
struct mutex lock;
|
struct mutex lock;
|
||||||
struct list_head live;
|
|
||||||
struct list_head freeable;
|
struct list_head freeable;
|
||||||
struct list_head freed_pcpu;
|
struct list_head freed_pcpu;
|
||||||
struct list_head freed_nonpcpu;
|
struct list_head freed_nonpcpu;
|
||||||
|
struct btree_cache_list live[2];
|
||||||
|
|
||||||
size_t nr_live;
|
|
||||||
size_t nr_freeable;
|
size_t nr_freeable;
|
||||||
size_t nr_reserve;
|
size_t nr_reserve;
|
||||||
size_t nr_by_btree[BTREE_ID_NR];
|
size_t nr_by_btree[BTREE_ID_NR];
|
||||||
@ -188,7 +193,6 @@ struct btree_cache {
|
|||||||
/* shrinker stats */
|
/* shrinker stats */
|
||||||
size_t nr_freed;
|
size_t nr_freed;
|
||||||
u64 not_freed[BCH_BTREE_CACHE_NOT_FREED_REASONS_NR];
|
u64 not_freed[BCH_BTREE_CACHE_NOT_FREED_REASONS_NR];
|
||||||
struct shrinker *shrink;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we need to allocate memory for a new btree node and that
|
* If we need to allocate memory for a new btree node and that
|
||||||
@ -201,8 +205,8 @@ struct btree_cache {
|
|||||||
|
|
||||||
struct bbpos pinned_nodes_start;
|
struct bbpos pinned_nodes_start;
|
||||||
struct bbpos pinned_nodes_end;
|
struct bbpos pinned_nodes_end;
|
||||||
u64 pinned_nodes_leaf_mask;
|
/* btree id mask: 0 for leaves, 1 for interior */
|
||||||
u64 pinned_nodes_interior_mask;
|
u64 pinned_nodes_mask[2];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct btree_node_iter {
|
struct btree_node_iter {
|
||||||
@ -594,7 +598,8 @@ enum btree_write_type {
|
|||||||
x(dying) \
|
x(dying) \
|
||||||
x(fake) \
|
x(fake) \
|
||||||
x(need_rewrite) \
|
x(need_rewrite) \
|
||||||
x(never_write)
|
x(never_write) \
|
||||||
|
x(pinned)
|
||||||
|
|
||||||
enum btree_flags {
|
enum btree_flags {
|
||||||
/* First bits for btree node write type */
|
/* First bits for btree node write type */
|
||||||
|
@ -1904,7 +1904,7 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
|
|||||||
six_unlock_intent(&n->c.lock);
|
six_unlock_intent(&n->c.lock);
|
||||||
|
|
||||||
mutex_lock(&c->btree_cache.lock);
|
mutex_lock(&c->btree_cache.lock);
|
||||||
list_add_tail(&b->list, &c->btree_cache.live);
|
list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list);
|
||||||
mutex_unlock(&c->btree_cache.lock);
|
mutex_unlock(&c->btree_cache.lock);
|
||||||
|
|
||||||
bch2_trans_verify_locks(trans);
|
bch2_trans_verify_locks(trans);
|
||||||
|
@ -641,6 +641,7 @@ static u64 journal_seq_to_flush(struct journal *j)
|
|||||||
static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||||
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
bool kthread = (current->flags & PF_KTHREAD) != 0;
|
bool kthread = (current->flags & PF_KTHREAD) != 0;
|
||||||
u64 seq_to_flush;
|
u64 seq_to_flush;
|
||||||
size_t min_nr, min_key_cache, nr_flushed;
|
size_t min_nr, min_key_cache, nr_flushed;
|
||||||
@ -681,7 +682,8 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
|||||||
if (j->watermark != BCH_WATERMARK_stripe)
|
if (j->watermark != BCH_WATERMARK_stripe)
|
||||||
min_nr = 1;
|
min_nr = 1;
|
||||||
|
|
||||||
if (atomic_long_read(&c->btree_cache.nr_dirty) * 2 > c->btree_cache.nr_live)
|
size_t btree_cache_live = bc->live[0].nr + bc->live[1].nr;
|
||||||
|
if (atomic_long_read(&bc->nr_dirty) * 2 > btree_cache_live)
|
||||||
min_nr = 1;
|
min_nr = 1;
|
||||||
|
|
||||||
min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
|
min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
|
||||||
@ -689,8 +691,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
|||||||
trace_and_count(c, journal_reclaim_start, c,
|
trace_and_count(c, journal_reclaim_start, c,
|
||||||
direct, kicked,
|
direct, kicked,
|
||||||
min_nr, min_key_cache,
|
min_nr, min_key_cache,
|
||||||
atomic_long_read(&c->btree_cache.nr_dirty),
|
atomic_long_read(&bc->nr_dirty), btree_cache_live,
|
||||||
c->btree_cache.nr_live,
|
|
||||||
atomic_long_read(&c->btree_key_cache.nr_dirty),
|
atomic_long_read(&c->btree_key_cache.nr_dirty),
|
||||||
atomic_long_read(&c->btree_key_cache.nr_keys));
|
atomic_long_read(&c->btree_key_cache.nr_keys));
|
||||||
|
|
||||||
|
@ -244,14 +244,18 @@ static struct attribute sysfs_state_rw = {
|
|||||||
|
|
||||||
static size_t bch2_btree_cache_size(struct bch_fs *c)
|
static size_t bch2_btree_cache_size(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
size_t ret = 0;
|
size_t ret = 0;
|
||||||
struct btree *b;
|
struct btree *b;
|
||||||
|
|
||||||
mutex_lock(&c->btree_cache.lock);
|
mutex_lock(&bc->lock);
|
||||||
list_for_each_entry(b, &c->btree_cache.live, list)
|
list_for_each_entry(b, &bc->live[0].list, list)
|
||||||
ret += btree_buf_bytes(b);
|
ret += btree_buf_bytes(b);
|
||||||
|
list_for_each_entry(b, &bc->live[1].list, list)
|
||||||
mutex_unlock(&c->btree_cache.lock);
|
ret += btree_buf_bytes(b);
|
||||||
|
list_for_each_entry(b, &bc->freeable, list)
|
||||||
|
ret += btree_buf_bytes(b);
|
||||||
|
mutex_unlock(&bc->lock);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -444,11 +448,12 @@ STORE(bch2_fs)
|
|||||||
return -EROFS;
|
return -EROFS;
|
||||||
|
|
||||||
if (attr == &sysfs_trigger_btree_cache_shrink) {
|
if (attr == &sysfs_trigger_btree_cache_shrink) {
|
||||||
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
struct shrink_control sc;
|
struct shrink_control sc;
|
||||||
|
|
||||||
sc.gfp_mask = GFP_KERNEL;
|
sc.gfp_mask = GFP_KERNEL;
|
||||||
sc.nr_to_scan = strtoul_or_return(buf);
|
sc.nr_to_scan = strtoul_or_return(buf);
|
||||||
c->btree_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
|
bc->live[0].shrink->scan_objects(bc->live[0].shrink, &sc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attr == &sysfs_trigger_btree_key_cache_shrink) {
|
if (attr == &sysfs_trigger_btree_key_cache_shrink) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user