mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-08 14:13:53 +00:00
bcachefs: Remove some uses of PAGE_SIZE in the btree code
For portability to userspace, we should try to avoid working in kernel pages.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
760992aac8
commit
4580baec7f
@ -301,44 +301,6 @@ struct rw_aux_tree {
|
||||
struct bpos k;
|
||||
};
|
||||
|
||||
/*
|
||||
* BSET_CACHELINE was originally intended to match the hardware cacheline size -
|
||||
* it used to be 64, but I realized the lookup code would touch slightly less
|
||||
* memory if it was 128.
|
||||
*
|
||||
* It defines the number of bytes (in struct bset) per struct bkey_float in
|
||||
* the auxiliary search tree - when we're done searching the bset_float tree we
|
||||
* have this many bytes left that we do a linear search over.
|
||||
*
|
||||
* Since (after level 5) every level of the bset_tree is on a new cacheline,
|
||||
* we're touching one fewer cacheline in the bset tree in exchange for one more
|
||||
* cacheline in the linear search - but the linear search might stop before it
|
||||
* gets to the second cacheline.
|
||||
*/
|
||||
|
||||
#define BSET_CACHELINE 128
|
||||
|
||||
/* Space required for the btree node keys */
|
||||
static inline size_t btree_keys_bytes(struct btree *b)
|
||||
{
|
||||
return PAGE_SIZE << b->page_order;
|
||||
}
|
||||
|
||||
static inline size_t btree_keys_cachelines(struct btree *b)
|
||||
{
|
||||
return btree_keys_bytes(b) / BSET_CACHELINE;
|
||||
}
|
||||
|
||||
static inline size_t btree_aux_data_bytes(struct btree *b)
|
||||
{
|
||||
return btree_keys_cachelines(b) * 8;
|
||||
}
|
||||
|
||||
static inline size_t btree_aux_data_u64s(struct btree *b)
|
||||
{
|
||||
return btree_aux_data_bytes(b) / sizeof(u64);
|
||||
}
|
||||
|
||||
static unsigned bset_aux_tree_buf_end(const struct bset_tree *t)
|
||||
{
|
||||
BUG_ON(t->aux_data_offset == U16_MAX);
|
||||
@ -414,24 +376,6 @@ static void bset_aux_tree_verify(struct btree *b)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Memory allocation */
|
||||
|
||||
void bch2_btree_keys_free(struct btree *b)
|
||||
{
|
||||
kvfree(b->aux_data);
|
||||
b->aux_data = NULL;
|
||||
}
|
||||
|
||||
int bch2_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp)
|
||||
{
|
||||
b->page_order = page_order;
|
||||
b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
|
||||
if (!b->aux_data)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks)
|
||||
{
|
||||
unsigned i;
|
||||
|
@ -184,6 +184,38 @@ static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* BSET_CACHELINE was originally intended to match the hardware cacheline size -
|
||||
* it used to be 64, but I realized the lookup code would touch slightly less
|
||||
* memory if it was 128.
|
||||
*
|
||||
* It defines the number of bytes (in struct bset) per struct bkey_float in
|
||||
* the auxiliary search tree - when we're done searching the bset_float tree we
|
||||
* have this many bytes left that we do a linear search over.
|
||||
*
|
||||
* Since (after level 5) every level of the bset_tree is on a new cacheline,
|
||||
* we're touching one fewer cacheline in the bset tree in exchange for one more
|
||||
* cacheline in the linear search - but the linear search might stop before it
|
||||
* gets to the second cacheline.
|
||||
*/
|
||||
|
||||
#define BSET_CACHELINE 128
|
||||
|
||||
static inline size_t btree_keys_cachelines(struct btree *b)
|
||||
{
|
||||
return (1U << b->byte_order) / BSET_CACHELINE;
|
||||
}
|
||||
|
||||
static inline size_t btree_aux_data_bytes(struct btree *b)
|
||||
{
|
||||
return btree_keys_cachelines(b) * 8;
|
||||
}
|
||||
|
||||
static inline size_t btree_aux_data_u64s(struct btree *b)
|
||||
{
|
||||
return btree_aux_data_bytes(b) / sizeof(u64);
|
||||
}
|
||||
|
||||
typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
|
||||
|
||||
static inline void
|
||||
@ -334,8 +366,6 @@ static inline struct bset *bset_next_set(struct btree *b,
|
||||
return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
|
||||
}
|
||||
|
||||
void bch2_btree_keys_free(struct btree *);
|
||||
int bch2_btree_keys_alloc(struct btree *, unsigned, gfp_t);
|
||||
void bch2_btree_keys_init(struct btree *, bool *);
|
||||
|
||||
void bch2_bset_init_first(struct btree *, struct bset *);
|
||||
|
@ -44,7 +44,8 @@ static void __btree_node_data_free(struct bch_fs *c, struct btree *b)
|
||||
|
||||
kvpfree(b->data, btree_bytes(c));
|
||||
b->data = NULL;
|
||||
bch2_btree_keys_free(b);
|
||||
kvfree(b->aux_data);
|
||||
b->aux_data = NULL;
|
||||
}
|
||||
|
||||
static void btree_node_data_free(struct bch_fs *c, struct btree *b)
|
||||
@ -72,7 +73,7 @@ static const struct rhashtable_params bch_btree_cache_params = {
|
||||
.obj_cmpfn = bch2_btree_cache_cmp_fn,
|
||||
};
|
||||
|
||||
static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
{
|
||||
BUG_ON(b->data || b->aux_data);
|
||||
|
||||
@ -80,7 +81,8 @@ static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
if (!b->data)
|
||||
return -ENOMEM;
|
||||
|
||||
if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp)) {
|
||||
b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
|
||||
if (!b->aux_data) {
|
||||
kvpfree(b->data, btree_bytes(c));
|
||||
b->data = NULL;
|
||||
return -ENOMEM;
|
||||
@ -89,21 +91,9 @@ static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
static struct btree *__btree_node_mem_alloc(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
|
||||
if (!__btree_node_data_alloc(c, b, gfp)) {
|
||||
bc->used++;
|
||||
list_move(&b->list, &bc->freeable);
|
||||
} else {
|
||||
list_move(&b->list, &bc->freed);
|
||||
}
|
||||
}
|
||||
|
||||
static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
|
||||
{
|
||||
struct btree *b = kzalloc(sizeof(struct btree), gfp);
|
||||
struct btree *b = kzalloc(sizeof(struct btree), GFP_KERNEL);
|
||||
if (!b)
|
||||
return NULL;
|
||||
|
||||
@ -112,9 +102,25 @@ static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
|
||||
lockdep_set_novalidate_class(&b->c.lock);
|
||||
INIT_LIST_HEAD(&b->list);
|
||||
INIT_LIST_HEAD(&b->write_blocked);
|
||||
b->byte_order = ilog2(btree_bytes(c));
|
||||
return b;
|
||||
}
|
||||
|
||||
btree_node_data_alloc(c, b, gfp);
|
||||
return b->data ? b : NULL;
|
||||
static struct btree *btree_node_mem_alloc(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b = __btree_node_mem_alloc(c);
|
||||
if (!b)
|
||||
return NULL;
|
||||
|
||||
if (btree_node_data_alloc(c, b, GFP_KERNEL)) {
|
||||
kfree(b);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bc->used++;
|
||||
list_add(&b->list, &bc->freeable);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Btree in memory cache - hash table */
|
||||
@ -405,7 +411,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
|
||||
bch2_recalc_btree_reserve(c);
|
||||
|
||||
for (i = 0; i < bc->reserve; i++)
|
||||
if (!btree_node_mem_alloc(c, GFP_KERNEL)) {
|
||||
if (!btree_node_mem_alloc(c)) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@ -421,7 +427,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
|
||||
goto out;
|
||||
}
|
||||
|
||||
c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL);
|
||||
c->verify_data = btree_node_mem_alloc(c);
|
||||
if (!c->verify_data) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@ -553,21 +559,16 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
if (!b) {
|
||||
b = kzalloc(sizeof(struct btree), GFP_KERNEL);
|
||||
b = __btree_node_mem_alloc(c);
|
||||
if (!b)
|
||||
goto err;
|
||||
|
||||
bkey_btree_ptr_init(&b->key);
|
||||
six_lock_init(&b->c.lock);
|
||||
INIT_LIST_HEAD(&b->list);
|
||||
INIT_LIST_HEAD(&b->write_blocked);
|
||||
|
||||
BUG_ON(!six_trylock_intent(&b->c.lock));
|
||||
BUG_ON(!six_trylock_write(&b->c.lock));
|
||||
}
|
||||
|
||||
if (!b->data) {
|
||||
if (__btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL))
|
||||
if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL))
|
||||
goto err;
|
||||
|
||||
mutex_lock(&bc->lock);
|
||||
|
@ -79,14 +79,9 @@ static inline size_t btree_max_u64s(struct bch_fs *c)
|
||||
return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
|
||||
}
|
||||
|
||||
static inline size_t btree_page_order(struct bch_fs *c)
|
||||
{
|
||||
return get_order(btree_bytes(c));
|
||||
}
|
||||
|
||||
static inline size_t btree_pages(struct bch_fs *c)
|
||||
{
|
||||
return 1 << btree_page_order(c);
|
||||
return btree_bytes(c) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
static inline unsigned btree_blocks(struct bch_fs *c)
|
||||
|
@ -57,25 +57,25 @@ static void set_needs_whiteout(struct bset *i, int v)
|
||||
k->needs_whiteout = v;
|
||||
}
|
||||
|
||||
static void btree_bounce_free(struct bch_fs *c, unsigned order,
|
||||
static void btree_bounce_free(struct bch_fs *c, size_t size,
|
||||
bool used_mempool, void *p)
|
||||
{
|
||||
if (used_mempool)
|
||||
mempool_free(p, &c->btree_bounce_pool);
|
||||
else
|
||||
vpfree(p, PAGE_SIZE << order);
|
||||
vpfree(p, size);
|
||||
}
|
||||
|
||||
static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
|
||||
static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
|
||||
bool *used_mempool)
|
||||
{
|
||||
unsigned flags = memalloc_nofs_save();
|
||||
void *p;
|
||||
|
||||
BUG_ON(order > btree_page_order(c));
|
||||
BUG_ON(size > btree_bytes(c));
|
||||
|
||||
*used_mempool = false;
|
||||
p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order);
|
||||
p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
|
||||
if (!p) {
|
||||
*used_mempool = true;
|
||||
p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
|
||||
@ -125,16 +125,14 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k;
|
||||
bool used_mempool = false;
|
||||
unsigned order;
|
||||
size_t bytes = b->whiteout_u64s * sizeof(u64);
|
||||
|
||||
if (!b->whiteout_u64s)
|
||||
return;
|
||||
|
||||
order = get_order(b->whiteout_u64s * sizeof(u64));
|
||||
new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
|
||||
|
||||
new_whiteouts = btree_bounce_alloc(c, order, &used_mempool);
|
||||
|
||||
ptrs = ptrs_end = ((void *) new_whiteouts + (PAGE_SIZE << order));
|
||||
ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
|
||||
|
||||
for (k = unwritten_whiteouts_start(c, b);
|
||||
k != unwritten_whiteouts_end(c, b);
|
||||
@ -158,7 +156,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
|
||||
memcpy_u64s(unwritten_whiteouts_start(c, b),
|
||||
new_whiteouts, b->whiteout_u64s);
|
||||
|
||||
btree_bounce_free(c, order, used_mempool, new_whiteouts);
|
||||
btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
|
||||
}
|
||||
|
||||
static bool should_compact_bset(struct btree *b, struct bset_tree *t,
|
||||
@ -187,7 +185,7 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
|
||||
struct bkey_packed *whiteouts = NULL;
|
||||
struct bkey_packed *u_start, *u_pos;
|
||||
struct sort_iter sort_iter;
|
||||
unsigned order, whiteout_u64s = 0, u64s;
|
||||
unsigned bytes, whiteout_u64s = 0, u64s;
|
||||
bool used_mempool, compacting = false;
|
||||
|
||||
BUG_ON(!btree_node_is_extents(b));
|
||||
@ -204,9 +202,9 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
|
||||
sort_iter_init(&sort_iter, b);
|
||||
|
||||
whiteout_u64s += b->whiteout_u64s;
|
||||
order = get_order(whiteout_u64s * sizeof(u64));
|
||||
bytes = whiteout_u64s * sizeof(u64);
|
||||
|
||||
whiteouts = btree_bounce_alloc(c, order, &used_mempool);
|
||||
whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
|
||||
u_start = u_pos = whiteouts;
|
||||
|
||||
memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b),
|
||||
@ -306,7 +304,7 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
|
||||
unwritten_whiteouts_end(c, b),
|
||||
true);
|
||||
|
||||
btree_bounce_free(c, order, used_mempool, whiteouts);
|
||||
btree_bounce_free(c, bytes, used_mempool, whiteouts);
|
||||
|
||||
bch2_btree_build_aux_trees(b);
|
||||
|
||||
@ -401,7 +399,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
struct bset *start_bset = bset(b, &b->set[start_idx]);
|
||||
bool used_mempool = false;
|
||||
u64 start_time, seq = 0;
|
||||
unsigned i, u64s = 0, order, shift = end_idx - start_idx - 1;
|
||||
unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1;
|
||||
bool sorting_entire_node = start_idx == 0 &&
|
||||
end_idx == b->nsets;
|
||||
|
||||
@ -416,11 +414,11 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
btree_bkey_last(b, t));
|
||||
}
|
||||
|
||||
order = sorting_entire_node
|
||||
? btree_page_order(c)
|
||||
: get_order(__vstruct_bytes(struct btree_node, u64s));
|
||||
bytes = sorting_entire_node
|
||||
? btree_bytes(c)
|
||||
: __vstruct_bytes(struct btree_node, u64s);
|
||||
|
||||
out = btree_bounce_alloc(c, order, &used_mempool);
|
||||
out = btree_bounce_alloc(c, bytes, &used_mempool);
|
||||
|
||||
start_time = local_clock();
|
||||
|
||||
@ -435,7 +433,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
|
||||
out->keys.u64s = cpu_to_le16(u64s);
|
||||
|
||||
BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order));
|
||||
BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes);
|
||||
|
||||
if (sorting_entire_node)
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
|
||||
@ -449,7 +447,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
if (sorting_entire_node) {
|
||||
unsigned u64s = le16_to_cpu(out->keys.u64s);
|
||||
|
||||
BUG_ON(order != btree_page_order(c));
|
||||
BUG_ON(bytes != btree_bytes(c));
|
||||
|
||||
/*
|
||||
* Our temporary buffer is the same size as the btree node's
|
||||
@ -484,7 +482,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
set_btree_bset_end(b, &b->set[start_idx]);
|
||||
bch2_bset_set_no_aux_tree(b, &b->set[start_idx]);
|
||||
|
||||
btree_bounce_free(c, order, used_mempool, out);
|
||||
btree_bounce_free(c, bytes, used_mempool, out);
|
||||
|
||||
bch2_verify_btree_nr_keys(b);
|
||||
}
|
||||
@ -1043,7 +1041,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
BTREE_ERR_WANT_RETRY, c, b, NULL,
|
||||
"found bset signature after last bset");
|
||||
|
||||
sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool);
|
||||
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
|
||||
sorted->keys.u64s = 0;
|
||||
|
||||
set_btree_bset(b, b->set, &b->data->keys);
|
||||
@ -1061,7 +1059,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
|
||||
BUG_ON(b->nr.live_u64s != u64s);
|
||||
|
||||
btree_bounce_free(c, btree_page_order(c), used_mempool, sorted);
|
||||
btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
|
||||
|
||||
i = &b->data->keys;
|
||||
for (k = i->start; k != vstruct_last(i);) {
|
||||
@ -1403,7 +1401,7 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
struct btree *b = wbio->wbio.bio.bi_private;
|
||||
|
||||
btree_bounce_free(c,
|
||||
wbio->wbio.order,
|
||||
wbio->bytes,
|
||||
wbio->wbio.used_mempool,
|
||||
wbio->data);
|
||||
|
||||
@ -1486,7 +1484,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct sort_iter sort_iter;
|
||||
struct nonce nonce;
|
||||
unsigned bytes_to_write, sectors_to_write, order, bytes, u64s;
|
||||
unsigned bytes_to_write, sectors_to_write, bytes, u64s;
|
||||
u64 seq = 0;
|
||||
bool used_mempool;
|
||||
unsigned long old, new;
|
||||
@ -1556,8 +1554,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
seq = max(seq, le64_to_cpu(i->journal_seq));
|
||||
}
|
||||
|
||||
order = get_order(bytes);
|
||||
data = btree_bounce_alloc(c, order, &used_mempool);
|
||||
data = btree_bounce_alloc(c, bytes, &used_mempool);
|
||||
|
||||
if (!b->written) {
|
||||
bn = data;
|
||||
@ -1671,7 +1668,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
struct btree_write_bio, wbio.bio);
|
||||
wbio_init(&wbio->wbio.bio);
|
||||
wbio->data = data;
|
||||
wbio->wbio.order = order;
|
||||
wbio->bytes = bytes;
|
||||
wbio->wbio.used_mempool = used_mempool;
|
||||
wbio->wbio.bio.bi_end_io = btree_node_write_endio;
|
||||
wbio->wbio.bio.bi_private = b;
|
||||
@ -1707,7 +1704,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
set_btree_node_noevict(b);
|
||||
b->written += sectors_to_write;
|
||||
nowrite:
|
||||
btree_bounce_free(c, order, used_mempool, data);
|
||||
btree_bounce_free(c, bytes, used_mempool, data);
|
||||
btree_node_write_done(c, b);
|
||||
}
|
||||
|
||||
|
@ -23,8 +23,9 @@ struct btree_read_bio {
|
||||
};
|
||||
|
||||
struct btree_write_bio {
|
||||
void *data;
|
||||
struct work_struct work;
|
||||
void *data;
|
||||
unsigned bytes;
|
||||
struct bch_write_bio wbio;
|
||||
};
|
||||
|
||||
|
@ -94,7 +94,7 @@ struct btree {
|
||||
struct btree_nr_keys nr;
|
||||
u16 sib_u64s[2];
|
||||
u16 whiteout_u64s;
|
||||
u8 page_order;
|
||||
u8 byte_order;
|
||||
u8 unpack_fn_len;
|
||||
|
||||
/*
|
||||
|
@ -79,7 +79,6 @@ struct bch_write_bio {
|
||||
u64 submit_time;
|
||||
|
||||
struct bch_devs_list failed;
|
||||
u8 order;
|
||||
u8 dev;
|
||||
|
||||
unsigned split:1,
|
||||
|
Loading…
Reference in New Issue
Block a user