mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 13:43:51 +00:00
bcachefs: RESERVE_stripe
Rework the stripe creation path: a new algorithm for deciding when to create new stripes and when to reuse existing stripes.

We add a new allocation watermark, RESERVE_stripe, above RESERVE_none. We then always first try to create a new stripe by doing RESERVE_stripe allocations; if this fails, we reuse an existing stripe and allocate buckets for it with the reserve watermark for the given write (RESERVE_none or RESERVE_movinggc).

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
d57c9add59
commit
e84face6f0
@ -216,7 +216,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
|
|||||||
u64 free = max_t(s64, 0,
|
u64 free = max_t(s64, 0,
|
||||||
u.d[BCH_DATA_free].buckets
|
u.d[BCH_DATA_free].buckets
|
||||||
+ u.d[BCH_DATA_need_discard].buckets
|
+ u.d[BCH_DATA_need_discard].buckets
|
||||||
- bch2_dev_buckets_reserved(ca, RESERVE_none));
|
- bch2_dev_buckets_reserved(ca, RESERVE_stripe));
|
||||||
|
|
||||||
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
|
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,8 @@ struct ec_bucket_buf;
|
|||||||
x(btree_movinggc) \
|
x(btree_movinggc) \
|
||||||
x(btree) \
|
x(btree) \
|
||||||
x(movinggc) \
|
x(movinggc) \
|
||||||
x(none)
|
x(none) \
|
||||||
|
x(stripe)
|
||||||
|
|
||||||
enum alloc_reserve {
|
enum alloc_reserve {
|
||||||
#define x(name) RESERVE_##name,
|
#define x(name) RESERVE_##name,
|
||||||
|
@ -157,6 +157,9 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reser
|
|||||||
switch (reserve) {
|
switch (reserve) {
|
||||||
case RESERVE_NR:
|
case RESERVE_NR:
|
||||||
unreachable();
|
unreachable();
|
||||||
|
case RESERVE_stripe:
|
||||||
|
reserved += ca->mi.nbuckets >> 6;
|
||||||
|
fallthrough;
|
||||||
case RESERVE_none:
|
case RESERVE_none:
|
||||||
reserved += ca->mi.nbuckets >> 6;
|
reserved += ca->mi.nbuckets >> 6;
|
||||||
fallthrough;
|
fallthrough;
|
||||||
|
@ -1569,6 +1569,17 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
|
|||||||
BUG_ON(h->s->existing_stripe.size != h->blocksize);
|
BUG_ON(h->s->existing_stripe.size != h->blocksize);
|
||||||
BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
|
BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free buckets we initially allocated - they might conflict with
|
||||||
|
* blocks from the stripe we're reusing:
|
||||||
|
*/
|
||||||
|
for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) {
|
||||||
|
bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
|
||||||
|
h->s->blocks[i] = 0;
|
||||||
|
}
|
||||||
|
memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
|
||||||
|
memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
|
||||||
|
|
||||||
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
|
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
|
||||||
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
|
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
|
||||||
__set_bit(i, h->s->blocks_gotten);
|
__set_bit(i, h->s->blocks_gotten);
|
||||||
@ -1649,8 +1660,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
|
|||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct ec_stripe_head *h;
|
struct ec_stripe_head *h;
|
||||||
|
bool waiting = false;
|
||||||
int ret;
|
int ret;
|
||||||
bool needs_stripe_new;
|
|
||||||
|
|
||||||
h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, reserve);
|
h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, reserve);
|
||||||
if (!h)
|
if (!h)
|
||||||
@ -1658,8 +1669,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
|
|||||||
if (IS_ERR_OR_NULL(h))
|
if (IS_ERR_OR_NULL(h))
|
||||||
return h;
|
return h;
|
||||||
|
|
||||||
needs_stripe_new = !h->s;
|
if (!h->s) {
|
||||||
if (needs_stripe_new) {
|
|
||||||
if (ec_new_stripe_alloc(c, h)) {
|
if (ec_new_stripe_alloc(c, h)) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
bch_err(c, "failed to allocate new stripe");
|
bch_err(c, "failed to allocate new stripe");
|
||||||
@ -1670,30 +1680,53 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (h->s->allocated)
|
||||||
|
goto allocated;
|
||||||
|
|
||||||
|
if (h->s->have_existing_stripe)
|
||||||
|
goto alloc_existing;
|
||||||
|
|
||||||
|
/* First, try to allocate a full stripe: */
|
||||||
|
ret = new_stripe_alloc_buckets(trans, h, RESERVE_stripe, NULL) ?:
|
||||||
|
__bch2_ec_stripe_head_reserve(trans, h);
|
||||||
|
if (!ret)
|
||||||
|
goto allocated;
|
||||||
|
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
|
||||||
|
bch2_err_matches(ret, ENOMEM))
|
||||||
|
goto err;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try reserve a new stripe before reusing an
|
* Not enough buckets available for a full stripe: we must reuse an
|
||||||
* existing stripe. This will prevent unnecessary
|
* existing stripe:
|
||||||
* read amplification during write oriented workloads.
|
|
||||||
*/
|
*/
|
||||||
ret = 0;
|
while (1) {
|
||||||
if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe)
|
|
||||||
ret = __bch2_ec_stripe_head_reserve(trans, h);
|
|
||||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
||||||
goto err;
|
|
||||||
|
|
||||||
if (ret && needs_stripe_new)
|
|
||||||
ret = __bch2_ec_stripe_head_reuse(trans, h);
|
ret = __bch2_ec_stripe_head_reuse(trans, h);
|
||||||
if (ret)
|
if (!ret)
|
||||||
goto err;
|
break;
|
||||||
|
if (ret == -BCH_ERR_ENOSPC_stripe_reuse && cl)
|
||||||
if (!h->s->allocated) {
|
ret = -BCH_ERR_stripe_alloc_blocked;
|
||||||
ret = new_stripe_alloc_buckets(trans, h, reserve, cl);
|
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
|
||||||
if (ret)
|
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
h->s->allocated = true;
|
/* XXX freelist_wait? */
|
||||||
|
closure_wait(&c->freelist_wait, cl);
|
||||||
|
waiting = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (waiting)
|
||||||
|
closure_wake_up(&c->freelist_wait);
|
||||||
|
alloc_existing:
|
||||||
|
/*
|
||||||
|
* Retry allocating buckets, with the reserve watermark for this
|
||||||
|
* particular write:
|
||||||
|
*/
|
||||||
|
ret = new_stripe_alloc_buckets(trans, h, reserve, cl);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
allocated:
|
||||||
|
h->s->allocated = true;
|
||||||
|
BUG_ON(!h->s->idx);
|
||||||
|
|
||||||
BUG_ON(trans->restarted);
|
BUG_ON(trans->restarted);
|
||||||
return h;
|
return h;
|
||||||
err:
|
err:
|
||||||
|
@ -93,6 +93,7 @@
|
|||||||
x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
|
x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
|
||||||
x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
|
x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
|
||||||
x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
|
x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
|
||||||
|
x(BCH_ERR_operation_blocked, stripe_alloc_blocked) \
|
||||||
x(BCH_ERR_invalid, invalid_sb) \
|
x(BCH_ERR_invalid, invalid_sb) \
|
||||||
x(BCH_ERR_invalid_sb, invalid_sb_magic) \
|
x(BCH_ERR_invalid_sb, invalid_sb_magic) \
|
||||||
x(BCH_ERR_invalid_sb, invalid_sb_version) \
|
x(BCH_ERR_invalid_sb, invalid_sb_version) \
|
||||||
|
Loading…
Reference in New Issue
Block a user