bcachefs: RESERVE_stripe

Rework stripe creation path - new algorithm for deciding when to create
new stripes or reuse existing stripes.

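We add a new allocation watermark, RESERVE_stripe, above RESERVE_none:
it reserves a further nbuckets >> 6 buckets per device on top of the
RESERVE_none reserve.
Then we always try to create a new stripe by doing RESERVE_stripe
allocations; if this fails (other than with a transaction restart or
ENOMEM, which are returned to the caller), we reuse an existing stripe
and allocate buckets for it with the reserve watermark for the given
write (RESERVE_none or RESERVE_movinggc).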

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-03-02 01:54:17 -05:00
parent d57c9add59
commit e84face6f0
5 changed files with 60 additions and 22 deletions

View File

@ -216,7 +216,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
u64 free = max_t(s64, 0, u64 free = max_t(s64, 0,
u.d[BCH_DATA_free].buckets u.d[BCH_DATA_free].buckets
+ u.d[BCH_DATA_need_discard].buckets + u.d[BCH_DATA_need_discard].buckets
- bch2_dev_buckets_reserved(ca, RESERVE_none)); - bch2_dev_buckets_reserved(ca, RESERVE_stripe));
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets); return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
} }

View File

@ -22,7 +22,8 @@ struct ec_bucket_buf;
x(btree_movinggc) \ x(btree_movinggc) \
x(btree) \ x(btree) \
x(movinggc) \ x(movinggc) \
x(none) x(none) \
x(stripe)
enum alloc_reserve { enum alloc_reserve {
#define x(name) RESERVE_##name, #define x(name) RESERVE_##name,

View File

@ -157,6 +157,9 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reser
switch (reserve) { switch (reserve) {
case RESERVE_NR: case RESERVE_NR:
unreachable(); unreachable();
case RESERVE_stripe:
reserved += ca->mi.nbuckets >> 6;
fallthrough;
case RESERVE_none: case RESERVE_none:
reserved += ca->mi.nbuckets >> 6; reserved += ca->mi.nbuckets >> 6;
fallthrough; fallthrough;

View File

@ -1569,6 +1569,17 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
BUG_ON(h->s->existing_stripe.size != h->blocksize); BUG_ON(h->s->existing_stripe.size != h->blocksize);
BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors); BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
/*
* Free buckets we initially allocated - they might conflict with
* blocks from the stripe we're reusing:
*/
for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) {
bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
h->s->blocks[i] = 0;
}
memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) { if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
__set_bit(i, h->s->blocks_gotten); __set_bit(i, h->s->blocks_gotten);
@ -1649,8 +1660,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct ec_stripe_head *h; struct ec_stripe_head *h;
bool waiting = false;
int ret; int ret;
bool needs_stripe_new;
h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, reserve); h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, reserve);
if (!h) if (!h)
@ -1658,8 +1669,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
if (IS_ERR_OR_NULL(h)) if (IS_ERR_OR_NULL(h))
return h; return h;
needs_stripe_new = !h->s; if (!h->s) {
if (needs_stripe_new) {
if (ec_new_stripe_alloc(c, h)) { if (ec_new_stripe_alloc(c, h)) {
ret = -ENOMEM; ret = -ENOMEM;
bch_err(c, "failed to allocate new stripe"); bch_err(c, "failed to allocate new stripe");
@ -1670,30 +1680,53 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
BUG(); BUG();
} }
if (h->s->allocated)
goto allocated;
if (h->s->have_existing_stripe)
goto alloc_existing;
/* First, try to allocate a full stripe: */
ret = new_stripe_alloc_buckets(trans, h, RESERVE_stripe, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h);
if (!ret)
goto allocated;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, ENOMEM))
goto err;
/* /*
* Try reserve a new stripe before reusing an * Not enough buckets available for a full stripe: we must reuse an
* existing stripe. This will prevent unnecessary * existing stripe:
* read amplification during write oriented workloads.
*/ */
ret = 0; while (1) {
if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe)
ret = __bch2_ec_stripe_head_reserve(trans, h);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto err;
if (ret && needs_stripe_new)
ret = __bch2_ec_stripe_head_reuse(trans, h); ret = __bch2_ec_stripe_head_reuse(trans, h);
if (ret) if (!ret)
goto err; break;
if (ret == -BCH_ERR_ENOSPC_stripe_reuse && cl)
if (!h->s->allocated) { ret = -BCH_ERR_stripe_alloc_blocked;
ret = new_stripe_alloc_buckets(trans, h, reserve, cl); if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
if (ret)
goto err; goto err;
h->s->allocated = true; /* XXX freelist_wait? */
closure_wait(&c->freelist_wait, cl);
waiting = true;
} }
if (waiting)
closure_wake_up(&c->freelist_wait);
alloc_existing:
/*
* Retry allocating buckets, with the reserve watermark for this
* particular write:
*/
ret = new_stripe_alloc_buckets(trans, h, reserve, cl);
if (ret)
goto err;
allocated:
h->s->allocated = true;
BUG_ON(!h->s->idx);
BUG_ON(trans->restarted); BUG_ON(trans->restarted);
return h; return h;
err: err:

View File

@ -93,6 +93,7 @@
x(BCH_ERR_operation_blocked, journal_res_get_blocked) \ x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \ x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \ x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
x(BCH_ERR_operation_blocked, stripe_alloc_blocked) \
x(BCH_ERR_invalid, invalid_sb) \ x(BCH_ERR_invalid, invalid_sb) \
x(BCH_ERR_invalid_sb, invalid_sb_magic) \ x(BCH_ERR_invalid_sb, invalid_sb_magic) \
x(BCH_ERR_invalid_sb, invalid_sb_version) \ x(BCH_ERR_invalid_sb, invalid_sb_version) \