bcachefs: Make allocator stuck timeout configurable, ratelimit messages

Limit these messages to once every 2 minutes to avoid spamming logs;
with multiple devices the output can be quite significant.

Also, up the default timeout to 30 seconds from 10 seconds.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-08-07 13:58:57 -04:00
parent 6d496e02b4
commit cecf72798b
8 changed files with 45 additions and 12 deletions

View File

@ -1758,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats));
}
void bch2_print_allocator_stuck(struct bch_fs *c)
static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
{
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n");
prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n",
c->opts.allocator_stuck_timeout);
prt_printf(&buf, "Allocator debug:\n");
printbuf_indent_add(&buf, 2);
@ -1792,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c)
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
static inline unsigned allocator_wait_timeout(struct bch_fs *c)
{
if (c->allocator_last_stuck &&
time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies))
return 0;
return c->opts.allocator_stuck_timeout * HZ;
}
void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
{
unsigned t = allocator_wait_timeout(c);
if (t && closure_sync_timeout(cl, t)) {
c->allocator_last_stuck = jiffies;
bch2_print_allocator_stuck(c);
}
closure_sync(cl);
}

View File

@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *);
void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
void bch2_print_allocator_stuck(struct bch_fs *);
void __bch2_wait_on_allocator(struct bch_fs *, struct closure *);
static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
{
if (cl->closure_get_happened)
__bch2_wait_on_allocator(c, cl);
}
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */

View File

@ -893,6 +893,8 @@ struct bch_fs {
struct bch_fs_usage_base __percpu *usage;
u64 __percpu *online_reserved;
unsigned long allocator_last_stuck;
struct io_clock io_clock[2];
/* JOURNAL SEQ BLACKLIST */

View File

@ -836,6 +836,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16);
LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
struct bch_sb, flags[5], 16, 32);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{

View File

@ -126,11 +126,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
if (closure_nr_remaining(&cl) != 1) {
bch2_trans_unlock_long(trans);
if (closure_sync_timeout(&cl, HZ * 10)) {
bch2_print_allocator_stuck(c);
closure_sync(&cl);
}
bch2_wait_on_allocator(c, &cl);
}
return ret;

View File

@ -1503,10 +1503,7 @@ static void __bch2_write(struct bch_write_op *op)
if ((op->flags & BCH_WRITE_SYNC) ||
(!(op->flags & BCH_WRITE_SUBMITTED) &&
!(op->flags & BCH_WRITE_IN_WORKER))) {
if (closure_sync_timeout(&op->cl, HZ * 10)) {
bch2_print_allocator_stuck(c);
closure_sync(&op->cl);
}
bch2_wait_on_allocator(c, &op->cl);
__bch2_write_index(op);

View File

@ -391,6 +391,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \
NULL, "Log transaction function names in journal") \
x(allocator_stuck_timeout, u16, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(0, U16_MAX), \
BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \
NULL, "Default timeout in seconds for stuck allocator messages")\
x(noexcl, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \

View File

@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
!BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);
}
for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {