bcachefs: Improve trans->extra_journal_entries

Instead of using a darray, we now allocate journal entries for the
transaction commit path with our normal bump allocator - with an inlined
fastpath, and using btree_transaction_stats to remember how much to
initially allocate so as to avoid transaction restarts.

This is prep work for converting write buffer updates to use this
mechanism.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-12-10 16:48:22 -05:00
parent e4e49375a8
commit 24de63dacb
7 changed files with 98 additions and 63 deletions

View File

@ -638,6 +638,7 @@ struct btree_transaction_stats {
struct mutex lock;
unsigned nr_max_paths;
unsigned wb_updates_size;
unsigned journal_entries_size;
unsigned max_mem;
char *max_paths_text;
};

View File

@ -2794,6 +2794,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->restart_count++;
trans->mem_top = 0;
trans->journal_entries = NULL;
trans_for_each_path(trans, path) {
path->should_be_locked = false;
@ -2914,6 +2915,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
if (s) {
trans->nr_max_paths = s->nr_max_paths;
trans->wb_updates_size = s->wb_updates_size;
trans->journal_entries_size = s->journal_entries_size;
}
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
@ -3000,8 +3002,6 @@ void bch2_trans_put(struct btree_trans *trans)
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
}
kfree(trans->extra_journal_entries.data);
if (trans->fs_usage_deltas) {
if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
REPLICAS_DELTA_LIST_MAX)

View File

@ -726,15 +726,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
goto fatal_err;
}
if (unlikely(trans->extra_journal_entries.nr)) {
memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
trans->extra_journal_entries.data,
trans->extra_journal_entries.nr);
trans->journal_res.offset += trans->extra_journal_entries.nr;
trans->journal_res.u64s -= trans->extra_journal_entries.nr;
}
if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
struct journal *j = &c->journal;
struct jset_entry *entry;
@ -772,6 +763,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
bkey_copy((struct bkey_i *) entry->start, &wb->k);
}
memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
trans->journal_entries,
trans->journal_entries_u64s);
trans->journal_res.offset += trans->journal_entries_u64s;
trans->journal_res.u64s -= trans->journal_entries_u64s;
if (trans->journal_seq)
*trans->journal_seq = trans->journal_res.seq;
}
@ -1036,7 +1034,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (!trans->nr_updates &&
!trans->nr_wb_updates &&
!trans->extra_journal_entries.nr)
!trans->journal_entries_u64s)
goto out_reset;
ret = bch2_trans_commit_run_triggers(trans);
@ -1088,7 +1086,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
trans->journal_u64s = trans->extra_journal_entries.nr;
trans->journal_u64s = trans->journal_entries_u64s;
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
if (trans->journal_transaction_names)
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);

View File

@ -410,7 +410,9 @@ struct btree_trans {
* extent:
*/
unsigned extra_journal_res;
unsigned nr_max_paths;
u8 nr_max_paths;
u16 journal_entries_u64s;
u16 journal_entries_size;
unsigned long paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)];
@ -426,7 +428,7 @@ struct btree_trans {
/* update path: */
struct btree_trans_commit_hook *hooks;
darray_u64 extra_journal_entries;
struct jset_entry *journal_entries;
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;

View File

@ -545,6 +545,34 @@ static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans,
return bch2_btree_insert_trans(trans, btree, n, 0);
}
/*
 * Out-of-line path for bch2_trans_jset_entry_alloc(): reserve @u64s more
 * u64s of journal entry space in @trans by allocating a fresh buffer.
 *
 * The inline wrapper calls this when the transaction has no journal entry
 * buffer yet, or when the request does not fit in the current capacity.
 * If the request exceeds the current capacity, the capacity is grown to the
 * next power of two and the new capacity is also stored in the per-transaction
 * stats (when available), which are read back when a transaction is set up.
 *
 * Returns a pointer to the start of the newly reserved space, or the error
 * pointer produced by bch2_trans_kmalloc_nomemzero().
 */
struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
{
	/* Capacity before any growth below; also the amount copied from the old buffer. */
	unsigned prev_size = trans->journal_entries_size;
	unsigned want_u64s = trans->journal_entries_u64s + u64s;

	if (want_u64s > prev_size) {
		struct btree_transaction_stats *stats;

		trans->journal_entries_size = roundup_pow_of_two(want_u64s);

		stats = btree_trans_stats(trans);
		if (stats)
			stats->journal_entries_size = trans->journal_entries_size;
	}

	struct jset_entry *buf =
		bch2_trans_kmalloc_nomemzero(trans,
				trans->journal_entries_size * sizeof(u64));
	if (IS_ERR(buf))
		return ERR_CAST(buf);

	/* Carry over whatever the previous buffer held before switching to the new one. */
	if (trans->journal_entries)
		memcpy(buf, trans->journal_entries, prev_size * sizeof(u64));
	trans->journal_entries = buf;

	/* The old top (computed before bumping the count) is where the caller writes. */
	struct jset_entry *entry = btree_trans_journal_entries_top(trans);

	trans->journal_entries_u64s = want_u64s;
	return entry;
}
int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
enum btree_id btree,
struct bkey_i *k)
@ -823,41 +851,17 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
return bch2_trans_update_buffered(trans, btree, &k);
}
__printf(2, 0)
static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s)
{
struct printbuf buf = PRINTBUF;
struct jset_entry_log *l;
unsigned u64s;
int ret;
prt_vprintf(&buf, fmt, args);
ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s));
int ret = PTR_ERR_OR_ZERO(e);
if (ret)
goto err;
return ret;
u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
ret = darray_make_room(entries, jset_u64s(u64s));
if (ret)
goto err;
l = (void *) &darray_top(*entries);
l->entry.u64s = cpu_to_le16(u64s);
l->entry.btree_id = 0;
l->entry.level = 1;
l->entry.type = BCH_JSET_ENTRY_log;
l->entry.pad[0] = 0;
l->entry.pad[1] = 0;
l->entry.pad[2] = 0;
memcpy(l->d, buf.buf, buf.pos);
while (buf.pos & 7)
l->d[buf.pos++] = '\0';
entries->nr += jset_u64s(u64s);
err:
printbuf_exit(&buf);
return ret;
struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry);
journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s);
memcpy(l->d, buf->buf, buf->pos);
return 0;
}
__printf(3, 0)
@ -865,16 +869,32 @@ static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
va_list args)
{
int ret;
struct printbuf buf = PRINTBUF;
prt_vprintf(&buf, fmt, args);
unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
prt_chars(&buf, '\0', u64s * sizeof(u64) - buf.pos);
int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
if (ret)
goto err;
if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s));
if (ret)
goto err;
struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries);
journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s);
memcpy(l->d, buf.buf, buf.pos);
c->journal.early_journal_entries.nr += jset_u64s(u64s);
} else {
ret = bch2_trans_do(c, NULL, NULL,
BCH_TRANS_COMMIT_lazy_rw|commit_flags,
__bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args));
__bch2_trans_log_msg(trans, &buf, u64s));
}
err:
printbuf_exit(&buf);
return ret;
}

View File

@ -101,11 +101,28 @@ int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, enum btree_update_flags);
int __must_check bch2_trans_update_seq(struct btree_trans *, u64, struct btree_iter *,
struct bkey_i *, enum btree_update_flags);
int __must_check bch2_trans_update_buffered(struct btree_trans *,
enum btree_id, struct bkey_i *);
struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned);
static inline struct jset_entry *btree_trans_journal_entries_top(struct btree_trans *trans)
{
return (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
}
/*
 * Reserve @u64s u64s of journal entry space in @trans.
 *
 * Inline fast path: when a buffer already exists and the request fits within
 * journal_entries_size, just advance journal_entries_u64s and hand back the
 * previous top. Otherwise defer to __bch2_trans_jset_entry_alloc(), whose
 * return value (possibly an error pointer) is passed through unchanged.
 */
static inline struct jset_entry *
bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
{
	if (trans->journal_entries &&
	    trans->journal_entries_u64s + u64s <= trans->journal_entries_size) {
		struct jset_entry *entry = btree_trans_journal_entries_top(trans);

		trans->journal_entries_u64s += u64s;
		return entry;
	}

	return __bch2_trans_jset_entry_alloc(trans, u64s);
}
void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *, unsigned);
@ -171,10 +188,10 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans)
trans->extra_journal_res = 0;
trans->nr_updates = 0;
trans->journal_entries_u64s = 0;
trans->nr_wb_updates = 0;
trans->wb_updates = NULL;
trans->hooks = NULL;
trans->extra_journal_entries.nr = 0;
if (trans->fs_usage_deltas) {
trans->fs_usage_deltas->used = 0;

View File

@ -557,16 +557,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
struct btree_update *as)
{
struct bkey_i *k;
int ret;
ret = darray_make_room(&trans->extra_journal_entries, as->journal_u64s);
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, as->journal_u64s);
int ret = PTR_ERR_OR_ZERO(e);
if (ret)
return ret;
memcpy(&darray_top(trans->extra_journal_entries),
as->journal_entries,
as->journal_u64s * sizeof(u64));
trans->extra_journal_entries.nr += as->journal_u64s;
memcpy(e, as->journal_entries, as->journal_u64s * sizeof(u64));
trans->journal_pin = &as->journal;
@ -2188,16 +2185,16 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
} else {
BUG_ON(btree_node_root(c, b) != b);
ret = darray_make_room(&trans->extra_journal_entries,
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans,
jset_u64s(new_key->k.u64s));
ret = PTR_ERR_OR_ZERO(e);
if (ret)
return ret;
journal_entry_set((void *) &darray_top(trans->extra_journal_entries),
journal_entry_set(e,
BCH_JSET_ENTRY_btree_root,
b->c.btree_id, b->c.level,
new_key, new_key->k.u64s);
trans->extra_journal_entries.nr += jset_u64s(new_key->k.u64s);
}
ret = bch2_trans_commit(trans, NULL, NULL, commit_flags);