From 30418de09e6bda5478a6cfb7c1a54e128b7e2a22 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 13 Nov 2023 19:57:09 -0500 Subject: [PATCH 001/226] bcachefs: Flush fsck errors before running twice It's confusing if we run fsck a second time (in debug mode, to verify the second run is clean), but errors are still ratelimited from the first run. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 5cf7d0532002..70add8274a95 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -924,6 +924,8 @@ int bch2_fs_recovery(struct bch_fs *c) test_bit(BCH_FS_ERRORS_FIXED, &c->flags) && !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags) && !test_bit(BCH_FS_ERROR, &c->flags)) { + bch2_flush_fsck_errs(c); + bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); clear_bit(BCH_FS_ERRORS_FIXED, &c->flags); From e7f7ddedd67d4abc1ed24dbf33816f2b0a953c02 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 22 Nov 2023 18:30:43 -0500 Subject: [PATCH 002/226] bcachefs: Add extra verbose logging for ro path Also log time waiting for c->writes references to be dropped; this will help in debugging why unmounts are taking longer than they should. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 818ec467a06b..6e38e2d4abf2 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -276,6 +276,8 @@ void bch2_fs_read_only(struct bch_fs *c) BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); + bch_verbose(c, "going read-only"); + /* * Block new foreground-end write operations from starting - any new * writes will return -EROFS: @@ -303,13 +305,21 @@ void bch2_fs_read_only(struct bch_fs *c) test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) || test_bit(BCH_FS_EMERGENCY_RO, &c->flags)); + bool writes_disabled = test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); + if (writes_disabled) + bch_verbose(c, "finished waiting for writes to stop"); + __bch2_fs_read_only(c); wait_event(bch2_read_only_wait, test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); + if (!writes_disabled) + bch_verbose(c, "finished waiting for writes to stop"); + clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); clear_bit(BCH_FS_GOING_RO, &c->flags); + clear_bit(BCH_FS_RW, &c->flags); if (!bch2_journal_error(&c->journal) && !test_bit(BCH_FS_ERROR, &c->flags) && @@ -324,9 +334,9 @@ void bch2_fs_read_only(struct bch_fs *c) bch_verbose(c, "marking filesystem clean"); bch2_fs_mark_clean(c); + } else { + bch_verbose(c, "done going read-only, filesystem not clean"); } - - clear_bit(BCH_FS_RW, &c->flags); } static void bch2_fs_read_only_work(struct work_struct *work) From 8b58623f5b911feacee2e55b6e378b2e0858e677 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 13 Nov 2023 21:24:24 -0500 Subject: [PATCH 003/226] bcachefs: Improved backpointer messages in fsck When we have a key to print, we should print it. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 50 ++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index e0c5cd119acc..f7bcde976f92 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1023,25 +1023,6 @@ static bool dirent_points_to_inode(struct bkey_s_c_dirent d, : le64_to_cpu(d.v->d_inum) == inode->bi_inum; } -static int inode_backpointer_exists(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) -{ - struct btree_iter iter; - struct bkey_s_c_dirent d; - int ret; - - d = dirent_get_by_pos(trans, &iter, - SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot)); - ret = bkey_err(d); - if (ret) - return bch2_err_matches(ret, ENOENT) ? 0 : ret; - - ret = dirent_points_to_inode(d, inode); - bch2_trans_iter_exit(trans, &iter); - return ret; -} - static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; @@ -1553,8 +1534,8 @@ static int check_dirent_target(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bkey_i_dirent *n; - bool backpointer_exists = true; struct printbuf buf = PRINTBUF; + struct btree_iter bp_iter = { NULL }; int ret = 0; if (!target->bi_dir && @@ -1568,25 +1549,37 @@ static int check_dirent_target(struct btree_trans *trans, } if (!inode_points_to_dirent(target, d)) { - ret = inode_backpointer_exists(trans, target, d.k->p.snapshot); - if (ret < 0) + struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, + SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); + ret = bkey_err(bp_dirent); + if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - backpointer_exists = ret; + bool backpointer_exists = !ret; ret = 0; + bch2_bkey_val_to_text(&buf, c, d.s_c); + prt_newline(&buf); + if (backpointer_exists) + bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); + if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists, c, inode_dir_multiple_links, - "directory %llu with multiple links", - target->bi_inum)) { + "directory %llu:%u with multiple links\n%s", + target->bi_inum, target_snapshot, buf.buf)) { ret = __remove_dirent(trans, d.k->p); goto out; } + /* + * hardlinked file with nlink 0: + * We're just adjusting nlink here so check_nlinks() will pick + * it up, it ignores inodes with nlink 0 + */ if (fsck_err_on(backpointer_exists && !target->bi_nlink, c, inode_multiple_links_but_nlink_0, - "inode %llu type %s has multiple links but i_nlink 0", - target->bi_inum, bch2_d_types[d.v->d_type])) { + "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", + target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { target->bi_nlink++; target->bi_flags &= ~BCH_INODE_unlinked; @@ -1660,6 +1653,7 @@ static int check_dirent_target(struct btree_trans *trans, out: err: fsck_err: + bch2_trans_iter_exit(trans, &bp_iter); printbuf_exit(&buf); bch_err_fn(c, ret); return ret; @@ -2401,7 +2395,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); if (ret < 0) { - bch_err(c, "error in fsck: btree error %i while walking inodes", ret); + bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); return ret; } From ecf8a74dab1db2d3d208ad08e4104ebdc734911b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Nov 2023 15:46:50 -0500 Subject: [PATCH 004/226] bcachefs: kill INODE_LOCK, use lock_two_nondirectories() In an ideal world, we'd have a common helper that could be used for sorting a list of inodes into the correct lock order, and then the same lock ordering could be used for any type of inode lock, not just i_rwsem. But the lock ordering rules for i_rwsem are a bit complicated, so - abandon that dream for now and do it the more standard way. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 6 ++++-- fs/bcachefs/fs.h | 9 ++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index b0e8144ec550..31f40e587a4f 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -861,7 +861,8 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, abs(pos_src - pos_dst) < len) return -EINVAL; - bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); + lock_two_nondirectories(&src->v, &dst->v); + bch2_lock_inodes(INODE_PAGECACHE_BLOCK, src, dst); inode_dio_wait(&src->v); inode_dio_wait(&dst->v); @@ -914,7 +915,8 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, ret = bch2_flush_inode(c, dst); err: bch2_quota_reservation_put(c, dst, "a_res); - bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); + bch2_unlock_inodes(INODE_PAGECACHE_BLOCK, src, dst); + unlock_two_nondirectories(&src->v, &dst->v); return bch2_err_class(ret); } diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 5edf1d4b9e6b..c3af7225ff69 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -77,9 +77,8 @@ static inline int ptrcmp(void *l, void *r) } enum bch_inode_lock_op { - INODE_LOCK = (1U << 0), - INODE_PAGECACHE_BLOCK = (1U << 1), - INODE_UPDATE_LOCK = (1U << 2), + INODE_PAGECACHE_BLOCK = (1U << 0), + INODE_UPDATE_LOCK = (1U << 1), }; #define bch2_lock_inodes(_locks, ...) \ @@ -91,8 +90,6 @@ do { \ \ for (i = 1; i < ARRAY_SIZE(a); i++) \ if (a[i] != a[i - 1]) { \ - if ((_locks) & INODE_LOCK) \ - down_write_nested(&a[i]->v.i_rwsem, i); \ if ((_locks) & INODE_PAGECACHE_BLOCK) \ bch2_pagecache_block_get(a[i]);\ if ((_locks) & INODE_UPDATE_LOCK) \ @@ -109,8 +106,6 @@ do { \ \ for (i = 1; i < ARRAY_SIZE(a); i++) \ if (a[i] != a[i - 1]) { \ - if ((_locks) & INODE_LOCK) \ - up_write(&a[i]->v.i_rwsem); \ if ((_locks) & INODE_PAGECACHE_BLOCK) \ bch2_pagecache_block_put(a[i]);\ if ((_locks) & INODE_UPDATE_LOCK) \ From 359d1bad1b5cc047e26d8fefa0b368df89f53acb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Nov 2023 17:24:54 -0500 Subject: [PATCH 005/226] bcachefs: Check for unlinked inodes not on deleted list Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 27 +++++++++++++++++++++++++-- fs/bcachefs/sb-errors_types.h | 3 ++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f7bcde976f92..d95940075757 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -826,6 +826,18 @@ static int hash_check_key(struct btree_trans *trans, goto out; } +static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) +{ + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); + int ret = bkey_err(k); + if (ret) + return ret; + + bch2_trans_iter_exit(trans, &iter); + return k.k->type == KEY_TYPE_set; +} + static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -867,7 +879,7 @@ static int check_inode(struct btree_trans *trans, c, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); - return -EINVAL; + return -BCH_ERR_fsck_repair_unimplemented; } if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && @@ -890,6 +902,18 @@ static int check_inode(struct btree_trans *trans, return 0; } + if (u.bi_flags & BCH_INODE_unlinked && + c->sb.version >= bcachefs_metadata_version_deleted_inodes) { + ret = check_inode_deleted_list(trans, k.k->p); + if (ret < 0) + return ret; + + fsck_err_on(ret, c, unlinked_inode_not_on_deleted_list, + "inode %llu:%u unlinked, but not on deleted list", + u.bi_inum, k.k->p.snapshot); + ret = 0; + } + if (u.bi_flags & BCH_INODE_unlinked && (!c->sb.clean || fsck_err(c, inode_unlinked_but_clean, @@ -976,7 +1000,6 @@ static int check_inode(struct btree_trans *trans, return ret; } -noinline_for_stack int bch2_check_inodes(struct bch_fs *c) { bool full = c->opts.fsck; diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index 3504c2d09c29..e7be1f9bdaab 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -248,7 +248,8 @@ x(root_inode_not_dir, 240) \ x(dir_loop, 241) \ x(hash_table_key_duplicate, 242) \ - x(hash_table_key_wrong_offset, 243) + x(hash_table_key_wrong_offset, 243) \ + x(unlinked_inode_not_on_deleted_list, 244) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, From 7d9ae04e3987648b99b9ac8036178a75fb913dd6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Nov 2023 17:28:16 -0500 Subject: [PATCH 006/226] bcachefs: Fix locking when checking freespace btree On transaction restart, we weren't re-validating the hole we saw. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 60 ++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 1fec0e67891f..6002bd0cf89c 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1212,7 +1212,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, return ret; } -static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_trans *trans, +static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter) { struct bch_fs *c = trans->c; @@ -1271,24 +1271,6 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr goto out; } -static int bch2_check_discard_freespace_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos end) -{ - if (!btree_id_is_extents(iter->btree_id)) { - return __bch2_check_discard_freespace_key(trans, iter); - } else { - int ret = 0; - - while (!bkey_eq(iter->pos, end) && - !(ret = btree_trans_too_many_iters(trans) ?: - __bch2_check_discard_freespace_key(trans, iter))) - bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); - - return ret; - } -} - /* * We've already checked that generation numbers in the bucket_gens btree are * valid for buckets that exist; this just checks for keys for nonexistent @@ -1445,12 +1427,40 @@ int bch2_check_alloc_info(struct bch_fs *c) ret = for_each_btree_key2(trans, iter, BTREE_ID_need_discard, POS_MIN, BTREE_ITER_PREFETCH, k, - bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?: - for_each_btree_key2(trans, iter, - BTREE_ID_freespace, POS_MIN, - BTREE_ITER_PREFETCH, k, - bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?: - for_each_btree_key_commit(trans, iter, + bch2_check_discard_freespace_key(trans, &iter)); + if (ret) + goto err; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN, + BTREE_ITER_PREFETCH); + while (1) { + bch2_trans_begin(trans); + k = bch2_btree_iter_peek(&iter); + if (!k.k) + break; + + ret = bkey_err(k) ?: + bch2_check_discard_freespace_key(trans, &iter); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + ret = 0; + continue; + } + if (ret) { + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, k); + + bch_err(c, "while checking %s", buf.buf); + printbuf_exit(&buf); + break; + } + + bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos)); + } + bch2_trans_iter_exit(trans, &iter); + if (ret) + goto err; + + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, From fbf9270817249bb00044de74587155f307d1003f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Nov 2023 21:40:58 -0500 Subject: [PATCH 007/226] bcachefs: Print old version when scanning for old metadata Also: we should be using bch2_fs_read_write_early() Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 70add8274a95..7b625fa37d92 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -995,8 +995,12 @@ int bch2_fs_recovery(struct bch_fs *c) bch2_move_stats_init(&stats, "recovery"); - bch_info(c, "scanning for old btree nodes"); - ret = bch2_fs_read_write(c) ?: + struct printbuf buf = PRINTBUF; + bch2_version_to_text(&buf, c->sb.version_min); + bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf); + printbuf_exit(&buf); + + ret = bch2_fs_read_write_early(c) ?: bch2_scan_old_btree_nodes(c, &stats); if (ret) goto err; From 09e0153b72bfba618e6ceedb5dd883cdeed911af Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Nov 2023 20:05:45 -0500 Subject: [PATCH 008/226] bcachefs: Fix warning when building in userspace bch_err() doesn't reference the fs arg in userspace Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 12907beda98c..9cedc1755d34 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -361,7 +361,6 @@ noinline static int btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, struct btree_path *path, unsigned new_u64s) { - struct bch_fs *c = trans->c; struct btree_insert_entry *i; struct bkey_cached *ck = (void *) path->l[0].b; struct bkey_i *new_k; @@ -372,7 +371,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); if (!new_k) { - bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", + bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", bch2_btree_id_str(path->btree_id), new_u64s); return -BCH_ERR_ENOMEM_btree_key_cache_insert; } From fa5df9e7d5a85507cc01f32815854993983c1ff5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 8 Nov 2023 16:51:06 -0500 Subject: [PATCH 009/226] bcachefs: Include average write size in sysfs journal_debug Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 22 +++++++++++++--------- fs/bcachefs/journal_io.c | 2 ++ fs/bcachefs/journal_types.h | 1 + 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 8cf238be6213..5df417cd6743 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1262,6 +1262,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) union journal_res_state s; struct bch_dev *ca; unsigned long now = jiffies; + u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes; u64 seq; unsigned i; @@ -1275,20 +1276,23 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size); prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j)); prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk); - prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); + prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk); - prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk); - prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]); - prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved); + prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk); + prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]); + prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved); prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes); - prt_printf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes); - prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim); + prt_printf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes); + prt_printf(out, "average write size:\t"); + prt_human_readable_u64(out, nr_writes ? div64_u64(j->entry_bytes_written, nr_writes) : 0); + prt_newline(out); + prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim); prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim); prt_printf(out, "reclaim kicked:\t\t%u\n", j->reclaim_kicked); - prt_printf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now) + prt_printf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now) ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0); - prt_printf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors); - prt_printf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]); + prt_printf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors); + prt_printf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]); prt_printf(out, "current entry:\t\t"); switch (s.cur_entry_offset) { diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 3eb6c3f62a81..62d409f793d3 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1890,6 +1890,8 @@ CLOSURE_CALLBACK(bch2_journal_write) if (ret) goto err; + j->entry_bytes_written += vstruct_bytes(w->data); + while (1) { spin_lock(&j->lock); ret = journal_write_alloc(j, w); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index a756b69582e3..1acce6ecfca0 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -267,6 +267,7 @@ struct journal { u64 nr_flush_writes; u64 nr_noflush_writes; + u64 entry_bytes_written; struct bch2_time_stats *flush_write_time; struct bch2_time_stats *noflush_write_time; From df8e13ccf3c0cc2b16e931f71ef69834db71eda9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 7 Nov 2023 12:32:50 -0500 Subject: [PATCH 010/226] bcachefs: Add an assertion in bch2_journal_pin_set() Previously, bch2_journal_pin_set() would silently ignore a request to pin a journal sequence number that was no longer dirty, because it was used internally by bch2_journal_pin_copy() which could race with the src pin being flushed. Split these apart so that we can properly assert that @seq is a currently dirty journal sequence number - this is almost always a bug. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 59 ++++++++++++++++++++++++++++------- fs/bcachefs/journal_reclaim.h | 15 +++------ 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index ec712104addb..f2668d9d8e24 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -367,15 +367,34 @@ static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn) return JOURNAL_PIN_other; } -void bch2_journal_pin_set(struct journal *j, u64 seq, +static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq, struct journal_entry_pin *pin, - journal_pin_flush_fn flush_fn) + journal_pin_flush_fn flush_fn, + enum journal_pin_type type) +{ + struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); + + atomic_inc(&pin_list->count); + pin->seq = seq; + pin->flush = flush_fn; + + if (flush_fn) + list_add(&pin->list, &pin_list->list[type]); + else + list_add(&pin->list, &pin_list->flushed); +} + +void bch2_journal_pin_copy(struct journal *j, + struct journal_entry_pin *dst, + struct journal_entry_pin *src, + journal_pin_flush_fn flush_fn) { - struct journal_entry_pin_list *pin_list; bool reclaim; spin_lock(&j->lock); + u64 seq = READ_ONCE(src->seq); + if (seq < journal_last_seq(j)) { /* * bch2_journal_pin_copy() raced with bch2_journal_pin_drop() on @@ -387,18 +406,34 @@ void bch2_journal_pin_set(struct journal *j, u64 seq, return; } - pin_list = journal_seq_pin(j, seq); + reclaim = __journal_pin_drop(j, dst); + + bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn)); + + if (reclaim) + bch2_journal_reclaim_fast(j); + spin_unlock(&j->lock); + + /* + * If the journal is currently full, we might want to call flush_fn + * immediately: + */ + journal_wake(j); +} + +void bch2_journal_pin_set(struct journal *j, u64 seq, + struct journal_entry_pin *pin, + journal_pin_flush_fn flush_fn) +{ + bool reclaim; + + spin_lock(&j->lock); + + BUG_ON(seq < journal_last_seq(j)); reclaim = __journal_pin_drop(j, pin); - atomic_inc(&pin_list->count); - pin->seq = seq; - pin->flush = flush_fn; - - if (flush_fn) - list_add(&pin->list, &pin_list->list[journal_pin_type(flush_fn)]); - else - list_add(&pin->list, &pin_list->flushed); + bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn)); if (reclaim) bch2_journal_reclaim_fast(j); diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h index 494d1a6eddb0..7b15d682a0f5 100644 --- a/fs/bcachefs/journal_reclaim.h +++ b/fs/bcachefs/journal_reclaim.h @@ -47,17 +47,10 @@ static inline void bch2_journal_pin_add(struct journal *j, u64 seq, bch2_journal_pin_set(j, seq, pin, flush_fn); } -static inline void bch2_journal_pin_copy(struct journal *j, - struct journal_entry_pin *dst, - struct journal_entry_pin *src, - journal_pin_flush_fn flush_fn) -{ - /* Guard against racing with journal_pin_drop(src): */ - u64 seq = READ_ONCE(src->seq); - - if (seq) - bch2_journal_pin_add(j, seq, dst, flush_fn); -} +void bch2_journal_pin_copy(struct journal *, + struct journal_entry_pin *, + struct journal_entry_pin *, + journal_pin_flush_fn); static inline void bch2_journal_pin_update(struct journal *j, u64 seq, struct journal_entry_pin *pin, From 3eedfe1af9beb6c65eca1080298086e6e0031428 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 Nov 2023 13:19:00 -0500 Subject: [PATCH 011/226] bcachefs: Journal pins must always have a flush_fn flush_fn is how we identify journal pins in debugfs - this is a debugging aid. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 9 ++++++++- fs/bcachefs/btree_update_interior.c | 21 ++++++++++++++++++--- fs/bcachefs/btree_write_buffer.c | 18 +++++++----------- fs/bcachefs/journal_reclaim.c | 12 +++++++----- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 9cedc1755d34..3d66d9c595e2 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -840,6 +840,12 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, return -EINVAL; } +static int bch2_trans_commit_journal_pin_flush(struct journal *j, + struct journal_entry_pin *_pin, u64 seq) +{ + return 0; +} + /* * Get journal reservation, take write locks, and attempt to do btree update(s): */ @@ -883,7 +889,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags if (!ret && trans->journal_pin) bch2_journal_pin_add(&c->journal, trans->journal_res.seq, - trans->journal_pin, NULL); + trans->journal_pin, + bch2_trans_commit_journal_pin_flush); /* * Drop journal reservation after dropping write locks, since dropping diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 239fcc3c7c99..6482c07ae479 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -815,6 +815,12 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) mutex_unlock(&c->btree_interior_update_lock); } +static int bch2_update_reparent_journal_pin_flush(struct journal *j, + struct journal_entry_pin *_pin, u64 seq) +{ + return 0; +} + static void btree_update_reparent(struct btree_update *as, struct btree_update *child) { @@ -825,7 +831,8 @@ static void btree_update_reparent(struct btree_update *as, child->b = NULL; child->mode = BTREE_INTERIOR_UPDATING_AS; - bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL); + bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, + bch2_update_reparent_journal_pin_flush); } static void btree_update_updated_root(struct btree_update *as, struct btree *b) @@ -934,6 +941,12 @@ static void bch2_btree_update_get_open_buckets(struct btree_update *as, struct b b->ob.v[--b->ob.nr]; } +static int bch2_btree_update_will_free_node_journal_pin_flush(struct journal *j, + struct journal_entry_pin *_pin, u64 seq) +{ + return 0; +} + /* * @b is being split/rewritten: it may have pointers to not-yet-written btree * nodes and thus outstanding btree_updates - redirect @b's @@ -985,11 +998,13 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as, * when the new nodes are persistent and reachable on disk: */ w = btree_current_write(b); - bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); + bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, + bch2_btree_update_will_free_node_journal_pin_flush); bch2_journal_pin_drop(&c->journal, &w->journal); w = btree_prev_write(b); - bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); + bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, + bch2_btree_update_will_free_node_journal_pin_flush); bch2_journal_pin_drop(&c->journal, &w->journal); mutex_unlock(&c->btree_interior_update_lock); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 4e6241db518b..9609eb18f38d 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -11,6 +11,9 @@ #include +static int bch2_btree_write_buffer_journal_flush(struct journal *, + struct journal_entry_pin *, u64); + static int btree_write_buffered_key_cmp(const void *_l, const void *_r) { const struct btree_write_buffered_key *l = _l; @@ -150,7 +153,8 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f if (!locked && !mutex_trylock(&wb->flush_lock)) return 0; - bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL); + bch2_journal_pin_copy(j, &pin, &wb->journal_pin, + bch2_btree_write_buffer_journal_flush); bch2_journal_pin_drop(j, &wb->journal_pin); s = btree_write_buffer_switch(wb); @@ -252,16 +256,8 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f if (!i->journal_seq) continue; - if (i->journal_seq > pin.seq) { - struct journal_entry_pin pin2; - - memset(&pin2, 0, sizeof(pin2)); - - bch2_journal_pin_add(j, i->journal_seq, &pin2, NULL); - bch2_journal_pin_drop(j, &pin); - bch2_journal_pin_copy(j, &pin, &pin2, NULL); - bch2_journal_pin_drop(j, &pin2); - } + bch2_journal_pin_update(j, i->journal_seq, &pin, + bch2_btree_write_buffer_journal_flush); ret = commit_do(trans, NULL, NULL, commit_flags| diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index f2668d9d8e24..64928e091d3b 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -374,14 +374,16 @@ static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq, { struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); + /* + * flush_fn is how we identify journal pins in debugfs, so must always + * exist, even if it doesn't do anything: + */ + BUG_ON(!flush_fn); + atomic_inc(&pin_list->count); pin->seq = seq; pin->flush = flush_fn; - - if (flush_fn) - list_add(&pin->list, &pin_list->list[type]); - else - list_add(&pin->list, &pin_list->flushed); + list_add(&pin->list, &pin_list->list[type]); } void bch2_journal_pin_copy(struct journal *j, From 066a26460bb209d987a138519fc32b3806c1288a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 Nov 2023 22:07:42 -0500 Subject: [PATCH 012/226] bcachefs: track_event_change() This introduces a new helper for connecting time_stats to state changes, i.e. when taking journal reservations is blocked for some reason. We use this to track separately the different reasons the journal might be blocked - i.e. space in the journal full, or the journal pin fifo full. Also do some cleanup and improvements on the time stats code. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 4 +- fs/bcachefs/journal.c | 16 +--- fs/bcachefs/journal_io.c | 3 + fs/bcachefs/journal_reclaim.c | 25 +++--- fs/bcachefs/journal_types.h | 6 +- fs/bcachefs/super.c | 3 +- fs/bcachefs/util.c | 144 +++++++++++++++++++--------------- fs/bcachefs/util.h | 33 +++++++- 8 files changed, 141 insertions(+), 93 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index b62737fdf5ab..19bc88f05911 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -401,7 +401,9 @@ BCH_DEBUG_PARAMS_DEBUG() x(journal_flush_write) \ x(journal_noflush_write) \ x(journal_flush_seq) \ - x(blocked_journal) \ + x(blocked_journal_low_on_space) \ + x(blocked_journal_low_on_pin) \ + x(blocked_journal_max_in_flight) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(nocow_lock_contended) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 5df417cd6743..63fb115da157 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -363,11 +363,6 @@ static int journal_entry_open(struct journal *j) } while ((v = atomic64_cmpxchg(&j->reservations.counter, old.v, new.v)) != old.v); - if (j->res_get_blocked_start) - bch2_time_stats_update(j->blocked_time, - j->res_get_blocked_start); - j->res_get_blocked_start = 0; - mod_delayed_work(c->io_complete_wq, &j->write_work, msecs_to_jiffies(c->opts.journal_flush_delay)); @@ -467,15 +462,12 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); ret = journal_entry_open(j); - if (ret == JOURNAL_ERR_max_in_flight) + if (ret == JOURNAL_ERR_max_in_flight) { + track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], + &j->max_in_flight_start, true); trace_and_count(c, journal_entry_full, c); -unlock: - if ((ret && ret != JOURNAL_ERR_insufficient_devices) && - !j->res_get_blocked_start) { - j->res_get_blocked_start = local_clock() ?: 1; - trace_and_count(c, journal_full, c); } - +unlock: can_discard = j->can_discard; spin_unlock(&j->lock); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 62d409f793d3..30fb8e950613 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1604,6 +1604,9 @@ static CLOSURE_CALLBACK(journal_write_done) bch2_journal_reclaim_fast(j); bch2_journal_space_available(j); + track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], + &j->max_in_flight_start, false); + closure_wake_up(&w->wait); journal_wake(j); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 64928e091d3b..bd33a7c9634c 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -50,17 +50,21 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j, return available; } -static inline void journal_set_watermark(struct journal *j, bool low_on_space) +static inline void journal_set_watermark(struct journal *j) { - unsigned watermark = BCH_WATERMARK_stripe; + struct bch_fs *c = container_of(j, struct bch_fs, journal); + bool low_on_space = j->space[journal_space_clean].total * 4 <= + j->space[journal_space_total].total; + bool low_on_pin = fifo_free(&j->pin) < j->pin.size / 4; + unsigned watermark = low_on_space || low_on_pin + ? BCH_WATERMARK_reclaim + : BCH_WATERMARK_stripe; - if (low_on_space) - watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim); - if (fifo_free(&j->pin) < j->pin.size / 4) - watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim); - - if (watermark == j->watermark) - return; + if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space], + &j->low_on_space_start, low_on_space) || + track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin], + &j->low_on_pin_start, low_on_pin)) + trace_and_count(c, journal_full, c); swap(watermark, j->watermark); if (watermark > j->watermark) @@ -226,7 +230,7 @@ void bch2_journal_space_available(struct journal *j) else clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); - journal_set_watermark(j, clean * 4 <= total); + journal_set_watermark(j); out: j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0; j->cur_entry_error = ret; @@ -833,6 +837,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) { + /* time_stats this */ bool did_work = false; if (!test_bit(JOURNAL_STARTED, &j->flags)) diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 1acce6ecfca0..2427cce64fed 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -262,16 +262,18 @@ struct journal { unsigned long last_flush_write; - u64 res_get_blocked_start; u64 write_start_time; u64 nr_flush_writes; u64 nr_noflush_writes; u64 entry_bytes_written; + u64 low_on_space_start; + u64 low_on_pin_start; + u64 max_in_flight_start; + struct bch2_time_stats *flush_write_time; struct bch2_time_stats *noflush_write_time; - struct bch2_time_stats *blocked_time; struct bch2_time_stats *flush_seq_time; #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 6e38e2d4abf2..00db787a3883 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -661,7 +661,9 @@ static int bch2_fs_online(struct bch_fs *c) ret = kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ?: kobject_add(&c->internal, &c->kobj, "internal") ?: kobject_add(&c->opts_dir, &c->kobj, "options") ?: +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT kobject_add(&c->time_stats, &c->kobj, "time_stats") ?: +#endif kobject_add(&c->counters_kobj, &c->kobj, "counters") ?: bch2_opts_create_sysfs_files(&c->opts_dir); if (ret) { @@ -773,7 +775,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write]; c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write]; - c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal]; c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; bch2_fs_btree_cache_init_early(&c->btree_cache); diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 84b142fcc3df..8d580d5a9c4d 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -315,6 +315,57 @@ int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task) return ret; } +#ifndef __KERNEL__ +#include +void bch2_prt_datetime(struct printbuf *out, time64_t sec) +{ + time_t t = sec; + char buf[64]; + ctime_r(&t, buf); + prt_str(out, buf); +} +#else +void bch2_prt_datetime(struct printbuf *out, time64_t sec) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%ptT", &sec); + prt_u64(out, sec); +} +#endif + +static const struct time_unit { + const char *name; + u64 nsecs; +} time_units[] = { + { "ns", 1 }, + { "us", NSEC_PER_USEC }, + { "ms", NSEC_PER_MSEC }, + { "s", NSEC_PER_SEC }, + { "m", (u64) NSEC_PER_SEC * 60}, + { "h", (u64) NSEC_PER_SEC * 3600}, + { "eon", U64_MAX }, +}; + +static const struct time_unit *pick_time_units(u64 ns) +{ + const struct time_unit *u; + + for (u = time_units; + u + 1 < time_units + ARRAY_SIZE(time_units) && + ns >= u[1].nsecs << 1; + u++) + ; + + return u; +} + +void bch2_pr_time_units(struct printbuf *out, u64 ns) +{ + const struct time_unit *u = pick_time_units(ns); + + prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); +} + /* time stats: */ #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT @@ -359,6 +410,7 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration); stats->max_duration = max(stats->max_duration, duration); stats->min_duration = min(stats->min_duration, duration); + stats->total_duration += duration; bch2_quantiles_update(&stats->quantiles, duration); } @@ -372,22 +424,26 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, } } -static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, - struct bch2_time_stat_buffer *b) +static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, + struct bch2_time_stat_buffer *b) { - struct bch2_time_stat_buffer_entry *i; - unsigned long flags; - - spin_lock_irqsave(&stats->lock, flags); - for (i = b->entries; + for (struct bch2_time_stat_buffer_entry *i = b->entries; i < b->entries + ARRAY_SIZE(b->entries); i++) bch2_time_stats_update_one(stats, i->start, i->end); - spin_unlock_irqrestore(&stats->lock, flags); - b->nr = 0; } +static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, + struct bch2_time_stat_buffer *b) +{ + unsigned long flags; + + spin_lock_irqsave(&stats->lock, flags); + __bch2_time_stats_clear_buffer(stats, b); + spin_unlock_irqrestore(&stats->lock, flags); +} + void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) { unsigned long flags; @@ -423,40 +479,6 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) preempt_enable(); } } -#endif - -static const struct time_unit { - const char *name; - u64 nsecs; -} time_units[] = { - { "ns", 1 }, - { "us", NSEC_PER_USEC }, - { "ms", NSEC_PER_MSEC }, - { "s", NSEC_PER_SEC }, - { "m", (u64) NSEC_PER_SEC * 60}, - { "h", (u64) NSEC_PER_SEC * 3600}, - { "eon", U64_MAX }, -}; - -static const struct time_unit *pick_time_units(u64 ns) -{ - const struct time_unit *u; - - for (u = time_units; - u + 1 < time_units + ARRAY_SIZE(time_units) && - ns >= u[1].nsecs << 1; - u++) - ; - - return u; -} - -void bch2_pr_time_units(struct printbuf *out, u64 ns) -{ - const struct time_unit *u = pick_time_units(ns); - - prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); -} static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) { @@ -467,26 +489,6 @@ static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) prt_printf(out, "%s", u->name); } -#ifndef __KERNEL__ -#include -void bch2_prt_datetime(struct printbuf *out, time64_t sec) -{ - time_t t = sec; - char buf[64]; - ctime_r(&t, buf); - prt_str(out, buf); -} -#else -void bch2_prt_datetime(struct printbuf *out, time64_t sec) -{ - char buf[64]; - snprintf(buf, sizeof(buf), "%ptT", &sec); - prt_u64(out, sec); -} -#endif - -#define TABSTOP_SIZE 12 - static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) { prt_str(out, name); @@ -495,12 +497,24 @@ static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 prt_newline(out); } +#define TABSTOP_SIZE 12 + void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) { const struct time_unit *u; s64 f_mean = 0, d_mean = 0; u64 q, last_q = 0, f_stddev = 0, d_stddev = 0; int i; + + if (stats->buffer) { + int cpu; + + spin_lock_irq(&stats->lock); + for_each_possible_cpu(cpu) + __bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu)); + spin_unlock_irq(&stats->lock); + } + /* * avoid divide by zero */ @@ -546,6 +560,7 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats pr_name_and_units(out, "min:", stats->min_duration); pr_name_and_units(out, "max:", stats->max_duration); + pr_name_and_units(out, "total:", stats->total_duration); prt_printf(out, "mean:"); prt_tab(out); @@ -603,6 +618,9 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats last_q = q; } } +#else +void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) {} +#endif void bch2_time_stats_exit(struct bch2_time_stats *stats) { diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index b701f7fe0784..7eb567ab4457 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -374,8 +374,9 @@ struct bch2_time_stat_buffer { struct bch2_time_stats { spinlock_t lock; /* all fields are in nanoseconds */ - u64 max_duration; u64 min_duration; + u64 max_duration; + u64 total_duration; u64 max_freq; u64 min_freq; u64 last_event; @@ -390,15 +391,39 @@ struct bch2_time_stats { #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64); -#else -static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {} -#endif static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start) { __bch2_time_stats_update(stats, start, local_clock()); } +static inline bool track_event_change(struct bch2_time_stats *stats, + u64 *start, bool v) +{ + if (v != !!*start) { + if (!v) { + bch2_time_stats_update(stats, *start); + *start = 0; + } else { + *start = local_clock() ?: 1; + return true; + } + } + + return false; +} +#else +static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {} +static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start) {} +static inline bool track_event_change(struct bch2_time_stats *stats, + u64 *start, bool v) +{ + bool ret = v && !*start; + *start = v; + return ret; +} +#endif + void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *); void bch2_time_stats_exit(struct bch2_time_stats *); From 389c92b36e302d866c2850e70560667ac4a826c6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 7 Nov 2023 11:16:14 -0500 Subject: [PATCH 013/226] bcachefs: Clear k->needs_whitout earlier in commit path The upcoming btree write buffer rework is going to use the journal itself as the first stage of the write buffer; this is a cleanup to make sure k->needs_whiteout is initialized before keys hit the journal. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 3d66d9c595e2..4d0f388f2be1 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -655,6 +655,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, *stopped_at = i; return ret; } + + i->k->k.needs_whiteout = false; } if (trans->nr_wb_updates && @@ -777,8 +779,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, } trans_for_each_update(trans, i) { - i->k->k.needs_whiteout = false; - if (!i->cached) { u64 seq = trans->journal_res.seq; From 9a71de675f97acafdcda7bf4ce4ba10247c1db00 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 8 Nov 2023 22:00:00 -0500 Subject: [PATCH 014/226] bcachefs: BTREE_INSERT_JOURNAL_REPLAY now "don't init trans->journal_res" This slightly changes how trans->journal_res works, in preparation for changing the btree write buffer flush path to use it. Now, BTREE_INSERT_JOURNAL_REPLAY means "don't take a journal reservation; trans->journal_res.seq already refers to the journal sequence number to pin". Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 19 +++++++++++++++---- fs/bcachefs/recovery.c | 2 ++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 4d0f388f2be1..6c5510c4a2c4 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -676,8 +676,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, if (unlikely(trans->journal_transaction_names)) journal_transaction_name(trans); - } else { - trans->journal_res.seq = c->journal.replay_journal_seq; } /* @@ -896,7 +894,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags * Drop journal reservation after dropping write locks, since dropping * the journal reservation may kick off a journal write: */ - bch2_journal_res_put(&c->journal, &trans->journal_res); + if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) + bch2_journal_res_put(&c->journal, &trans->journal_res); return ret; } @@ -1139,7 +1138,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) } retry: bch2_trans_verify_not_in_restart(trans); - memset(&trans->journal_res, 0, sizeof(trans->journal_res)); + if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) + memset(&trans->journal_res, 0, sizeof(trans->journal_res)); ret = do_bch2_trans_commit(trans, flags, &i, _RET_IP_); @@ -1164,5 +1164,16 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (ret) goto out; + /* + * We might have done another transaction commit in the error path - + * i.e. btree write buffer flush - which will have made use of + * trans->journal_res, but with BTREE_INSERT_JOURNAL_REPLAY that is how + * the journal sequence number to pin is passed in - so we must restart: + */ + if (flags & BTREE_INSERT_JOURNAL_REPLAY) { + ret = -BCH_ERR_transaction_restart_nested; + goto out; + } + goto retry; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 7b625fa37d92..d56c28b4c8d4 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -99,6 +99,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans, unsigned update_flags = BTREE_TRIGGER_NORUN; int ret; + trans->journal_res.seq = k->journal_seq; + /* * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to * keep the key cache coherent with the underlying btree. Nothing From 43c7ede0095d7020ca03113b2fde84b00dd5cd49 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 8 Nov 2023 22:04:29 -0500 Subject: [PATCH 015/226] bcachefs: Kill BTREE_UPDATE_PREJOURNAL With the previous patch that reworks BTREE_INSERT_JOURNAL_REPLAY, we can now switch the btree write buffer to use it for flushing. This has the advantage that transaction commits don't need to take a journal reservation at all. Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey_methods.h | 2 -- fs/bcachefs/btree_trans_commit.c | 7 +------ fs/bcachefs/btree_types.h | 1 - fs/bcachefs/btree_update.c | 23 ----------------------- fs/bcachefs/btree_write_buffer.c | 14 ++++++++++---- 5 files changed, 11 insertions(+), 36 deletions(-) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 3a370b7087ac..912adadfb4dd 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -93,7 +93,6 @@ static inline int bch2_mark_key(struct btree_trans *trans, enum btree_update_flags { __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END, __BTREE_UPDATE_NOJOURNAL, - __BTREE_UPDATE_PREJOURNAL, __BTREE_UPDATE_KEY_CACHE_RECLAIM, __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ @@ -108,7 +107,6 @@ enum btree_update_flags { #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) #define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL) -#define BTREE_UPDATE_PREJOURNAL (1U << __BTREE_UPDATE_PREJOURNAL) #define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM) #define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 6c5510c4a2c4..403b7310d21a 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -778,12 +778,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, trans_for_each_update(trans, i) { if (!i->cached) { - u64 seq = trans->journal_res.seq; - - if (i->flags & BTREE_UPDATE_PREJOURNAL) - seq = i->seq; - - bch2_btree_insert_key_leaf(trans, i->path, i->k, seq); + bch2_btree_insert_key_leaf(trans, i->path, i->k, trans->journal_res.seq); } else if (!i->key_cache_already_flushed) bch2_btree_insert_key_cached(trans, flags, i); else { diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 60453ba86c4b..b667da4e8403 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -356,7 +356,6 @@ struct btree_insert_entry { u8 old_btree_u64s; struct bkey_i *k; struct btree_path *path; - u64 seq; /* key being overwritten: */ struct bkey old_k; const struct bch_val *old_v; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 2fd3c8cc6f51..82f85e3a5787 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -381,21 +381,12 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, { struct bch_fs *c = trans->c; struct btree_insert_entry *i, n; - u64 seq = 0; int cmp; EBUG_ON(!path->should_be_locked); EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX); EBUG_ON(!bpos_eq(k->k.p, path->pos)); - /* - * The transaction journal res hasn't been allocated at this point. - * That occurs at commit time. Reuse the seq field to pass in the seq - * of a prejournaled key. - */ - if (flags & BTREE_UPDATE_PREJOURNAL) - seq = trans->journal_res.seq; - n = (struct btree_insert_entry) { .flags = flags, .bkey_type = __btree_node_type(path->level, path->btree_id), @@ -404,7 +395,6 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, .cached = path->cached, .path = path, .k = k, - .seq = seq, .ip_allocated = ip, }; @@ -432,7 +422,6 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, i->cached = n.cached; i->k = n.k; i->path = n.path; - i->seq = n.seq; i->ip_allocated = n.ip_allocated; } else { array_insert_item(trans->updates, trans->nr_updates, @@ -543,18 +532,6 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_); } -/* - * Add a transaction update for a key that has already been journaled. - */ -int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq, - struct btree_iter *iter, struct bkey_i *k, - enum btree_update_flags flags) -{ - trans->journal_res.seq = seq; - return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL| - BTREE_UPDATE_PREJOURNAL); -} - static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 9609eb18f38d..7f3147e064a5 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -78,12 +78,15 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, } return 0; trans_commit: - return bch2_trans_update_seq(trans, wb->journal_seq, iter, &wb->k, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: + trans->journal_res.seq = wb->journal_seq; + + return bch2_trans_update(trans, iter, &wb->k, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, NULL, NULL, commit_flags| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| + BTREE_INSERT_JOURNAL_REPLAY| BTREE_INSERT_JOURNAL_RECLAIM); } @@ -127,9 +130,11 @@ btree_write_buffered_insert(struct btree_trans *trans, bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k), BTREE_ITER_CACHED|BTREE_ITER_INTENT); + trans->journal_res.seq = wb->journal_seq; + ret = bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update_seq(trans, wb->journal_seq, &iter, &wb->k, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + bch2_trans_update(trans, &iter, &wb->k, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -262,6 +267,7 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f ret = commit_do(trans, NULL, NULL, commit_flags| BTREE_INSERT_NOFAIL| + BTREE_INSERT_JOURNAL_REPLAY| BTREE_INSERT_JOURNAL_RECLAIM, btree_write_buffered_insert(trans, i)); if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret))) From bdde9829de1ef3c98edd872c6f181de9fe610cf6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 Nov 2023 20:41:58 -0500 Subject: [PATCH 016/226] bcachefs: Go rw before journal replay This gets us slightly nicer log messages. Also, this slightly clarifies synchronization of c->journal_keys; after we go RW it's in use by multiple threads (so that the btree iterator code can overlay keys from the journal); so it has to be prepped before that point. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d56c28b4c8d4..92b70e7977bd 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -149,9 +149,6 @@ static int bch2_journal_replay(struct bch_fs *c) size_t i; int ret = 0; - move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); - keys->gap = keys->nr; - keys_sorted = kvmalloc_array(keys->nr, sizeof(*keys_sorted), GFP_KERNEL); if (!keys_sorted) return -BCH_ERR_ENOMEM_journal_replay; @@ -180,7 +177,6 @@ static int bch2_journal_replay(struct bch_fs *c) replay_now_at(j, k->journal_seq); ret = bch2_trans_do(c, NULL, NULL, - BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL| (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim @@ -497,7 +493,19 @@ static int bch2_check_allocations(struct bch_fs *c) static int bch2_set_may_go_rw(struct bch_fs *c) { + struct journal_keys *keys = &c->journal_keys; + + /* + * After we go RW, the journal keys buffer can't be modified (except for + * setting journal_key->overwritten: it will be accessed by multiple + * threads + */ + move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); + keys->gap = keys->nr; + set_bit(BCH_FS_MAY_GO_RW, &c->flags); + if (keys->nr) + return bch2_fs_read_write_early(c); return 0; } From 573224301c56ae6c169b77cc003be7690da70b9b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 Nov 2023 21:02:58 -0500 Subject: [PATCH 017/226] bcachefs: Make journal replay more efficient Journal replay now first attempts to replay keys in sorted order, similar to how the btree write buffer flush path works. Any keys that can not be replayed due to journal deadlock are then left for later and replayed in journal order, unpinning journal entries as we go. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 - fs/bcachefs/recovery.c | 92 +++++++++++++++++++++++++++++------------- 2 files changed, 63 insertions(+), 30 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 9ce29681eec9..d49a4994666d 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -73,7 +73,6 @@ x(ENOMEM, ENOMEM_fsck_add_nlink) \ x(ENOMEM, ENOMEM_journal_key_insert) \ x(ENOMEM, ENOMEM_journal_keys_sort) \ - x(ENOMEM, ENOMEM_journal_replay) \ x(ENOMEM, ENOMEM_read_superblock_clean) \ x(ENOMEM, ENOMEM_fs_alloc) \ x(ENOMEM, ENOMEM_fs_name_alloc) \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 92b70e7977bd..88dbf11ba25d 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -99,6 +99,9 @@ static int bch2_journal_replay_key(struct btree_trans *trans, unsigned update_flags = BTREE_TRIGGER_NORUN; int ret; + if (k->overwritten) + return 0; + trans->journal_res.seq = k->journal_seq; /* @@ -142,24 +145,14 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) static int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; - struct journal_key **keys_sorted, *k; + DARRAY(struct journal_key *) keys_sorted = { 0 }; + struct journal_key **kp; struct journal *j = &c->journal; u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; - size_t i; + struct btree_trans *trans = bch2_trans_get(c); int ret = 0; - keys_sorted = kvmalloc_array(keys->nr, sizeof(*keys_sorted), GFP_KERNEL); - if (!keys_sorted) - return -BCH_ERR_ENOMEM_journal_replay; - - for (i = 0; i < keys->nr; i++) - keys_sorted[i] = &keys->d[i]; - - sort(keys_sorted, keys->nr, - sizeof(keys_sorted[0]), - journal_sort_seq_cmp, NULL); - if (keys->nr) { ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", keys->nr, start_seq, end_seq); @@ -169,26 +162,67 @@ static int bch2_journal_replay(struct bch_fs *c) BUG_ON(!atomic_read(&keys->ref)); - for (i = 0; i < keys->nr; i++) { - k = keys_sorted[i]; - + /* + * First, attempt to replay keys in sorted order. This is more + * efficient - better locality of btree access - but some might fail if + * that would cause a journal deadlock. + */ + for (size_t i = 0; i < keys->nr; i++) { cond_resched(); + struct journal_key *k = keys->d + i; + + /* Skip fastpath if we're low on space in the journal */ + ret = c->journal.watermark ? -1 : + commit_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_JOURNAL_RECLAIM| + (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0), + bch2_journal_replay_key(trans, k)); + BUG_ON(!ret && !k->overwritten); + if (ret) { + ret = darray_push(&keys_sorted, k); + if (ret) + goto err; + } + } + + /* + * Now, replay any remaining keys in the order in which they appear in + * the journal, unpinning those journal entries as we go: + */ + sort(keys_sorted.data, keys_sorted.nr, + sizeof(keys_sorted.data[0]), + journal_sort_seq_cmp, NULL); + + darray_for_each(keys_sorted, kp) { + cond_resched(); + + struct journal_key *k = *kp; + replay_now_at(j, k->journal_seq); - ret = bch2_trans_do(c, NULL, NULL, - BTREE_INSERT_NOFAIL| - (!k->allocated - ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim - : 0), + ret = commit_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + (!k->allocated + ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim + : 0), bch2_journal_replay_key(trans, k)); - if (ret) { - bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", - bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret)); + bch_err_msg(c, ret, "while replaying key at btree %s level %u:", + bch2_btree_id_str(k->btree_id), k->level); + if (ret) goto err; - } + + BUG_ON(!k->overwritten); } + /* + * We need to put our btree_trans before calling flush_all_pins(), since + * that will use a btree_trans internally + */ + bch2_trans_put(trans); + trans = NULL; + if (!c->opts.keep_journal) bch2_journal_keys_put_initial(c); @@ -202,10 +236,10 @@ static int bch2_journal_replay(struct bch_fs *c) if (keys->nr && !ret) bch2_journal_log_msg(c, "journal replay finished"); err: - kvfree(keys_sorted); - - if (ret) - bch_err_fn(c, ret); + if (trans) + bch2_trans_put(trans); + darray_exit(&keys_sorted); + bch_err_fn(c, ret); return ret; } From e17b93eb36729c7f889264ebe7e46817a8253560 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 7 Nov 2023 10:42:53 -0500 Subject: [PATCH 018/226] bcachefs: Avoiding dropping/retaking write locks in bch2_btree_write_buffer_flush_one() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_write_buffer.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 7f3147e064a5..f40ac365620f 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -48,6 +48,13 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, if (ret) return ret; + /* + * We can't clone a path that has write locks: unshare it now, before + * set_pos and traverse(): + */ + if (iter->path->ref > 1) + iter->path = __bch2_btree_path_make_mut(trans, iter->path, true, _THIS_IP_); + path = iter->path; if (!*write_locked) { @@ -67,15 +74,6 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq); (*fast)++; - - if (path->ref > 1) { - /* - * We can't clone a path that has write locks: if the path is - * shared, unlock before set_pos(), traverse(): - */ - bch2_btree_node_unlock_write(trans, path, path->l[0].b); - *write_locked = false; - } return 0; trans_commit: trans->journal_res.seq = wb->journal_seq; From cd5bd1628284f874ad6180be1f67e9b6739cc02a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 Nov 2023 16:02:15 -0500 Subject: [PATCH 019/226] bcachefs: Fix redundant variable initialization path->level was being read, but never used. Reported-by: Colin Ian King Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index da594e006769..bf4d2ade4535 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1214,8 +1214,6 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos new_pos, bool intent, unsigned long ip, int cmp) { - unsigned level = path->level; - bch2_trans_verify_not_in_restart(trans); EBUG_ON(!path->ref); @@ -1231,7 +1229,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, goto out; } - level = btree_path_up_until_good_node(trans, path, cmp); + unsigned level = btree_path_up_until_good_node(trans, path, cmp); if (btree_path_node(path, level)) { struct btree_path_level *l = &path->l[level]; From aa62aabbc7ab758138df4ffed59fdf8324839d4a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 Nov 2023 16:20:58 -0500 Subject: [PATCH 020/226] bcachefs: Kill dead BTREE_INSERT flags BTREE_INSERT_NOWAIT and BTREE_INSERT_GC_LOCK_HELD are no longer used, and can be deleted. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 4 ---- fs/bcachefs/btree_update.h | 6 ------ fs/bcachefs/btree_update_interior.c | 14 ++++---------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 403b7310d21a..117ce5352242 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -982,7 +982,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) && - !(flags & BTREE_INSERT_NOWAIT) && (flags & BTREE_INSERT_NOFAIL), c, "%s: incorrectly got %s\n", __func__, bch2_err_str(ret)); @@ -1040,9 +1039,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) !trans->extra_journal_entries.nr) goto out_reset; - if (flags & BTREE_INSERT_GC_LOCK_HELD) - lockdep_assert_held(&c->gc_lock); - ret = bch2_trans_commit_run_triggers(trans); if (ret) goto out_reset; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 9816d2286540..a67105c1e5f7 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -28,8 +28,6 @@ enum btree_insert_flags { __BTREE_INSERT_LAZY_RW, __BTREE_INSERT_JOURNAL_REPLAY, __BTREE_INSERT_JOURNAL_RECLAIM, - __BTREE_INSERT_NOWAIT, - __BTREE_INSERT_GC_LOCK_HELD, __BCH_HASH_SET_MUST_CREATE, __BCH_HASH_SET_MUST_REPLACE, }; @@ -46,10 +44,6 @@ enum btree_insert_flags { /* Insert is being called from journal reclaim path: */ #define BTREE_INSERT_JOURNAL_RECLAIM BIT(__BTREE_INSERT_JOURNAL_RECLAIM) -/* Don't block on allocation failure (for new btree nodes: */ -#define BTREE_INSERT_NOWAIT BIT(__BTREE_INSERT_NOWAIT) -#define BTREE_INSERT_GC_LOCK_HELD BIT(__BTREE_INSERT_GC_LOCK_HELD) - #define BCH_HASH_SET_MUST_CREATE BIT(__BCH_HASH_SET_MUST_CREATE) #define BCH_HASH_SET_MUST_REPLACE BIT(__BCH_HASH_SET_MUST_REPLACE) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6482c07ae479..ae09cc00c19a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -476,9 +476,6 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, /* * Protects reaping from the btree node cache and using the btree node * open bucket reserve: - * - * BTREE_INSERT_NOWAIT only applies to btree node allocation, not - * blocking on this lock: */ ret = bch2_btree_cache_cannibalize_lock(c, cl); if (ret) @@ -488,9 +485,8 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, struct prealloc_nodes *p = as->prealloc_nodes + interior; while (p->nr < nr_nodes[interior]) { - b = __bch2_btree_node_alloc(trans, &as->disk_res, - flags & BTREE_INSERT_NOWAIT ? NULL : cl, - interior, flags); + b = __bch2_btree_node_alloc(trans, &as->disk_res, cl, + interior, flags); if (IS_ERR(b)) { ret = PTR_ERR(b); goto err; @@ -1109,9 +1105,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c); } - if (flags & BTREE_INSERT_GC_LOCK_HELD) - lockdep_assert_held(&c->gc_lock); - else if (!down_read_trylock(&c->gc_lock)) { + if (!down_read_trylock(&c->gc_lock)) { ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0)); if (ret) { up_read(&c->gc_lock); @@ -1125,7 +1119,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, as->c = c; as->start_time = start_time; as->mode = BTREE_INTERIOR_NO_UPDATE; - as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD); + as->took_gc_lock = true; as->btree_id = path->btree_id; as->update_level = update_level; INIT_LIST_HEAD(&as->list); From 5927310dcfc9360ca7f8366b468ad52681bf5a27 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 Nov 2023 16:31:50 -0500 Subject: [PATCH 021/226] bcachefs: bch_str_hash_flags_t Create a separate enum for str_hash flags - instead of abusing the btree_insert_flags enum - and create a __bitwise typedef for sparse typechecking. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update.h | 5 ----- fs/bcachefs/dirent.c | 5 +++-- fs/bcachefs/dirent.h | 3 ++- fs/bcachefs/str_hash.h | 25 ++++++++++++++++++------- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index a67105c1e5f7..49a2d832b460 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -28,8 +28,6 @@ enum btree_insert_flags { __BTREE_INSERT_LAZY_RW, __BTREE_INSERT_JOURNAL_REPLAY, __BTREE_INSERT_JOURNAL_RECLAIM, - __BCH_HASH_SET_MUST_CREATE, - __BCH_HASH_SET_MUST_REPLACE, }; /* Don't check for -ENOSPC: */ @@ -44,9 +42,6 @@ enum btree_insert_flags { /* Insert is being called from journal reclaim path: */ #define BTREE_INSERT_JOURNAL_RECLAIM BIT(__BTREE_INSERT_JOURNAL_RECLAIM) -#define BCH_HASH_SET_MUST_CREATE BIT(__BCH_HASH_SET_MUST_CREATE) -#define BCH_HASH_SET_MUST_REPLACE BIT(__BCH_HASH_SET_MUST_REPLACE) - int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *, unsigned, unsigned); int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 2bfff0da7000..580c1c95361e 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -201,7 +201,8 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, const struct bch_hash_info *hash_info, u8 type, const struct qstr *name, u64 dst_inum, - u64 *dir_offset, int flags) + u64 *dir_offset, + bch_str_hash_flags_t str_hash_flags) { struct bkey_i_dirent *dirent; int ret; @@ -212,7 +213,7 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, return ret; ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, - dir, &dirent->k_i, flags); + dir, &dirent->k_i, str_hash_flags); *dir_offset = dirent->k.p.offset; return ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 1e3431990abd..10dc3ad7e80f 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -37,7 +37,8 @@ int bch2_dirent_read_target(struct btree_trans *, subvol_inum, int bch2_dirent_create(struct btree_trans *, subvol_inum, const struct bch_hash_info *, u8, - const struct qstr *, u64, u64 *, int); + const struct qstr *, u64, u64 *, + bch_str_hash_flags_t); static inline unsigned vfs_d_type(unsigned type) { diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index ae21a8cca1b4..89fdb7c21134 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -15,6 +15,16 @@ #include #include +typedef unsigned __bitwise bch_str_hash_flags_t; + +enum bch_str_hash_flags { + __BCH_HASH_SET_MUST_CREATE, + __BCH_HASH_SET_MUST_REPLACE, +}; + +#define BCH_HASH_SET_MUST_CREATE (__force bch_str_hash_flags_t) BIT(__BCH_HASH_SET_MUST_CREATE) +#define BCH_HASH_SET_MUST_REPLACE (__force bch_str_hash_flags_t) BIT(__BCH_HASH_SET_MUST_REPLACE) + static inline enum bch_str_hash_type bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) { @@ -246,7 +256,7 @@ int bch2_hash_set_snapshot(struct btree_trans *trans, const struct bch_hash_info *info, subvol_inum inum, u32 snapshot, struct bkey_i *insert, - int flags, + bch_str_hash_flags_t str_hash_flags, int update_flags) { struct btree_iter iter, slot = { NULL }; @@ -269,7 +279,7 @@ int bch2_hash_set_snapshot(struct btree_trans *trans, } if (!slot.path && - !(flags & BCH_HASH_SET_MUST_REPLACE)) + !(str_hash_flags & BCH_HASH_SET_MUST_REPLACE)) bch2_trans_copy_iter(&slot, &iter); if (k.k->type != KEY_TYPE_hash_whiteout) @@ -287,16 +297,16 @@ int bch2_hash_set_snapshot(struct btree_trans *trans, found = true; not_found: - if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { + if (!found && (str_hash_flags & BCH_HASH_SET_MUST_REPLACE)) { ret = -BCH_ERR_ENOENT_str_hash_set_must_replace; - } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { + } else if (found && (str_hash_flags & BCH_HASH_SET_MUST_CREATE)) { ret = -EEXIST; } else { if (!found && slot.path) swap(iter, slot); insert->k.p = iter.pos; - ret = bch2_trans_update(trans, &iter, insert, 0); + ret = bch2_trans_update(trans, &iter, insert, update_flags); } goto out; @@ -307,7 +317,8 @@ int bch2_hash_set(struct btree_trans *trans, const struct bch_hash_desc desc, const struct bch_hash_info *info, subvol_inum inum, - struct bkey_i *insert, int flags) + struct bkey_i *insert, + bch_str_hash_flags_t str_hash_flags) { u32 snapshot; int ret; @@ -319,7 +330,7 @@ int bch2_hash_set(struct btree_trans *trans, insert->k.p.inode = inum.inum; return bch2_hash_set_snapshot(trans, desc, info, inum, - snapshot, insert, flags, 0); + snapshot, insert, str_hash_flags, 0); } static __always_inline From cb52d23e77a6e5e9588962ba7e283b0d0d2c1976 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 Nov 2023 16:31:50 -0500 Subject: [PATCH 022/226] bcachefs: Rename BTREE_INSERT flags BTREE_INSERT flags are actually transaction commit flags - rename them for clarity. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 30 +++++++++++----------- fs/bcachefs/backpointers.c | 12 ++++----- fs/bcachefs/btree_gc.c | 10 ++++---- fs/bcachefs/btree_io.c | 6 ++--- fs/bcachefs/btree_key_cache.c | 6 ++--- fs/bcachefs/btree_trans_commit.c | 39 ++++++++++++++-------------- fs/bcachefs/btree_update.c | 4 +-- fs/bcachefs/btree_update.h | 36 ++++++++++++++------------ fs/bcachefs/btree_update_interior.c | 16 ++++++------ fs/bcachefs/btree_write_buffer.c | 18 ++++++------- fs/bcachefs/data_update.c | 6 ++--- fs/bcachefs/ec.c | 10 ++++---- fs/bcachefs/fs.c | 6 ++--- fs/bcachefs/fsck.c | 40 ++++++++++++++--------------- fs/bcachefs/inode.c | 10 ++++---- fs/bcachefs/io_misc.c | 12 ++++----- fs/bcachefs/io_read.c | 2 +- fs/bcachefs/io_write.c | 6 ++--- fs/bcachefs/logged_ops.c | 4 +-- fs/bcachefs/lru.c | 2 +- fs/bcachefs/migrate.c | 2 +- fs/bcachefs/rebalance.c | 6 ++--- fs/bcachefs/recovery.c | 12 ++++----- fs/bcachefs/reflink.c | 2 +- fs/bcachefs/snapshot.c | 10 ++++---- fs/bcachefs/subvolume.c | 6 ++--- 26 files changed, 158 insertions(+), 155 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 6002bd0cf89c..3326b7a90204 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -561,8 +561,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw, bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); if (ret) break; @@ -581,8 +581,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) if (have_bucket_gens_key && !ret) ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw, bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); bch2_trans_put(trans); @@ -1267,7 +1267,7 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran ret = bch2_btree_delete_extent_at(trans, iter, iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW); + BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw); goto out; } @@ -1404,8 +1404,8 @@ int bch2_check_alloc_info(struct bch_fs *c) } ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw); if (ret) goto bkey_err; @@ -1463,7 +1463,7 @@ int bch2_check_alloc_info(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, bch2_check_bucket_gens_key(trans, &iter, k)); err: bch2_trans_put(trans); @@ -1556,7 +1556,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, bch2_check_alloc_to_lru_ref(trans, &iter))); if (ret) bch_err_fn(c, ret); @@ -1665,7 +1665,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, BCH_WATERMARK_btree| - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); if (ret) goto out; @@ -1770,7 +1770,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, BTREE_TRIGGER_BUCKET_INVALIDATE) ?: bch2_trans_commit(trans, NULL, NULL, BCH_WATERMARK_btree| - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); if (ret) goto out; @@ -1894,8 +1894,8 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, ret = bch2_bucket_do_index(trans, k, a, true) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_lazy_rw| + BCH_TRANS_COMMIT_no_enospc); if (ret) goto bkey_err; @@ -1915,8 +1915,8 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_lazy_rw| + BCH_TRANS_COMMIT_no_enospc); if (ret) goto bkey_err; diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 23c0834a97a4..5025a71ad685 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -382,7 +382,7 @@ int bch2_check_btree_backpointers(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, 0, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, bch2_check_btree_backpointer(trans, &iter, k))); if (ret) bch_err_fn(c, ret); @@ -629,8 +629,8 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, do { ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_lazy_rw| + BCH_TRANS_COMMIT_no_enospc, check_extent_to_backpointers(trans, &iter, bucket_start, bucket_end, &last_flushed)); @@ -644,8 +644,8 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, break; ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_lazy_rw| + BCH_TRANS_COMMIT_no_enospc, check_btree_root_to_backpointers(trans, btree_id, bucket_start, bucket_end, &last_flushed)); @@ -807,7 +807,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, check_one_backpointer(trans, start, end, bkey_s_c_to_backpointer(k), &last_flushed_pos)); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 30ab78a24517..f53afa2acd8c 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1499,7 +1499,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, ca->mi.first_bucket), BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, bch2_alloc_write_key(trans, &iter, k, metadata_only)); if (ret < 0) { @@ -1657,7 +1657,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_gc_write_reflink_key(trans, &iter, k, &idx)); c->reflink_gc_nr = 0; @@ -1781,7 +1781,7 @@ static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_gc_write_stripes_key(trans, &iter, k)); bch2_trans_put(trans); @@ -2017,7 +2017,7 @@ int bch2_gc_gens(struct bch_fs *c) BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_no_enospc, gc_btree_gens_key(trans, &iter, k)); if (ret && !bch2_err_matches(ret, EROFS)) bch_err_fn(c, ret); @@ -2030,7 +2030,7 @@ int bch2_gc_gens(struct bch_fs *c) BTREE_ITER_PREFETCH, k, NULL, NULL, - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_no_enospc, bch2_alloc_write_oldest_gen(trans, &iter, k)); if (ret && !bch2_err_matches(ret, EROFS)) bch_err_fn(c, ret); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 5a720f0cd5a6..cf19621304f7 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1800,9 +1800,9 @@ static void btree_node_write_work(struct work_struct *work) ret = bch2_trans_do(c, NULL, NULL, 0, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, BCH_WATERMARK_reclaim| - BTREE_INSERT_JOURNAL_RECLAIM| - BTREE_INSERT_NOFAIL| - BTREE_INSERT_NOCHECK_RW, + BCH_TRANS_COMMIT_journal_reclaim| + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_check_rw, !wbio->wbio.failed.nr)); if (ret) goto err; diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 1b7a5668df7c..4599e999c231 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -656,8 +656,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_NOFAIL| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc| (ck->journal.seq == journal_last_seq(j) ? BCH_WATERMARK_reclaim : 0)| @@ -734,7 +734,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, ret = commit_do(trans, NULL, NULL, 0, btree_key_cache_flush_pos(trans, key, seq, - BTREE_INSERT_JOURNAL_RECLAIM, false)); + BCH_TRANS_COMMIT_journal_reclaim, false)); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 117ce5352242..4bdb847dd688 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -408,7 +408,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && bch2_btree_key_cache_must_wait(c) && - !(flags & BTREE_INSERT_JOURNAL_RECLAIM)) + !(flags & BCH_TRANS_COMMIT_journal_reclaim)) return -BCH_ERR_btree_insert_need_journal_reclaim; /* @@ -667,7 +667,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, * Don't get journal reservation until after we know insert will * succeed: */ - if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) { + if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) { ret = bch2_trans_journal_res_get(trans, (flags & BCH_WATERMARK_MASK)| JOURNAL_RES_GET_NONBLOCK); @@ -684,7 +684,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && - !(flags & BTREE_INSERT_JOURNAL_REPLAY)) { + !(flags & BCH_TRANS_COMMIT_no_journal_res)) { if (bch2_journal_seq_verify) trans_for_each_update(trans, i) i->k->k.version.lo = trans->journal_res.seq; @@ -698,7 +698,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, return -BCH_ERR_btree_insert_need_mark_replicas; if (trans->nr_wb_updates) { - EBUG_ON(flags & BTREE_INSERT_JOURNAL_REPLAY); + EBUG_ON(flags & BCH_TRANS_COMMIT_no_journal_res); ret = bch2_btree_insert_keys_write_buffer(trans); if (ret) @@ -735,7 +735,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, trans->journal_res.u64s -= trans->extra_journal_entries.nr; } - if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) { + if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) { struct journal *j = &c->journal; struct jset_entry *entry; @@ -889,7 +889,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags * Drop journal reservation after dropping write locks, since dropping * the journal reservation may kick off a journal write: */ - if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) + if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) bch2_journal_res_put(&c->journal, &trans->journal_res); return ret; @@ -927,7 +927,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK * flag */ - if ((flags & BTREE_INSERT_JOURNAL_RECLAIM) && + if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) { ret = -BCH_ERR_journal_reclaim_would_deadlock; break; @@ -962,7 +962,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, if (wb->state.nr > wb->size * 3 / 4) { bch2_trans_begin(trans); ret = __bch2_btree_write_buffer_flush(trans, - flags|BTREE_INSERT_NOCHECK_RW, true); + flags|BCH_TRANS_COMMIT_no_check_rw, true); if (!ret) { trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); @@ -982,7 +982,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) && - (flags & BTREE_INSERT_NOFAIL), c, + (flags & BCH_TRANS_COMMIT_no_enospc), c, "%s: incorrectly got %s\n", __func__, bch2_err_str(ret)); return ret; @@ -994,7 +994,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags) struct bch_fs *c = trans->c; int ret; - if (likely(!(flags & BTREE_INSERT_LAZY_RW)) || + if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) || test_bit(BCH_FS_STARTED, &c->flags)) return -BCH_ERR_erofs_trans_commit; @@ -1047,7 +1047,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) struct printbuf buf = PRINTBUF; enum bkey_invalid_flags invalid_flags = 0; - if (!(flags & BTREE_INSERT_JOURNAL_REPLAY)) + if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT; if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), @@ -1065,7 +1065,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; } - if (!(flags & BTREE_INSERT_NOCHECK_RW) && + if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) { ret = bch2_trans_commit_get_rw_cold(trans, flags); if (ret) @@ -1078,7 +1078,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) bch2_trans_unlock(trans); ret = __bch2_btree_write_buffer_flush(trans, - flags|BTREE_INSERT_NOCHECK_RW, true); + flags|BCH_TRANS_COMMIT_no_check_rw, true); if (!ret) { trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); @@ -1122,14 +1122,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (trans->extra_journal_res) { ret = bch2_disk_reservation_add(c, trans->disk_res, trans->extra_journal_res, - (flags & BTREE_INSERT_NOFAIL) + (flags & BCH_TRANS_COMMIT_no_enospc) ? BCH_DISK_RESERVATION_NOFAIL : 0); if (ret) goto err; } retry: bch2_trans_verify_not_in_restart(trans); - if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) + if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) memset(&trans->journal_res, 0, sizeof(trans->journal_res)); ret = do_bch2_trans_commit(trans, flags, &i, _RET_IP_); @@ -1142,7 +1142,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) trace_and_count(c, transaction_commit, trans, _RET_IP_); out: - if (likely(!(flags & BTREE_INSERT_NOCHECK_RW))) + if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw))) bch2_write_ref_put(c, BCH_WRITE_REF_trans); out_reset: if (!ret) @@ -1158,10 +1158,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) /* * We might have done another transaction commit in the error path - * i.e. btree write buffer flush - which will have made use of - * trans->journal_res, but with BTREE_INSERT_JOURNAL_REPLAY that is how - * the journal sequence number to pin is passed in - so we must restart: + * trans->journal_res, but with BCH_TRANS_COMMIT_no_journal_res that is + * how the journal sequence number to pin is passed in - so we must + * restart: */ - if (flags & BTREE_INSERT_JOURNAL_REPLAY) { + if (flags & BCH_TRANS_COMMIT_no_journal_res) { ret = -BCH_ERR_transaction_restart_nested; goto out; } diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 82f85e3a5787..0d6830ec73c4 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -786,7 +786,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?: bch2_trans_commit(trans, &disk_res, journal_seq, - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); bch2_disk_reservation_put(trans->c, &disk_res); err: /* @@ -891,7 +891,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args); } else { ret = bch2_trans_do(c, NULL, NULL, - BTREE_INSERT_LAZY_RW|commit_flags, + BCH_TRANS_COMMIT_lazy_rw|commit_flags, __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args)); } diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 49a2d832b460..14a2315aa88e 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -21,26 +21,28 @@ void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *, struct bkey_i *, u64); -enum btree_insert_flags { +#define BCH_TRANS_COMMIT_FLAGS() \ + x(no_enospc, "don't check for enospc") \ + x(no_check_rw, "don't attempt to take a ref on c->writes") \ + x(lazy_rw, "go read-write if we haven't yet - only for use in recovery") \ + x(no_journal_res, "don't take a journal reservation, instead " \ + "pin journal entry referred to by trans->journal_res.seq") \ + x(journal_reclaim, "operation required for journal reclaim; may return error" \ + "instead of deadlocking if BCH_WATERMARK_reclaim not specified")\ + +enum __bch_trans_commit_flags { /* First bits for bch_watermark: */ - __BTREE_INSERT_NOFAIL = BCH_WATERMARK_BITS, - __BTREE_INSERT_NOCHECK_RW, - __BTREE_INSERT_LAZY_RW, - __BTREE_INSERT_JOURNAL_REPLAY, - __BTREE_INSERT_JOURNAL_RECLAIM, + __BCH_TRANS_COMMIT_FLAGS_START = BCH_WATERMARK_BITS, +#define x(n, ...) __BCH_TRANS_COMMIT_##n, + BCH_TRANS_COMMIT_FLAGS() +#undef x }; -/* Don't check for -ENOSPC: */ -#define BTREE_INSERT_NOFAIL BIT(__BTREE_INSERT_NOFAIL) - -#define BTREE_INSERT_NOCHECK_RW BIT(__BTREE_INSERT_NOCHECK_RW) -#define BTREE_INSERT_LAZY_RW BIT(__BTREE_INSERT_LAZY_RW) - -/* Insert is for journal replay - don't get journal reservations: */ -#define BTREE_INSERT_JOURNAL_REPLAY BIT(__BTREE_INSERT_JOURNAL_REPLAY) - -/* Insert is being called from journal reclaim path: */ -#define BTREE_INSERT_JOURNAL_RECLAIM BIT(__BTREE_INSERT_JOURNAL_RECLAIM) +enum bch_trans_commit_flags { +#define x(n, ...) BCH_TRANS_COMMIT_##n = BIT(__BCH_TRANS_COMMIT_##n), + BCH_TRANS_COMMIT_FLAGS() +#undef x +}; int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *, unsigned, unsigned); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index ae09cc00c19a..cafa26723b54 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -641,9 +641,9 @@ static void btree_update_nodes_written(struct btree_update *as) */ ret = commit_do(trans, &as->disk_res, &journal_seq, BCH_WATERMARK_reclaim| - BTREE_INSERT_NOFAIL| - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_JOURNAL_RECLAIM, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_journal_reclaim, btree_update_nodes_written_trans(trans, as)); bch2_trans_unlock(trans); @@ -1050,7 +1050,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, struct bch_fs *c = trans->c; struct btree_update *as; u64 start_time = local_clock(); - int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) + int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc) ? BCH_DISK_RESERVATION_NOFAIL : 0; unsigned nr_nodes[2] = { 0, 0 }; unsigned update_level = level; @@ -1068,7 +1068,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, flags &= ~BCH_WATERMARK_MASK; flags |= watermark; - if (!(flags & BTREE_INSERT_JOURNAL_RECLAIM) && + if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) && watermark < c->journal.watermark) { struct journal_res res = { 0 }; @@ -1162,7 +1162,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, * flag */ if (bch2_err_matches(ret, ENOSPC) && - (flags & BTREE_INSERT_JOURNAL_RECLAIM) && + (flags & BCH_TRANS_COMMIT_journal_reclaim) && watermark != BCH_WATERMARK_reclaim) { ret = -BCH_ERR_journal_reclaim_would_deadlock; goto err; @@ -1862,7 +1862,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, parent = btree_node_parent(path, b); as = bch2_btree_update_start(trans, path, level, false, - BTREE_INSERT_NOFAIL|flags); + BCH_TRANS_COMMIT_no_enospc|flags); ret = PTR_ERR_OR_ZERO(as); if (ret) goto err; @@ -1948,7 +1948,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_update *as; int ret; - flags |= BTREE_INSERT_NOFAIL; + flags |= BCH_TRANS_COMMIT_no_enospc; parent = btree_node_parent(iter->path, b); as = bch2_btree_update_start(trans, iter->path, b->c.level, diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index f40ac365620f..a6bf6ed37ced 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -82,10 +82,10 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, NULL, NULL, commit_flags| - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_NOFAIL| - BTREE_INSERT_JOURNAL_REPLAY| - BTREE_INSERT_JOURNAL_RECLAIM); + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_journal_res| + BCH_TRANS_COMMIT_journal_reclaim); } static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb) @@ -175,7 +175,7 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f * However, since we're not flushing in the order they appear in the * journal we won't be able to drop our journal pin until everything is * flushed - which means this could deadlock the journal if we weren't - * passing BTREE_INSERT_JOURNAL_RECLAIM. This causes the update to fail + * passing BCH_TRANS_COMMIT_journal_reclaim. This causes the update to fail * if it would block taking a journal reservation. * * If that happens, simply skip the key so we can optimistically insert @@ -264,9 +264,9 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f ret = commit_do(trans, NULL, NULL, commit_flags| - BTREE_INSERT_NOFAIL| - BTREE_INSERT_JOURNAL_REPLAY| - BTREE_INSERT_JOURNAL_RECLAIM, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_journal_res| + BCH_TRANS_COMMIT_journal_reclaim, btree_write_buffered_insert(trans, i)); if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret))) break; @@ -296,7 +296,7 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j, mutex_lock(&wb->flush_lock); return bch2_trans_run(c, - __bch2_btree_write_buffer_flush(trans, BTREE_INSERT_NOCHECK_RW, true)); + __bch2_btree_write_buffer_flush(trans, BCH_TRANS_COMMIT_no_check_rw, true)); } static inline u64 btree_write_buffer_ref(int idx) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 37d6ecae8c30..8ed3a9bde029 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -278,8 +278,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, &op->res, NULL, - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_NOFAIL| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc| m->data_opts.btree_insert_flags); if (!ret) { bch2_btree_iter_set_pos(&iter, next_pos); @@ -476,7 +476,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, return bch2_trans_relock(trans) ?: bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); } int bch2_data_update_init(struct btree_trans *trans, diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 2a77de18c004..c730f0933d29 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -803,7 +803,7 @@ static void ec_stripe_delete_work(struct work_struct *work) if (!idx) break; - ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ec_stripe_delete(trans, idx)); if (ret) { bch_err_fn(c, ret); @@ -983,8 +983,8 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b while (1) { ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc, ec_stripe_update_extent(trans, bucket_pos, bucket.gen, s, &bp_pos)); if (ret) @@ -1121,8 +1121,8 @@ static void ec_stripe_create(struct ec_stripe_new *s) } ret = bch2_trans_do(c, &s->res, NULL, - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc, ec_stripe_key_update(trans, bkey_i_to_stripe(&s->new_stripe.key), !s->have_existing_stripe)); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 49da8db1d9e9..50976ca0a5f0 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -93,7 +93,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, BTREE_ITER_INTENT) ?: (set ? set(trans, inode, &inode_u, p) : 0) ?: bch2_inode_write(trans, &iter, &inode_u) ?: - bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); /* * the btree node lock protects inode->ei_inode, not ei_update_lock; @@ -455,7 +455,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_no_enospc, bch2_unlink_trans(trans, inode_inum(dir), &dir_u, &inode_u, &dentry->d_name, @@ -729,7 +729,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); btree_err: bch2_trans_iter_exit(trans, &inode_iter); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index d95940075757..654309a1a5d5 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -208,8 +208,8 @@ static int fsck_write_inode(struct btree_trans *trans, u32 snapshot) { int ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw, __write_inode(trans, inode, snapshot)); if (ret) bch_err_fn(trans->c, ret); @@ -354,8 +354,8 @@ static int reattach_inode(struct btree_trans *trans, u32 inode_snapshot) { int ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_lazy_rw| + BCH_TRANS_COMMIT_no_enospc, __reattach_inode(trans, inode, inode_snapshot)); bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum); return ret; @@ -757,8 +757,8 @@ static int hash_redo_key(struct btree_trans *trans, BCH_HASH_SET_MUST_CREATE, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw); } static int hash_check_key(struct btree_trans *trans, @@ -1015,7 +1015,7 @@ int bch2_check_inodes(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, check_inode(trans, &iter, k, &prev, &s, full)); snapshots_seen_exit(&s); @@ -1230,7 +1230,7 @@ static int overlapping_extents_found(struct btree_trans *trans, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE, k1, k2) ?: bch2_trans_commit(trans, &res, NULL, - BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc); bch2_disk_reservation_put(c, &res); if (ret) @@ -1469,7 +1469,7 @@ int bch2_check_extents(struct bch_fs *c) POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, &res, NULL, - BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ + BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, ({ bch2_disk_reservation_put(c, &res); check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: check_extent_overbig(trans, &iter, k); @@ -1498,7 +1498,7 @@ int bch2_check_indirect_extents(struct bch_fs *c) POS_MIN, BTREE_ITER_PREFETCH, k, &res, NULL, - BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ + BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, ({ bch2_disk_reservation_put(c, &res); check_extent_overbig(trans, &iter, k); })); @@ -1871,7 +1871,7 @@ int bch2_check_dirents(struct bch_fs *c) BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, - BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)); bch2_trans_put(trans); @@ -1935,7 +1935,7 @@ int bch2_check_xattrs(struct bch_fs *c) BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, - BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, check_xattr(trans, &iter, k, &hash_info, &inode))); bch_err_fn(c, ret); return ret; @@ -1966,8 +1966,8 @@ static int check_root_trans(struct btree_trans *trans) root_subvol.v.snapshot = cpu_to_le32(snapshot); root_subvol.v.inode = cpu_to_le64(inum); ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw, bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0)); bch_err_msg(c, ret, "writing root subvol"); @@ -2003,8 +2003,8 @@ int bch2_check_root(struct bch_fs *c) int ret; ret = bch2_trans_do(c, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw, check_root_trans(trans)); bch_err_fn(c, ret); return ret; @@ -2133,8 +2133,8 @@ static int check_path(struct btree_trans *trans, return 0; ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw, remove_backpointer(trans, inode)); if (ret) { bch_err(c, "error removing dirent: %i", ret); @@ -2415,7 +2415,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS(0, range_start), BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); if (ret < 0) { bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); @@ -2500,7 +2500,7 @@ int bch2_fix_reflink_p(struct bch_fs *c) BTREE_ID_extents, POS_MIN, BTREE_ITER_INTENT|BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, fix_reflink_p_key(trans, &iter, k))); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 9309cfeecd8d..4001b56c14d0 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -831,7 +831,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, ret = bch2_trans_update(trans, &iter, &delete, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); err: if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; @@ -894,7 +894,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); err: bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -1058,7 +1058,7 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); err: bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -1175,8 +1175,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c) for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw, may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass)); if (ret < 0) break; diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index bebc11444ef5..eab0c8c57785 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -256,7 +256,7 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, u64 new_i_size = le64_to_cpu(op->v.new_i_size); int ret; - ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, truncate_set_isize(trans, inum, new_i_size)); if (ret) goto err; @@ -378,7 +378,7 @@ case LOGGED_OP_FINSERT_start: op->v.state = LOGGED_OP_FINSERT_shift_extents; if (insert) { - ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, len) ?: bch2_logged_op_update(trans, &op->k_i)); if (ret) @@ -390,7 +390,7 @@ case LOGGED_OP_FINSERT_start: if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto err; - ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_logged_op_update(trans, &op->k_i)); } @@ -455,7 +455,7 @@ case LOGGED_OP_FINSERT_shift_extents: bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: - bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL); + bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc); btree_err: bch2_disk_reservation_put(c, &disk_res); @@ -470,12 +470,12 @@ case LOGGED_OP_FINSERT_shift_extents: op->v.state = LOGGED_OP_FINSERT_finish; if (!insert) { - ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, shift) ?: bch2_logged_op_update(trans, &op->k_i)); } else { /* We need an inode update to update bi_journal_seq for fsync: */ - ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, 0, 0) ?: bch2_logged_op_update(trans, &op->k_i)); } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 36763865facd..3281c4dd1d52 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -526,7 +526,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { - bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_trans_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, __bch2_rbio_narrow_crcs(trans, rbio)); } diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 8c8cb1541ac9..1dad2a351b71 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -316,8 +316,8 @@ int bch2_extent_update(struct btree_trans *trans, i_sectors_delta) ?: bch2_trans_update(trans, iter, k, 0) ?: bch2_trans_commit(trans, disk_res, NULL, - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc); if (unlikely(ret)) return ret; @@ -1176,7 +1176,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents, bkey_start_pos(&orig->k), orig->k.p, BTREE_ITER_INTENT, k, - NULL, NULL, BTREE_INSERT_NOFAIL, ({ + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size); })); diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 8640f7dee0de..9a76a9aab5c3 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -85,13 +85,13 @@ static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) { - return commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, __bch2_logged_op_start(trans, k)); } void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) { - int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0)); /* * This needs to be a fatal error because we've left an unfinished diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index a5cc0ed195d6..e6d081c0592c 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -155,7 +155,7 @@ int bch2_check_lrus(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index e3a51f6d6c9b..8e5688d0a8ca 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -90,7 +90,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags)); if (ret) break; diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 3319190b8d9c..db2139c0545d 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -69,7 +69,7 @@ static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum) int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) { - int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, __bch2_set_rebalance_needs_scan(trans, inum)); rebalance_wakeup(c); return ret; @@ -125,7 +125,7 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, extent_entry_drop(bkey_i_to_s(n), (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n))); - return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); + return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); } static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, @@ -273,7 +273,7 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie) r->state = BCH_REBALANCE_scanning; ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?: - commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_clear_rebalance_needs_scan(trans, inum, cookie)); bch2_move_stats_exit(&r->scan_stats, trans->c); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 88dbf11ba25d..5cbef7ad8cc8 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -175,9 +175,9 @@ static int bch2_journal_replay(struct bch_fs *c) /* Skip fastpath if we're low on space in the journal */ ret = c->journal.watermark ? -1 : commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_JOURNAL_RECLAIM| - (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0), + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_journal_reclaim| + (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), bch2_journal_replay_key(trans, k)); BUG_ON(!ret && !k->overwritten); if (ret) { @@ -203,9 +203,9 @@ static int bch2_journal_replay(struct bch_fs *c) replay_now_at(j, k->journal_seq); ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| + BCH_TRANS_COMMIT_no_enospc| (!k->allocated - ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim + ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim : 0), bch2_journal_replay_key(trans, k)); bch_err_msg(c, ret, "while replaying key at btree %s level %u:", @@ -506,7 +506,7 @@ static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) noinline_for_stack static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) { - int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW, + int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, __bch2_fs_upgrade_for_subvolumes(trans)); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 37d16e04e671..c39f172e7a64 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -398,7 +398,7 @@ s64 bch2_remap_range(struct bch_fs *c, inode_u.bi_size = new_i_size; ret2 = bch2_inode_write(trans, &inode_iter, &inode_u) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); } bch2_trans_iter_exit(trans, &inode_iter); diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 5dac038f0851..b23550b44098 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -590,7 +590,7 @@ int bch2_check_snapshot_trees(struct bch_fs *c) for_each_btree_key_commit(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, check_snapshot_tree(trans, &iter, k))); if (ret) @@ -868,7 +868,7 @@ int bch2_check_snapshots(struct bch_fs *c) for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_snapshots, POS_MAX, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, check_snapshot(trans, &iter, k))); if (ret) bch_err_fn(c, ret); @@ -1449,12 +1449,12 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - &res, NULL, BTREE_INSERT_NOFAIL, + &res, NULL, BCH_TRANS_COMMIT_no_enospc, snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?: for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - &res, NULL, BTREE_INSERT_NOFAIL, + &res, NULL, BCH_TRANS_COMMIT_no_enospc, move_key_to_correct_snapshot(trans, &iter, k)); bch2_disk_reservation_put(c, &res); @@ -1489,7 +1489,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) */ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, BTREE_ITER_INTENT, k, - NULL, NULL, BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior)); if (ret) goto err_create_lock; diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 22b34a8e4d6e..9a21fe86af78 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -89,7 +89,7 @@ int bch2_check_subvols(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, check_subvol(trans, &iter, k))); if (ret) bch_err_fn(c, ret); @@ -237,7 +237,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d BTREE_ITER_CACHED, &s)) ?: for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_NOFAIL, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_subvolume_reparent(trans, &iter, k, subvolid_to_delete, le32_to_cpu(s.parent))); } @@ -274,7 +274,7 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { return bch2_subvolumes_reparent(trans, subvolid) ?: - commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, __bch2_subvolume_delete(trans, subvolid)); } From eb54e81f27b56ba101e277029b153a7c892317f1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 12 Nov 2023 21:47:15 -0500 Subject: [PATCH 023/226] bcachefs: Improve btree_path_dowgrade tracepoint Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_locking.c | 4 ++-- fs/bcachefs/trace.h | 23 +++++++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 3d48834d091f..6039278121dc 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -665,7 +665,7 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, struct btree_path *path, unsigned new_locks_want) { - unsigned l; + unsigned l, old_locks_want = path->locks_want; if (trans->restarted) return; @@ -690,7 +690,7 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, bch2_btree_path_verify_locks(path); path->downgrade_seq++; - trace_path_downgrade(trans, _RET_IP_, path); + trace_path_downgrade(trans, _RET_IP_, path, old_locks_want); } /* Btree transaction locking: */ diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index fd49b63562c3..f663f21b07f3 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1252,22 +1252,37 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, TRACE_EVENT(path_downgrade, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path), + struct btree_path *path, + unsigned old_locks_want), + TP_ARGS(trans, caller_ip, path, old_locks_want), TP_STRUCT__entry( __array(char, trans_fn, 32 ) __field(unsigned long, caller_ip ) + __field(unsigned, old_locks_want ) + __field(unsigned, new_locks_want ) + __field(unsigned, btree ) + TRACE_BPOS_entries(pos) ), TP_fast_assign( strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; + __entry->old_locks_want = old_locks_want; + __entry->new_locks_want = path->locks_want; + __entry->btree = path->btree_id; + TRACE_BPOS_assign(pos, path->pos); ), - TP_printk("%s %pS", + TP_printk("%s %pS locks_want %u -> %u %s %llu:%llu:%u", __entry->trans_fn, - (void *) __entry->caller_ip) + (void *) __entry->caller_ip, + __entry->old_locks_want, + __entry->new_locks_want, + bch2_btree_id_str(__entry->btree), + __entry->pos_inode, + __entry->pos_offset, + __entry->pos_snapshot) ); DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, From cd404e5b05eb40f3ff27ab9d796f41f0a3b5a2c4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 12 Nov 2023 20:20:35 -0500 Subject: [PATCH 024/226] bcachefs: backpointers fsck no longer uses BTREE_ITER_ALL_LEVELS It appears that BTREE_ITER_ALL_LEVELS is racy with concurrent interior node btree updates; unfortunate but not terribly surprising it's a difficult problem - that was the original reason for gc_lock. BTREE_ITER_ALL_LEVELS will probably be deleted in a subsequent patch, this changes backpointers fsck to instead walk keys at one level of the btree at a time. This fixes the tiering_drop_alloc test, which stopped working with the patch to not flush the journal after journal replay. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 127 ++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 64 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 5025a71ad685..69bd6c281d1a 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -426,11 +426,13 @@ static int check_bp_exists(struct btree_trans *trans, memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { if (last_flushed->level != bp.level || !bpos_eq(last_flushed->pos, orig_k.k->p)) { + ret = bch2_btree_write_buffer_flush_sync(trans); + if (ret) + goto err; + last_flushed->level = bp.level; last_flushed->pos = orig_k.k->p; - - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; + ret = -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } goto missing; @@ -457,25 +459,18 @@ static int check_bp_exists(struct btree_trans *trans, } static int check_extent_to_backpointers(struct btree_trans *trans, - struct btree_iter *iter, + enum btree_id btree, unsigned level, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed) + struct bpos_level *last_flushed, + struct bkey_s_c k) { struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs; const union bch_extent_entry *entry; struct extent_ptr_decoded p; - struct bkey_s_c k; int ret; - k = bch2_btree_iter_peek_all_levels(iter); - ret = bkey_err(k); - if (ret) - return ret; - if (!k.k) - return 0; - ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { struct bpos bucket_pos; @@ -484,7 +479,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, if (p.ptr.cached) continue; - bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level, + bch2_extent_ptr_to_bp(c, btree, level, k, p, &bucket_pos, &bp); ret = check_bp_exists(trans, bucket_pos, bp, k, @@ -501,44 +496,33 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, enum btree_id btree_id, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed) + struct bpos_level *last_flushed, + int *level) { struct bch_fs *c = trans->c; - struct btree_root *r = bch2_btree_id_root(c, btree_id); struct btree_iter iter; struct btree *b; struct bkey_s_c k; - struct bkey_ptrs_c ptrs; - struct extent_ptr_decoded p; - const union bch_extent_entry *entry; int ret; - - bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, r->level, 0); +retry: + bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, + 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); b = bch2_btree_iter_peek_node(&iter); ret = PTR_ERR_OR_ZERO(b); if (ret) goto err; - BUG_ON(b != btree_node_root(c, b)); + if (b != btree_node_root(c, b)) { + bch2_trans_iter_exit(trans, &iter); + goto retry; + } + + *level = b->c.level; k = bkey_i_to_s_c(&b->key); - ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bpos bucket_pos; - struct bch_backpointer bp; - - if (p.ptr.cached) - continue; - - bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1, - k, p, &bucket_pos, &bp); - - ret = check_bp_exists(trans, bucket_pos, bp, k, + ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1, bucket_start, bucket_end, - last_flushed); - if (ret) - goto err; - } + last_flushed, k); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -616,43 +600,58 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; enum btree_id btree_id; - struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; + struct bkey_s_c k; int ret = 0; for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { - unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1; - - bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, - depth, - BTREE_ITER_ALL_LEVELS| - BTREE_ITER_PREFETCH); - - do { - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| - BCH_TRANS_COMMIT_no_enospc, - check_extent_to_backpointers(trans, &iter, - bucket_start, bucket_end, - &last_flushed)); - if (ret) - break; - } while (!bch2_btree_iter_advance(&iter)); - - bch2_trans_iter_exit(trans, &iter); - - if (ret) - break; + struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; + int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc, check_btree_root_to_backpointers(trans, btree_id, bucket_start, bucket_end, - &last_flushed)); + &last_flushed, &level)); if (ret) - break; + return ret; + + while (level >= depth) { + bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, + level, + BTREE_ITER_PREFETCH); + while (1) { + bch2_trans_begin(trans); + k = bch2_btree_iter_peek(&iter); + if (!k.k) + break; + ret = bkey_err(k) ?: + check_extent_to_backpointers(trans, btree_id, level, + bucket_start, bucket_end, + &last_flushed, k) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_lazy_rw| + BCH_TRANS_COMMIT_no_enospc); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + ret = 0; + continue; + } + if (ret) + break; + if (bpos_eq(iter.pos, SPOS_MAX)) + break; + bch2_btree_iter_advance(&iter); + } + bch2_trans_iter_exit(trans, &iter); + + if (ret) + return ret; + + --level; + } } - return ret; + + return 0; } static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c, From e56978c80d86523e90c8fb4a23dfca9db5f60bf7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 12 Nov 2023 20:35:51 -0500 Subject: [PATCH 025/226] bcachefs: Kill BTREE_ITER_ALL_LEVELS As discussed in the previous patch, BTREE_ITER_ALL_LEVELS appears to be racy with concurrent interior node updates - and perhaps it is fixable, but it's tricky and unnecessary. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 123 +++----------------------------------- fs/bcachefs/btree_iter.h | 10 +--- fs/bcachefs/btree_types.h | 31 +++++----- 3 files changed, 24 insertions(+), 140 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index bf4d2ade4535..f58e03e3e038 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1797,23 +1797,15 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) inline bool bch2_btree_iter_advance(struct btree_iter *iter) { - if (likely(!(iter->flags & BTREE_ITER_ALL_LEVELS))) { - struct bpos pos = iter->k.p; - bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS - ? bpos_eq(pos, SPOS_MAX) - : bkey_eq(pos, SPOS_MAX)); + struct bpos pos = iter->k.p; + bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS + ? bpos_eq(pos, SPOS_MAX) + : bkey_eq(pos, SPOS_MAX)); - if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) - pos = bkey_successor(iter, pos); - bch2_btree_iter_set_pos(iter, pos); - return ret; - } else { - if (!btree_path_node(iter->path, iter->path->level)) - return true; - - iter->advanced = true; - return false; - } + if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) + pos = bkey_successor(iter, pos); + bch2_btree_iter_set_pos(iter, pos); + return ret; } inline bool bch2_btree_iter_rewind(struct btree_iter *iter) @@ -2064,7 +2056,6 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e struct bpos iter_pos; int ret; - EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS); EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX)); if (iter->update_path) { @@ -2203,103 +2194,6 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e goto out_no_locked; } -/** - * bch2_btree_iter_peek_all_levels() - returns the first key greater than or - * equal to iterator's current position, returning keys from every level of the - * btree. For keys at different levels of the btree that compare equal, the key - * from the lower level (leaf) is returned first. - * @iter: iterator to peek from - * - * Returns: key if found, or an error extractable with bkey_err(). - */ -struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter) -{ - struct btree_trans *trans = iter->trans; - struct bkey_s_c k; - int ret; - - EBUG_ON(iter->path->cached); - bch2_btree_iter_verify(iter); - BUG_ON(iter->path->level < iter->min_depth); - BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); - EBUG_ON(!(iter->flags & BTREE_ITER_ALL_LEVELS)); - - while (1) { - iter->path = bch2_btree_path_set_pos(trans, iter->path, iter->pos, - iter->flags & BTREE_ITER_INTENT, - btree_iter_ip_allocated(iter)); - - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); - if (unlikely(ret)) { - /* ensure that iter->k is consistent with iter->pos: */ - bch2_btree_iter_set_pos(iter, iter->pos); - k = bkey_s_c_err(ret); - goto out_no_locked; - } - - /* Already at end? */ - if (!btree_path_node(iter->path, iter->path->level)) { - k = bkey_s_c_null; - goto out_no_locked; - } - - k = btree_path_level_peek_all(trans->c, - &iter->path->l[iter->path->level], &iter->k); - - /* Check if we should go up to the parent node: */ - if (!k.k || - (iter->advanced && - bpos_eq(path_l(iter->path)->b->key.k.p, iter->pos))) { - iter->pos = path_l(iter->path)->b->key.k.p; - btree_path_set_level_up(trans, iter->path); - iter->advanced = false; - continue; - } - - /* - * Check if we should go back down to a leaf: - * If we're not in a leaf node, we only return the current key - * if it exactly matches iter->pos - otherwise we first have to - * go back to the leaf: - */ - if (iter->path->level != iter->min_depth && - (iter->advanced || - !k.k || - !bpos_eq(iter->pos, k.k->p))) { - btree_path_set_level_down(trans, iter->path, iter->min_depth); - iter->pos = bpos_successor(iter->pos); - iter->advanced = false; - continue; - } - - /* Check if we should go to the next key: */ - if (iter->path->level == iter->min_depth && - iter->advanced && - k.k && - bpos_eq(iter->pos, k.k->p)) { - iter->pos = bpos_successor(iter->pos); - iter->advanced = false; - continue; - } - - if (iter->advanced && - iter->path->level == iter->min_depth && - !bpos_eq(k.k->p, iter->pos)) - iter->advanced = false; - - BUG_ON(iter->advanced); - BUG_ON(!k.k); - break; - } - - iter->pos = k.k->p; - btree_path_set_should_be_locked(iter->path); -out_no_locked: - bch2_btree_iter_verify(iter); - - return k; -} - /** * bch2_btree_iter_next() - returns first key greater than iterator's current * position @@ -2466,7 +2360,6 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS); EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE)); /* extents can't span inode numbers: */ diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index eaffced4c132..7286f92b1974 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -348,8 +348,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *); struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); -struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *); - static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) { return bch2_btree_iter_peek_upto(iter, SPOS_MAX); @@ -408,9 +406,6 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, unsigned btree_id, unsigned flags) { - if (flags & BTREE_ITER_ALL_LEVELS) - flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS; - if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && btree_id_is_extents(btree_id)) flags |= BTREE_ITER_IS_EXTENTS; @@ -606,8 +601,6 @@ u32 bch2_trans_begin(struct btree_trans *); static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, unsigned flags) { - BUG_ON(flags & BTREE_ITER_ALL_LEVELS); - return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) : bch2_btree_iter_peek_prev(iter); } @@ -615,8 +608,7 @@ static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter * static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, unsigned flags) { - return flags & BTREE_ITER_ALL_LEVELS ? bch2_btree_iter_peek_all_levels(iter) : - flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) : + return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) : bch2_btree_iter_peek(iter); } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index b667da4e8403..7f17c23326c7 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -185,33 +185,32 @@ struct btree_node_iter { * Iterate over all possible positions, synthesizing deleted keys for holes: */ static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0; -static const __maybe_unused u16 BTREE_ITER_ALL_LEVELS = 1 << 1; /* * Indicates that intent locks should be taken on leaf nodes, because we expect * to be doing updates: */ -static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 2; +static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 1; /* * Causes the btree iterator code to prefetch additional btree nodes from disk: */ -static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 3; +static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 2; /* * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for * @pos or the first key strictly greater than @pos */ -static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 4; -static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 5; -static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 6; -static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7; -static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 8; -static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 9; -static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10; -static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11; -static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12; -static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 13; -static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 14; -static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15; -#define __BTREE_ITER_FLAGS_END 16 +static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 3; +static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 4; +static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 5; +static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 6; +static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 7; +static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 8; +static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 9; +static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 10; +static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 11; +static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 12; +static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 13; +static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 14; +#define __BTREE_ITER_FLAGS_END 15 enum btree_path_uptodate { BTREE_ITER_UPTODATE = 0, From 5fd24caf572db0d1ccb03d3132fa1e249e84dd8b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 13 Nov 2023 19:55:09 -0500 Subject: [PATCH 026/226] bcachefs: Fix userspace bch2_prt_datetime() ctime_r() outputs a newline, which we don't want. Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 8d580d5a9c4d..e85b1f467295 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -322,6 +322,7 @@ void bch2_prt_datetime(struct printbuf *out, time64_t sec) time_t t = sec; char buf[64]; ctime_r(&t, buf); + strim(buf); prt_str(out, buf); } #else From b4b79b076445308389781a94934f411cb3e79c79 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 13 Nov 2023 21:12:35 -0500 Subject: [PATCH 027/226] bcachefs: Don't rejournal keys in key cache flush Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 4599e999c231..6d98018912be 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -645,11 +645,19 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, if (journal_seq && ck->journal.seq != journal_seq) goto out; + trans->journal_res.seq = ck->journal.seq; + /* - * Since journal reclaim depends on us making progress here, and the - * allocator/copygc depend on journal reclaim making progress, we need - * to be using alloc reserves: + * If we're at the end of the journal, we really want to free up space + * in the journal right away - we don't want to pin that old journal + * sequence number with a new btree node write, we want to re-journal + * the update */ + if (ck->journal.seq == journal_last_seq(j)) + commit_flags |= BCH_WATERMARK_reclaim; + else + commit_flags |= BCH_TRANS_COMMIT_no_journal_res; + ret = bch2_btree_iter_traverse(&b_iter) ?: bch2_trans_update(trans, &b_iter, ck->k, BTREE_UPDATE_KEY_CACHE_RECLAIM| @@ -658,9 +666,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_enospc| - (ck->journal.seq == journal_last_seq(j) - ? BCH_WATERMARK_reclaim - : 0)| commit_flags); bch2_fs_fatal_err_on(ret && From b27d7afb791715f4017f94c58168676c76398986 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 Nov 2023 23:43:35 -0500 Subject: [PATCH 028/226] bcachefs: Don't flush journal after replay The flush_all_pins() after journal replay was unecessary, and trying to completely flush the journal while RW is not a great idea - it's not guaranteed to terminate if other threads keep adding things to the jorunal. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 5cbef7ad8cc8..a588092241ca 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -230,10 +230,8 @@ static int bch2_journal_replay(struct bch_fs *c) j->replay_journal_seq = 0; bch2_journal_set_replay_done(j); - bch2_journal_flush_all_pins(j); - ret = bch2_journal_error(j); - if (keys->nr && !ret) + if (keys->nr) bch2_journal_log_msg(c, "journal replay finished"); err: if (trans) From ae0e61175e22b77267c2c59d0061925a0a604074 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Nov 2023 20:41:10 -0500 Subject: [PATCH 029/226] bcachefs: Add a tracepoint for journal entry close Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 2 ++ fs/bcachefs/trace.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 63fb115da157..fb983061af4a 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -184,6 +184,8 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val) /* Close out old buffer: */ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); + trace_journal_entry_close(c, vstruct_bytes(buf->data)); + sectors = vstruct_blocks_plus(buf->data, c->block_bits, buf->u64s_reserved) << c->block_bits; BUG_ON(sectors > buf->sectors); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index f663f21b07f3..52db4df67c3a 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -188,6 +188,25 @@ DEFINE_EVENT(bch_fs, journal_entry_full, TP_ARGS(c) ); +TRACE_EVENT(journal_entry_close, + TP_PROTO(struct bch_fs *c, unsigned bytes), + TP_ARGS(c, bytes), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(u32, bytes ) + ), + + TP_fast_assign( + __entry->dev = c->dev; + __entry->bytes = bytes; + ), + + TP_printk("%d,%d entry bytes %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->bytes) +); + DEFINE_EVENT(bio, journal_write, TP_PROTO(struct bio *bio), TP_ARGS(bio) From 1ae8a0904a6a7b16ee2ffa30153d4e5c38719321 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Nov 2023 22:35:29 -0500 Subject: [PATCH 030/226] bcachefs: Kill memset() in bch2_btree_iter_init() Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 7 +++++++ fs/bcachefs/btree_iter.h | 19 +++++++++++-------- fs/bcachefs/btree_types.h | 3 +-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index fe78e87603fc..60b2264e019e 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -307,6 +307,13 @@ struct bkey_i { struct bch_val v; }; +#define POS_KEY(_pos) \ +((struct bkey) { \ + .u64s = BKEY_U64s, \ + .format = KEY_FORMAT_CURRENT, \ + .p = _pos, \ +}) + #define KEY(_inode, _offset, _size) \ ((struct bkey) { \ .u64s = BKEY_U64s, \ diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 7286f92b1974..064720171551 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -445,14 +445,17 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans, unsigned flags, unsigned long ip) { - memset(iter, 0, sizeof(*iter)); - iter->trans = trans; - iter->btree_id = btree_id; - iter->flags = flags; - iter->snapshot = pos.snapshot; - iter->pos = pos; - iter->k.p = pos; - + iter->trans = trans; + iter->update_path = NULL; + iter->key_cache_path = NULL; + iter->btree_id = btree_id; + iter->min_depth = 0; + iter->flags = flags; + iter->snapshot = pos.snapshot; + iter->pos = pos; + iter->k = POS_KEY(pos); + iter->journal_idx = 0; + iter->journal_pos = POS_MIN; #ifdef CONFIG_BCACHEFS_DEBUG iter->ip_allocated = ip; #endif diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 7f17c23326c7..662852b1e12f 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -287,8 +287,7 @@ struct btree_iter { struct btree_path *key_cache_path; enum btree_id btree_id:8; - unsigned min_depth:3; - unsigned advanced:1; + u8 min_depth; /* btree_iter_copy starts here: */ u16 flags; From ad9c7992eb9906caf17aa88f1fd41770879d1cb6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 17 Nov 2023 18:38:09 -0500 Subject: [PATCH 031/226] bcachefs: Kill btree_iter->journal_pos For BTREE_ITER_WITH_JOURNAL, we memoize lookups in the journal keys, to avoid the binary search overhead. Previously we stashed the pos of the last key returned from the journal, in order to force the lookup to be redone when rewinding. Now bch2_journal_keys_peek_upto() handles rewinding itself when necessary - so we can slim down btree_iter. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 18 +++++------------- fs/bcachefs/btree_iter.h | 1 - fs/bcachefs/btree_journal_iter.c | 19 +++++++++++++++++-- fs/bcachefs/btree_types.h | 1 - 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index f58e03e3e038..89592a4f3563 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1854,19 +1854,11 @@ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, struct btree_iter *iter, struct bpos end_pos) { - struct bkey_i *k; - - if (bpos_lt(iter->path->pos, iter->journal_pos)) - iter->journal_idx = 0; - - k = bch2_journal_keys_peek_upto(trans->c, iter->btree_id, - iter->path->level, - iter->path->pos, - end_pos, - &iter->journal_idx); - - iter->journal_pos = k ? k->k.p : end_pos; - return k; + return bch2_journal_keys_peek_upto(trans->c, iter->btree_id, + iter->path->level, + iter->path->pos, + end_pos, + &iter->journal_idx); } static noinline diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 064720171551..9ef5021a6d37 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -455,7 +455,6 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans, iter->pos = pos; iter->k = POS_KEY(pos); iter->journal_idx = 0; - iter->journal_pos = POS_MIN; #ifdef CONFIG_BCACHEFS_DEBUG iter->ip_allocated = ip; #endif diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index ec52f50d249d..7a5e0a893df9 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -73,6 +73,7 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos)); } +/* Returns first non-overwritten key >= search key: */ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id, unsigned level, struct bpos pos, struct bpos end_pos, size_t *idx) @@ -86,12 +87,26 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree if (!*idx) *idx = __bch2_journal_key_search(keys, btree_id, level, pos); + while (*idx && + __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { + --(*idx); + iters++; + if (iters == 10) { + *idx = 0; + goto search; + } + } + while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) return NULL; - if (__journal_key_cmp(btree_id, level, pos, k) <= 0 && - !k->overwritten) + if (k->overwritten) { + (*idx)++; + continue; + } + + if (__journal_key_cmp(btree_id, level, pos, k) <= 0) return k->k; (*idx)++; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 662852b1e12f..ca7526603d06 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -304,7 +304,6 @@ struct btree_iter { /* BTREE_ITER_WITH_JOURNAL: */ size_t journal_idx; - struct bpos journal_pos; #ifdef TRACK_PATH_ALLOCATED unsigned long ip_allocated; #endif From 086a52f7fa9d7dd0755a93c368f51253ea0852c8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 9 Nov 2023 13:52:35 -0500 Subject: [PATCH 032/226] bcachefs: Rename bch_replicas_entry -> bch_replicas_entry_v1 Prep work for introducing bch_replicas_entry_v2 Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 6 ++-- fs/bcachefs/bcachefs_ioctl.h | 2 +- fs/bcachefs/btree_gc.c | 2 +- fs/bcachefs/buckets.c | 10 +++--- fs/bcachefs/chardev.c | 2 +- fs/bcachefs/ec_types.h | 2 +- fs/bcachefs/replicas.c | 60 +++++++++++++++++------------------ fs/bcachefs/replicas.h | 22 ++++++------- fs/bcachefs/replicas_types.h | 4 +-- fs/bcachefs/sb-clean.c | 2 +- 10 files changed, 56 insertions(+), 56 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 60b2264e019e..d2eb1bd0c8ee 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1449,7 +1449,7 @@ struct bch_sb_field_replicas_v0 { struct bch_replicas_entry_v0 entries[]; } __packed __aligned(8); -struct bch_replicas_entry { +struct bch_replicas_entry_v1 { __u8 data_type; __u8 nr_devs; __u8 nr_required; @@ -1461,7 +1461,7 @@ struct bch_replicas_entry { struct bch_sb_field_replicas { struct bch_sb_field field; - struct bch_replicas_entry entries[]; + struct bch_replicas_entry_v1 entries[]; } __packed __aligned(8); /* BCH_SB_FIELD_quota: */ @@ -2210,7 +2210,7 @@ struct jset_entry_usage { struct jset_entry_data_usage { struct jset_entry entry; __le64 v; - struct bch_replicas_entry r; + struct bch_replicas_entry_v1 r; } __packed; struct jset_entry_clock { diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index f05881f7e113..934da1b0e37b 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -237,7 +237,7 @@ struct bch_ioctl_data_event { struct bch_replicas_usage { __u64 sectors; - struct bch_replicas_entry r; + struct bch_replicas_entry_v1 r; } __packed; static inline struct bch_replicas_usage * diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index f53afa2acd8c..2304a6ab0d0c 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1284,7 +1284,7 @@ static int bch2_gc_done(struct bch_fs *c, } for (i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry *e = + struct bch_replicas_entry_v1 *e = cpu_replicas_entry(&c->replicas, i); if (metadata_only && diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 5a91d3189fcf..dd794b63a358 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -61,7 +61,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c) usage->reserved += usage->persistent_reserved[i]; for (i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry *e = + struct bch_replicas_entry_v1 *e = cpu_replicas_entry(&c->replicas, i); fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]); @@ -214,7 +214,7 @@ void bch2_fs_usage_to_text(struct printbuf *out, } for (i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry *e = + struct bch_replicas_entry_v1 *e = cpu_replicas_entry(&c->replicas, i); prt_printf(out, "\t"); @@ -345,7 +345,7 @@ static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, static inline int __update_replicas(struct bch_fs *c, struct bch_fs_usage *fs_usage, - struct bch_replicas_entry *r, + struct bch_replicas_entry_v1 *r, s64 sectors) { int idx = bch2_replicas_entry_idx(c, r); @@ -359,7 +359,7 @@ static inline int __update_replicas(struct bch_fs *c, } static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, - struct bch_replicas_entry *r, s64 sectors, + struct bch_replicas_entry_v1 *r, s64 sectors, unsigned journal_seq, bool gc) { struct bch_fs_usage *fs_usage; @@ -454,7 +454,7 @@ int bch2_replicas_deltas_realloc(struct btree_trans *trans, unsigned more) } static inline int update_replicas_list(struct btree_trans *trans, - struct bch_replicas_entry *r, + struct bch_replicas_entry_v1 *r, s64 sectors) { struct replicas_delta_list *d; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 4bb88aefed12..de3d82de9d29 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -444,7 +444,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c, dst_end = (void *) arg->replicas + replica_entries_bytes; for (i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry *src_e = + struct bch_replicas_entry_v1 *src_e = cpu_replicas_entry(&c->replicas, i); /* check that we have enough space for one replicas entry */ diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h index e2b02a82de32..976426da3a12 100644 --- a/fs/bcachefs/ec_types.h +++ b/fs/bcachefs/ec_types.h @@ -5,7 +5,7 @@ #include "bcachefs_format.h" struct bch_replicas_padded { - struct bch_replicas_entry e; + struct bch_replicas_entry_v1 e; u8 pad[BCH_BKEY_PTRS_MAX]; }; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 2008fe8bf706..ccb776e045dc 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -11,7 +11,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, /* Replicas tracking - in memory: */ -static void verify_replicas_entry(struct bch_replicas_entry *e) +static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG unsigned i; @@ -26,7 +26,7 @@ static void verify_replicas_entry(struct bch_replicas_entry *e) #endif } -void bch2_replicas_entry_sort(struct bch_replicas_entry *e) +void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) { bubble_sort(e->devs, e->nr_devs, u8_cmp); } @@ -53,7 +53,7 @@ static void bch2_replicas_entry_v0_to_text(struct printbuf *out, } void bch2_replicas_entry_to_text(struct printbuf *out, - struct bch_replicas_entry *e) + struct bch_replicas_entry_v1 *e) { unsigned i; @@ -68,7 +68,7 @@ void bch2_replicas_entry_to_text(struct printbuf *out, prt_printf(out, "]"); } -int bch2_replicas_entry_validate(struct bch_replicas_entry *r, +int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, struct bch_sb *sb, struct printbuf *err) { @@ -98,7 +98,7 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry *r, void bch2_cpu_replicas_to_text(struct printbuf *out, struct bch_replicas_cpu *r) { - struct bch_replicas_entry *e; + struct bch_replicas_entry_v1 *e; bool first = true; for_each_cpu_replicas_entry(r, e) { @@ -111,7 +111,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *out, } static void extent_to_replicas(struct bkey_s_c k, - struct bch_replicas_entry *r) + struct bch_replicas_entry_v1 *r) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -131,7 +131,7 @@ static void extent_to_replicas(struct bkey_s_c k, } static void stripe_to_replicas(struct bkey_s_c k, - struct bch_replicas_entry *r) + struct bch_replicas_entry_v1 *r) { struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); const struct bch_extent_ptr *ptr; @@ -144,7 +144,7 @@ static void stripe_to_replicas(struct bkey_s_c k, r->devs[r->nr_devs++] = ptr->dev; } -void bch2_bkey_to_replicas(struct bch_replicas_entry *e, +void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e, struct bkey_s_c k) { e->nr_devs = 0; @@ -169,7 +169,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e, bch2_replicas_entry_sort(e); } -void bch2_devlist_to_replicas(struct bch_replicas_entry *e, +void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e, enum bch_data_type data_type, struct bch_devs_list devs) { @@ -192,7 +192,7 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e, static struct bch_replicas_cpu cpu_replicas_add_entry(struct bch_fs *c, struct bch_replicas_cpu *old, - struct bch_replicas_entry *new_entry) + struct bch_replicas_entry_v1 *new_entry) { unsigned i; struct bch_replicas_cpu new = { @@ -225,7 +225,7 @@ cpu_replicas_add_entry(struct bch_fs *c, } static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, - struct bch_replicas_entry *search) + struct bch_replicas_entry_v1 *search) { int idx, entry_size = replicas_entry_bytes(search); @@ -243,7 +243,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, } int bch2_replicas_entry_idx(struct bch_fs *c, - struct bch_replicas_entry *search) + struct bch_replicas_entry_v1 *search) { bch2_replicas_entry_sort(search); @@ -251,13 +251,13 @@ int bch2_replicas_entry_idx(struct bch_fs *c, } static bool __replicas_has_entry(struct bch_replicas_cpu *r, - struct bch_replicas_entry *search) + struct bch_replicas_entry_v1 *search) { return __replicas_entry_idx(r, search) >= 0; } bool bch2_replicas_marked(struct bch_fs *c, - struct bch_replicas_entry *search) + struct bch_replicas_entry_v1 *search) { bool marked; @@ -374,7 +374,7 @@ static int replicas_table_update(struct bch_fs *c, static unsigned reserve_journal_replicas(struct bch_fs *c, struct bch_replicas_cpu *r) { - struct bch_replicas_entry *e; + struct bch_replicas_entry_v1 *e; unsigned journal_res_u64s = 0; /* nr_inodes: */ @@ -399,7 +399,7 @@ static unsigned reserve_journal_replicas(struct bch_fs *c, noinline static int bch2_mark_replicas_slowpath(struct bch_fs *c, - struct bch_replicas_entry *new_entry) + struct bch_replicas_entry_v1 *new_entry) { struct bch_replicas_cpu new_r, new_gc; int ret = 0; @@ -464,7 +464,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, goto out; } -int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r) +int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r) { return likely(bch2_replicas_marked(c, r)) ? 0 : bch2_mark_replicas_slowpath(c, r); @@ -515,7 +515,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) { - struct bch_replicas_entry *e; + struct bch_replicas_entry_v1 *e; unsigned i = 0; lockdep_assert_held(&c->replicas_gc_lock); @@ -590,7 +590,7 @@ int bch2_replicas_gc2(struct bch_fs *c) } for (i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry *e = + struct bch_replicas_entry_v1 *e = cpu_replicas_entry(&c->replicas, i); if (e->data_type == BCH_DATA_journal || @@ -621,7 +621,7 @@ int bch2_replicas_gc2(struct bch_fs *c) } int bch2_replicas_set_usage(struct bch_fs *c, - struct bch_replicas_entry *r, + struct bch_replicas_entry_v1 *r, u64 sectors) { int ret, idx = bch2_replicas_entry_idx(c, r); @@ -654,7 +654,7 @@ static int __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r, struct bch_replicas_cpu *cpu_r) { - struct bch_replicas_entry *e, *dst; + struct bch_replicas_entry_v1 *e, *dst; unsigned nr = 0, entry_size = 0, idx = 0; for_each_replicas_entry(sb_r, e) { @@ -692,7 +692,7 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, nr++; } - entry_size += sizeof(struct bch_replicas_entry) - + entry_size += sizeof(struct bch_replicas_entry_v1) - sizeof(struct bch_replicas_entry_v0); cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL); @@ -703,7 +703,7 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, cpu_r->entry_size = entry_size; for_each_replicas_entry(sb_r, e) { - struct bch_replicas_entry *dst = + struct bch_replicas_entry_v1 *dst = cpu_replicas_entry(cpu_r, idx++); dst->data_type = e->data_type; @@ -747,7 +747,7 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, { struct bch_sb_field_replicas_v0 *sb_r; struct bch_replicas_entry_v0 *dst; - struct bch_replicas_entry *src; + struct bch_replicas_entry_v1 *src; size_t bytes; bytes = sizeof(struct bch_sb_field_replicas); @@ -785,7 +785,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, struct bch_replicas_cpu *r) { struct bch_sb_field_replicas *sb_r; - struct bch_replicas_entry *dst, *src; + struct bch_replicas_entry_v1 *dst, *src; bool need_v1 = false; size_t bytes; @@ -836,7 +836,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, memcmp, NULL); for (i = 0; i < cpu_r->nr; i++) { - struct bch_replicas_entry *e = + struct bch_replicas_entry_v1 *e = cpu_replicas_entry(cpu_r, i); int ret = bch2_replicas_entry_validate(e, sb, err); @@ -844,7 +844,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, return ret; if (i + 1 < cpu_r->nr) { - struct bch_replicas_entry *n = + struct bch_replicas_entry_v1 *n = cpu_replicas_entry(cpu_r, i + 1); BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0); @@ -881,7 +881,7 @@ static void bch2_sb_replicas_to_text(struct printbuf *out, struct bch_sb_field *f) { struct bch_sb_field_replicas *r = field_to_type(f, replicas); - struct bch_replicas_entry *e; + struct bch_replicas_entry_v1 *e; bool first = true; for_each_replicas_entry(r, e) { @@ -943,7 +943,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, unsigned flags, bool print) { - struct bch_replicas_entry *e; + struct bch_replicas_entry_v1 *e; bool ret = true; percpu_down_read(&c->mark_lock); @@ -1003,7 +1003,7 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) replicas_v0 = bch2_sb_field_get(sb, replicas_v0); if (replicas) { - struct bch_replicas_entry *r; + struct bch_replicas_entry_v1 *r; for_each_replicas_entry(replicas, r) for (i = 0; i < r->nr_devs; i++) diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index f70a642775d1..654a4b26d3a3 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -6,28 +6,28 @@ #include "eytzinger.h" #include "replicas_types.h" -void bch2_replicas_entry_sort(struct bch_replicas_entry *); +void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *); void bch2_replicas_entry_to_text(struct printbuf *, - struct bch_replicas_entry *); -int bch2_replicas_entry_validate(struct bch_replicas_entry *, + struct bch_replicas_entry_v1 *); +int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *, struct bch_sb *, struct printbuf *); void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); -static inline struct bch_replicas_entry * +static inline struct bch_replicas_entry_v1 * cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i) { return (void *) r->entries + r->entry_size * i; } int bch2_replicas_entry_idx(struct bch_fs *, - struct bch_replicas_entry *); + struct bch_replicas_entry_v1 *); -void bch2_devlist_to_replicas(struct bch_replicas_entry *, +void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *, enum bch_data_type, struct bch_devs_list); -bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *); +bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry_v1 *); int bch2_mark_replicas(struct bch_fs *, - struct bch_replicas_entry *); + struct bch_replicas_entry_v1 *); static inline struct replicas_delta * replicas_delta_next(struct replicas_delta *d) @@ -37,9 +37,9 @@ replicas_delta_next(struct replicas_delta *d) int bch2_replicas_delta_list_mark(struct bch_fs *, struct replicas_delta_list *); -void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c); +void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *, struct bkey_s_c); -static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e, +static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e, unsigned dev) { e->data_type = BCH_DATA_cached; @@ -59,7 +59,7 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned); int bch2_replicas_gc2(struct bch_fs *); int bch2_replicas_set_usage(struct bch_fs *, - struct bch_replicas_entry *, + struct bch_replicas_entry_v1 *, u64); #define for_each_cpu_replicas_entry(_r, _i) \ diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h index 5cfff489bbc3..030324078bba 100644 --- a/fs/bcachefs/replicas_types.h +++ b/fs/bcachefs/replicas_types.h @@ -5,12 +5,12 @@ struct bch_replicas_cpu { unsigned nr; unsigned entry_size; - struct bch_replicas_entry *entries; + struct bch_replicas_entry_v1 *entries; }; struct replicas_delta { s64 delta; - struct bch_replicas_entry r; + struct bch_replicas_entry_v1 r; } __packed; struct replicas_delta_list { diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index c76ad8ea5e4a..bc1d9808808d 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -235,7 +235,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, } for (i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry *e = + struct bch_replicas_entry_v1 *e = cpu_replicas_entry(&c->replicas, i); struct jset_entry_data_usage *u = container_of(jset_entry_init(end, sizeof(*u) + e->nr_devs), From 25f64e997e4bd864b4426ba40b3a48d276665fea Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 Nov 2023 17:40:45 -0500 Subject: [PATCH 033/226] bcachefs: Don't use update_cached_sectors() in bch2_mark_alloc() bch2_update_cached_sectors_list() is closer to how the new disk space accounting works, called from trans_mark(). Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 13 +++++++++++ fs/bcachefs/buckets.c | 40 ++++++++++------------------------ fs/bcachefs/buckets.h | 3 +++ 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 3326b7a90204..dcfe26fdb500 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -847,6 +847,19 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, return ret; } + /* + * need to know if we're getting called from the invalidate path or + * not: + */ + + if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && + old_a->cached_sectors) { + ret = bch2_update_cached_sectors_list(trans, new->k.p.inode, + -((s64) old_a->cached_sectors)); + if (ret) + return ret; + } + return 0; } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index dd794b63a358..0908da5e9886 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -453,9 +453,9 @@ int bch2_replicas_deltas_realloc(struct btree_trans *trans, unsigned more) __replicas_deltas_realloc(trans, more, _gfp)); } -static inline int update_replicas_list(struct btree_trans *trans, - struct bch_replicas_entry_v1 *r, - s64 sectors) +int bch2_update_replicas_list(struct btree_trans *trans, + struct bch_replicas_entry_v1 *r, + s64 sectors) { struct replicas_delta_list *d; struct replicas_delta *n; @@ -481,14 +481,13 @@ static inline int update_replicas_list(struct btree_trans *trans, return 0; } -static inline int update_cached_sectors_list(struct btree_trans *trans, - unsigned dev, s64 sectors) +int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 sectors) { struct bch_replicas_padded r; bch2_replicas_entry_cached(&r.e, dev); - return update_replicas_list(trans, &r.e, sectors); + return bch2_update_replicas_list(trans, &r.e, sectors); } int bch2_mark_alloc(struct btree_trans *trans, @@ -580,23 +579,6 @@ int bch2_mark_alloc(struct btree_trans *trans, } percpu_up_read(&c->mark_lock); - /* - * need to know if we're getting called from the invalidate path or - * not: - */ - - if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && - old_a->cached_sectors) { - ret = update_cached_sectors(c, new, ca->dev_idx, - -((s64) old_a->cached_sectors), - journal_seq, gc); - if (ret) { - bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors", - __func__); - return ret; - } - } - if (new_a->data_type == BCH_DATA_free && (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk)) closure_wake_up(&c->freelist_wait); @@ -1474,7 +1456,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); r.e.data_type = data_type; - ret = update_replicas_list(trans, &r.e, sectors); + ret = bch2_update_replicas_list(trans, &r.e, sectors); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -1517,8 +1499,8 @@ static int __trans_mark_extent(struct btree_trans *trans, if (p.ptr.cached) { if (!stale) { - ret = update_cached_sectors_list(trans, p.ptr.dev, - disk_sectors); + ret = bch2_update_cached_sectors_list(trans, p.ptr.dev, + disk_sectors); if (ret) return ret; } @@ -1536,7 +1518,7 @@ static int __trans_mark_extent(struct btree_trans *trans, } if (r.e.nr_devs) - ret = update_replicas_list(trans, &r.e, dirty_sectors); + ret = bch2_update_replicas_list(trans, &r.e, dirty_sectors); return ret; } @@ -1673,7 +1655,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, s64 sectors = le16_to_cpu(new_s->sectors); bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new)); - ret = update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); + ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); if (ret) return ret; } @@ -1682,7 +1664,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); bch2_bkey_to_replicas(&r.e, old); - ret = update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); + ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); if (ret) return ret; } diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 21f6cb356921..5574b62e0553 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -315,6 +315,9 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, : c->usage[journal_seq & JOURNAL_BUF_MASK]); } +int bch2_update_replicas_list(struct btree_trans *, + struct bch_replicas_entry_v1 *, s64); +int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64); int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned); void bch2_fs_usage_initialize(struct bch_fs *); From 2b161cc7cb077e9f8479f1aa132c78c37ce845cd Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Mon, 20 Nov 2023 09:53:36 +1300 Subject: [PATCH 034/226] bcachefs: add a quieter bch2_read_super If we're looking for a bcachefs supers iteratively we don't want to see this error. This function replaces KERN_ERR with KERN_INFO for when we don't find a bcachefs superblock but preserves other errors. Signed-off-by: Daniel Hill Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 27 ++++++++++++++++++++++++--- fs/bcachefs/super-io.h | 1 + 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 4c98d8cc2a79..320a04a42e5b 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -692,12 +692,13 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf return 0; } -int bch2_read_super(const char *path, struct bch_opts *opts, - struct bch_sb_handle *sb) +static int __bch2_read_super(const char *path, struct bch_opts *opts, + struct bch_sb_handle *sb, bool ignore_notbchfs_msg) { u64 offset = opt_get(*opts, sb); struct bch_sb_layout layout; struct printbuf err = PRINTBUF; + struct printbuf err2 = PRINTBUF; __le64 *i; int ret; #ifndef __KERNEL__ @@ -760,8 +761,14 @@ int bch2_read_super(const char *path, struct bch_opts *opts, if (opt_defined(*opts, sb)) goto err; - printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s\n", + prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n", path, err.buf); + if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg) + printk(KERN_INFO "%s", err2.buf); + else + printk(KERN_ERR "%s", err2.buf); + + printbuf_exit(&err2); printbuf_reset(&err); /* @@ -837,6 +844,20 @@ int bch2_read_super(const char *path, struct bch_opts *opts, goto out; } +int bch2_read_super(const char *path, struct bch_opts *opts, + struct bch_sb_handle *sb) +{ + return __bch2_read_super(path, opts, sb, false); +} + +/* provide a silenced version for mount.bcachefs */ + +int bch2_read_super_silent(const char *path, struct bch_opts *opts, + struct bch_sb_handle *sb) +{ + return __bch2_read_super(path, opts, sb, true); +} + /* write superblock: */ static void write_super_endio(struct bio *bio) diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index e41e5de531a0..654c5d4d522a 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -84,6 +84,7 @@ void bch2_free_super(struct bch_sb_handle *); int bch2_sb_realloc(struct bch_sb_handle *, unsigned); int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); +int bch2_read_super_silent(const char *, struct bch_opts *, struct bch_sb_handle *); int bch2_write_super(struct bch_fs *); void __bch2_check_set_feature(struct bch_fs *, unsigned); From 225879f403c6196624f0ed5c118b2cdd7de02e64 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 21 Nov 2023 09:05:15 +0800 Subject: [PATCH 035/226] bcachefs: clean up one inconsistent indenting fs/bcachefs/journal_io.c:1843 bch2_journal_write_pick_flush() warn: inconsistent indenting Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7585 Signed-off-by: Yang Li Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 30fb8e950613..c46b3bf09fd5 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1850,7 +1850,7 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf * (!w->must_flush && (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) { - w->noflush = true; + w->noflush = true; SET_JSET_NO_FLUSH(w->data, true); w->data->last_seq = 0; w->last_seq = 0; From 01e9564540d7e264147e3ee380e85cfa875ef48f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 20 Nov 2023 18:43:48 -0500 Subject: [PATCH 036/226] bcachefs: x-macro-ify bch_data_ops enum This will let us add an enum -> string table for a to_text() fn. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 15 ++++++++++----- fs/bcachefs/move.c | 29 +++++++++++++++++------------ fs/bcachefs/move.h | 4 +++- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 934da1b0e37b..d40ebfe05b0b 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -173,12 +173,17 @@ struct bch_ioctl_disk_set_state { __u64 dev; }; +#define BCH_DATA_OPS() \ + x(scrub, 0) \ + x(rereplicate, 1) \ + x(migrate, 2) \ + x(rewrite_old_nodes, 3) + enum bch_data_ops { - BCH_DATA_OP_SCRUB = 0, - BCH_DATA_OP_REREPLICATE = 1, - BCH_DATA_OP_MIGRATE = 2, - BCH_DATA_OP_REWRITE_OLD_NODES = 3, - BCH_DATA_OP_NR = 4, +#define x(t, n) BCH_DATA_OP_##t = n, + BCH_DATA_OPS() +#undef x + BCH_DATA_OP_NR }; /* diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 54830ee0ed88..b47c0d28c6d5 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -27,6 +27,13 @@ #include #include +const char * const bch2_data_ops_strs[] = { +#define x(t, n, ...) [n] = #t, + BCH_DATA_OPS() +#undef x + NULL +}; + static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k) { if (trace_move_extent_enabled()) { @@ -211,7 +218,7 @@ void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) trace_move_data(c, stats); } -void bch2_move_stats_init(struct bch_move_stats *stats, char *name) +void bch2_move_stats_init(struct bch_move_stats *stats, const char *name) { memset(stats, 0, sizeof(*stats)); stats->data_type = BCH_DATA_user; @@ -1012,9 +1019,13 @@ int bch2_data_job(struct bch_fs *c, { int ret = 0; + if (op.op >= BCH_DATA_OP_NR) + return -EINVAL; + + bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]); + switch (op.op) { - case BCH_DATA_OP_REREPLICATE: - bch2_move_stats_init(stats, "rereplicate"); + case BCH_DATA_OP_rereplicate: stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); @@ -1033,14 +1044,11 @@ int bch2_data_job(struct bch_fs *c, true, rereplicate_pred, c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - - bch2_move_stats_exit(stats, c); break; - case BCH_DATA_OP_MIGRATE: + case BCH_DATA_OP_migrate: if (op.migrate.dev >= c->sb.nr_devices) return -EINVAL; - bch2_move_stats_init(stats, "migrate"); stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); @@ -1059,18 +1067,15 @@ int bch2_data_job(struct bch_fs *c, true, migrate_pred, &op) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - - bch2_move_stats_exit(stats, c); break; - case BCH_DATA_OP_REWRITE_OLD_NODES: - bch2_move_stats_init(stats, "rewrite_old_nodes"); + case BCH_DATA_OP_rewrite_old_nodes: ret = bch2_scan_old_btree_nodes(c, stats); - bch2_move_stats_exit(stats, c); break; default: ret = -EINVAL; } + bch2_move_stats_exit(stats, c); return ret; } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 0906aa2d1de2..531965674a31 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -75,6 +75,8 @@ do { \ typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, struct bch_io_opts *, struct data_update_opts *); +extern const char * const bch2_data_ops_strs[]; + void bch2_moving_ctxt_exit(struct moving_context *); void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, struct bch_ratelimit *, struct bch_move_stats *, @@ -149,7 +151,7 @@ int bch2_data_job(struct bch_fs *, void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *); void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *); -void bch2_move_stats_init(struct bch_move_stats *, char *); +void bch2_move_stats_init(struct bch_move_stats *, const char *); void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *); From 3c843a67595187d53f1610ff3ff44130b3247842 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 20 Nov 2023 18:52:33 -0500 Subject: [PATCH 037/226] bcachefs: Convert bch2_move_btree() to bbpos Minor cleanup. Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index b47c0d28c6d5..0772b973405e 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -819,8 +819,8 @@ typedef bool (*move_btree_pred)(struct bch_fs *, void *, struct data_update_opts *); static int bch2_move_btree(struct bch_fs *c, - enum btree_id start_btree_id, struct bpos start_pos, - enum btree_id end_btree_id, struct bpos end_pos, + struct bbpos start, + struct bbpos end, move_btree_pred pred, void *arg, struct bch_move_stats *stats) { @@ -830,7 +830,7 @@ static int bch2_move_btree(struct bch_fs *c, struct btree_trans *trans; struct btree_iter iter; struct btree *b; - enum btree_id id; + enum btree_id btree; struct data_update_opts data_opts; int ret = 0; @@ -841,15 +841,15 @@ static int bch2_move_btree(struct bch_fs *c, stats->data_type = BCH_DATA_btree; - for (id = start_btree_id; - id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); - id++) { - stats->pos = BBPOS(id, POS_MIN); + for (btree = start.btree; + btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); + btree ++) { + stats->pos = BBPOS(btree, POS_MIN); - if (!bch2_btree_id_root(c, id)->b) + if (!bch2_btree_id_root(c, btree)->b) continue; - bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0, + bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0, BTREE_ITER_PREFETCH); retry: ret = 0; @@ -859,8 +859,8 @@ static int bch2_move_btree(struct bch_fs *c, if (kthread && kthread_should_stop()) break; - if ((cmp_int(id, end_btree_id) ?: - bpos_cmp(b->key.k.p, end_pos)) > 0) + if ((cmp_int(btree, end.btree) ?: + bpos_cmp(b->key.k.p, end.pos)) > 0) break; stats->pos = BBPOS(iter.btree_id, iter.pos); @@ -997,8 +997,8 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) int ret; ret = bch2_move_btree(c, - 0, POS_MIN, - BTREE_ID_NR, SPOS_MAX, + BBPOS_MIN, + BBPOS_MAX, rewrite_old_nodes_pred, c, stats); if (!ret) { mutex_lock(&c->sb_lock); @@ -1017,6 +1017,8 @@ int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, struct bch_ioctl_data op) { + struct bbpos start = BBPOS(op.start_btree, op.start_pos); + struct bbpos end = BBPOS(op.end_btree, op.end_pos); int ret = 0; if (op.op >= BCH_DATA_OP_NR) @@ -1029,15 +1031,11 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); - ret = bch2_move_btree(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + ret = bch2_move_btree(c, start, end, rereplicate_btree_pred, c, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, - (struct bbpos) { op.start_btree, op.start_pos }, - (struct bbpos) { op.end_btree, op.end_pos }, + ret = bch2_move_data(c, start, end, NULL, stats, writepoint_hashed((unsigned long) current), @@ -1052,15 +1050,11 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - ret = bch2_move_btree(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + ret = bch2_move_btree(c, start, end, migrate_btree_pred, &op, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, - (struct bbpos) { op.start_btree, op.start_pos }, - (struct bbpos) { op.end_btree, op.end_pos }, + ret = bch2_move_data(c, start, end, NULL, stats, writepoint_hashed((unsigned long) current), From ba11c7d67a53b88d8605d7cf14942a59ae5632c0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 20 Nov 2023 19:12:40 -0500 Subject: [PATCH 038/226] bcachefs: BCH_DATA_OP_drop_extra_replicas Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 3 ++- fs/bcachefs/move.c | 50 +++++++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index d40ebfe05b0b..18eb325401cf 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -177,7 +177,8 @@ struct bch_ioctl_disk_set_state { x(scrub, 0) \ x(rereplicate, 1) \ x(migrate, 2) \ - x(rewrite_old_nodes, 3) + x(rewrite_old_nodes, 3) \ + x(drop_extra_replicas, 4) enum bch_data_ops { #define x(t, n) BCH_DATA_OP_##t = n, diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 0772b973405e..d99742fbdb19 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -1013,6 +1013,41 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) return ret; } +static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, + struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + unsigned durability = bch2_bkey_durability(c, k); + unsigned replicas = bkey_is_btree_ptr(k.k) + ? c->opts.metadata_replicas + : io_opts->data_replicas; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned i = 0; + + bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { + unsigned d = bch2_extent_ptr_durability(c, &p); + + if (d && durability - d >= replicas) { + data_opts->kill_ptrs |= BIT(i); + durability -= d; + } + + i++; + } + + return data_opts->kill_ptrs != 0; +} + +static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, struct bch_ioctl_data op) @@ -1030,11 +1065,8 @@ int bch2_data_job(struct bch_fs *c, case BCH_DATA_OP_rereplicate: stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); - ret = bch2_move_btree(c, start, end, rereplicate_btree_pred, c, stats) ?: ret; - ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, start, end, NULL, stats, @@ -1049,11 +1081,8 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - ret = bch2_move_btree(c, start, end, migrate_btree_pred, &op, stats) ?: ret; - ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, start, end, NULL, stats, @@ -1065,6 +1094,15 @@ int bch2_data_job(struct bch_fs *c, case BCH_DATA_OP_rewrite_old_nodes: ret = bch2_scan_old_btree_nodes(c, stats); break; + case BCH_DATA_OP_drop_extra_replicas: + ret = bch2_move_btree(c, start, end, + drop_extra_replicas_btree_pred, c, stats) ?: ret; + ret = bch2_move_data(c, start, end, NULL, stats, + writepoint_hashed((unsigned long) current), + true, + drop_extra_replicas_pred, c) ?: ret; + ret = bch2_replicas_gc2(c) ?: ret; + break; default: ret = -EINVAL; } From ee841b77b3bfc3443112b1be53ca23d522b82333 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 13 Sep 2023 19:59:03 -0400 Subject: [PATCH 039/226] powerpc: Export kvm_guest static key, for bcachefs six locks bcachefs's six locks need kvm_guest, via ower_on_cpu() -> vcpu_is_preempted() -> is_kvm_guest() Signed-off-by: Kent Overstreet Cc: linuxppc-dev@lists.ozlabs.org Acked-by: Michael Ellerman (powerpc) --- arch/powerpc/kernel/firmware.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 20328f72f9f2..8987eee33dc8 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -23,6 +23,8 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) DEFINE_STATIC_KEY_FALSE(kvm_guest); +EXPORT_SYMBOL_GPL(kvm_guest); + int __init check_kvm_guest(void) { struct device_node *hyper_node; From 011173321f6fb36f47c3aaaf04218bf758b29e17 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Oct 2023 21:03:32 -0400 Subject: [PATCH 040/226] bcachefs: six locks: Simplify optimistic spinning osq lock maintainers don't want it to be used outside of kernel/locking/ - but, we can do better. Since we have lock handoff signalled via waitlist entries, there's no reason for optimistic spinning to have to look at the lock at all - aside from checking lock-owner; we can just spin looking at our waitlist entry. Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 10 ++++ fs/bcachefs/six.c | 119 ++++++++++++-------------------------------- fs/bcachefs/six.h | 7 --- 3 files changed, 43 insertions(+), 93 deletions(-) diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index fddc7be58022..7d3ac9556c98 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -85,6 +85,16 @@ config BCACHEFS_NO_LATENCY_ACCT help This disables device latency tracking and time stats, only for performance testing +config BCACHEFS_SIX_OPTIMISTIC_SPIN + bool "Optimistic spinning for six locks" + depends on BCACHEFS_FS + depends on SMP + default y + help + Instead of immediately sleeping when attempting to take a six lock that + is held by another thread, spin for a short while, as long as the + thread owning the lock is running. + config MEAN_AND_VARIANCE_UNIT_TEST tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index 97790445e67a..3a494c5d1247 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -324,101 +324,57 @@ bool six_relock_ip(struct six_lock *lock, enum six_lock_type type, } EXPORT_SYMBOL_GPL(six_relock_ip); -#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER +#ifdef CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN -static inline bool six_can_spin_on_owner(struct six_lock *lock) +static inline bool six_owner_running(struct six_lock *lock) { - struct task_struct *owner; - bool ret; - - if (need_resched()) - return false; - + /* + * When there's no owner, we might have preempted between the owner + * acquiring the lock and setting the owner field. If we're an RT task + * that will live-lock because we won't let the owner complete. + */ rcu_read_lock(); - owner = READ_ONCE(lock->owner); - ret = !owner || owner_on_cpu(owner); + struct task_struct *owner = READ_ONCE(lock->owner); + bool ret = owner ? owner_on_cpu(owner) : !rt_task(current); rcu_read_unlock(); return ret; } -static inline bool six_spin_on_owner(struct six_lock *lock, - struct task_struct *owner, - u64 end_time) +static inline bool six_optimistic_spin(struct six_lock *lock, + struct six_lock_waiter *wait, + enum six_lock_type type) { - bool ret = true; unsigned loop = 0; - - rcu_read_lock(); - while (lock->owner == owner) { - /* - * Ensure we emit the owner->on_cpu, dereference _after_ - * checking lock->owner still matches owner. If that fails, - * owner might point to freed memory. If it still matches, - * the rcu_read_lock() ensures the memory stays valid. - */ - barrier(); - - if (!owner_on_cpu(owner) || need_resched()) { - ret = false; - break; - } - - if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) { - six_set_bitmask(lock, SIX_LOCK_NOSPIN); - ret = false; - break; - } - - cpu_relax(); - } - rcu_read_unlock(); - - return ret; -} - -static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -{ - struct task_struct *task = current; u64 end_time; if (type == SIX_LOCK_write) return false; + if (lock->wait_list.next != &wait->list) + return false; + + if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN) + return false; + preempt_disable(); - if (!six_can_spin_on_owner(lock)) - goto fail; - - if (!osq_lock(&lock->osq)) - goto fail; - end_time = sched_clock() + 10 * NSEC_PER_USEC; - while (1) { - struct task_struct *owner; - + while (!need_resched() && six_owner_running(lock)) { /* - * If there's an owner, wait for it to either - * release the lock or go to sleep. + * Ensures that writes to the waitlist entry happen after we see + * wait->lock_acquired: pairs with the smp_store_release in + * __six_lock_wakeup */ - owner = READ_ONCE(lock->owner); - if (owner && !six_spin_on_owner(lock, owner, end_time)) - break; - - if (do_six_trylock(lock, type, false)) { - osq_unlock(&lock->osq); + if (smp_load_acquire(&wait->lock_acquired)) { preempt_enable(); return true; } - /* - * When there's no owner, we might have preempted between the - * owner acquiring the lock and setting the owner field. If - * we're an RT task that will live-lock because we won't let - * the owner complete. - */ - if (!owner && (need_resched() || rt_task(task))) + if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) { + six_set_bitmask(lock, SIX_LOCK_NOSPIN); break; + } /* * The cpu_relax() call is a compiler barrier which forces @@ -429,24 +385,15 @@ static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type cpu_relax(); } - osq_unlock(&lock->osq); -fail: preempt_enable(); - - /* - * If we fell out of the spin path because of need_resched(), - * reschedule now, before we try-lock again. This avoids getting - * scheduled out right after we obtained the lock. - */ - if (need_resched()) - schedule(); - return false; } -#else /* CONFIG_SIX_LOCK_SPIN_ON_OWNER */ +#else /* CONFIG_LOCK_SPIN_ON_OWNER */ -static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) +static inline bool six_optimistic_spin(struct six_lock *lock, + struct six_lock_waiter *wait, + enum six_lock_type type) { return false; } @@ -470,9 +417,6 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, trace_contention_begin(lock, 0); lock_contended(&lock->dep_map, ip); - if (six_optimistic_spin(lock, type)) - goto out; - wait->task = current; wait->lock_want = type; wait->lock_acquired = false; @@ -510,6 +454,9 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, ret = 0; } + if (six_optimistic_spin(lock, wait, type)) + goto out; + while (1) { set_current_state(TASK_UNINTERRUPTIBLE); diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h index 4c268b0b8316..a7104ac1d35c 100644 --- a/fs/bcachefs/six.h +++ b/fs/bcachefs/six.h @@ -127,10 +127,6 @@ #include #include -#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER -#include -#endif - enum six_lock_type { SIX_LOCK_read, SIX_LOCK_intent, @@ -143,9 +139,6 @@ struct six_lock { unsigned intent_lock_recurse; struct task_struct *owner; unsigned __percpu *readers; -#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER - struct optimistic_spin_queue osq; -#endif raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC From 3b05b8e08292eafec277e8780d79b2a2d8584af2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Nov 2023 17:17:38 -0500 Subject: [PATCH 041/226] bcachefs: Simplify check_bucket_ref() We only need the sector count being modified. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 0908da5e9886..ee3a2f5271c3 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -657,14 +657,11 @@ static int check_bucket_ref(struct btree_trans *trans, const struct bch_extent_ptr *ptr, s64 sectors, enum bch_data_type ptr_data_type, u8 b_gen, u8 bucket_data_type, - u32 dirty_sectors, u32 cached_sectors) + u32 bucket_sectors) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); - u32 bucket_sectors = !ptr->cached - ? dirty_sectors - : cached_sectors; struct printbuf buf = PRINTBUF; int ret = 0; @@ -799,7 +796,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, ret = check_bucket_ref(trans, k, ptr, sectors, data_type, g->gen, g->data_type, - g->dirty_sectors, g->cached_sectors); + g->dirty_sectors); if (ret) goto err; @@ -829,8 +826,7 @@ static int __mark_pointer(struct btree_trans *trans, ? dirty_sectors : cached_sectors; int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type, - bucket_gen, *bucket_data_type, - *dirty_sectors, *cached_sectors); + bucket_gen, *bucket_data_type, *dst_sectors); if (ret) return ret; @@ -1563,7 +1559,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type, a->v.gen, a->v.data_type, - a->v.dirty_sectors, a->v.cached_sectors); + a->v.dirty_sectors); if (ret) goto err; From e6674decb2195999ae9fe074bf048ba91e336144 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Nov 2023 19:26:27 -0500 Subject: [PATCH 042/226] bcachefs: BCH_IOCTL_DEV_USAGE_V2 BCH_IOCTL_DEV_USAGE mistakenly put the per-data-type array in struct bch_ioctl_dev_usage; since ioctl numbers encode the size of the arg, that means adding new data types breaks the ioctl. This adds a new version that includes the number of data types as a parameter: the old version is fixed at 10 so as to not break when adding new types. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 17 +++++++- fs/bcachefs/chardev.c | 84 ++++++++++++++++++++++++++++-------- 2 files changed, 83 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 18eb325401cf..44ba7a87aea7 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -81,6 +81,8 @@ struct bch_ioctl_incremental { #define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume) #define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume) +#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2) + /* ioctl below act on a particular file, not the filesystem as a whole: */ #define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) @@ -298,7 +300,20 @@ struct bch_ioctl_dev_usage { __u64 buckets; __u64 sectors; __u64 fragmented; - } d[BCH_DATA_NR]; + } d[10]; +}; + +struct bch_ioctl_dev_usage_v2 { + __u64 dev; + __u32 flags; + __u8 state; + __u8 nr_data_types; + __u8 pad[6]; + + __u32 bucket_size; + __u64 nr_buckets; + + struct bch_ioctl_dev_usage_type d[0]; }; /* diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index de3d82de9d29..118f0c0c4e30 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -23,6 +23,12 @@ #include #include +__must_check +static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n) +{ + return copy_to_user(to, from, n) ? -EFAULT : 0; +} + /* returns with ref on ca->ref */ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, unsigned flags) @@ -149,10 +155,8 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg) static long bch2_ioctl_query_uuid(struct bch_fs *c, struct bch_ioctl_query_uuid __user *user_arg) { - if (copy_to_user(&user_arg->uuid, &c->sb.user_uuid, - sizeof(c->sb.user_uuid))) - return -EFAULT; - return 0; + return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid, + sizeof(c->sb.user_uuid)); } #if 0 @@ -341,10 +345,7 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, if (len < sizeof(e)) return -EINVAL; - if (copy_to_user(buf, &e, sizeof(e))) - return -EFAULT; - - return sizeof(e); + return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e); } static const struct file_operations bcachefs_data_ops = { @@ -474,14 +475,15 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c, if (ret) goto err; - if (copy_to_user(user_arg, arg, - sizeof(*arg) + arg->replica_entries_bytes)) - ret = -EFAULT; + + ret = copy_to_user_errcode(user_arg, arg, + sizeof(*arg) + arg->replica_entries_bytes); err: kfree(arg); return ret; } +/* obsolete, didn't allow for new data types: */ static long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { @@ -511,7 +513,6 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - arg.buckets_ec = src.buckets_ec; for (i = 0; i < BCH_DATA_NR; i++) { arg.d[i].buckets = src.d[i].buckets; @@ -521,10 +522,58 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, percpu_ref_put(&ca->ref); - if (copy_to_user(user_arg, &arg, sizeof(arg))) + return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); +} + +static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, + struct bch_ioctl_dev_usage_v2 __user *user_arg) +{ + struct bch_ioctl_dev_usage_v2 arg; + struct bch_dev_usage src; + struct bch_dev *ca; + int ret = 0; + + if (!test_bit(BCH_FS_STARTED, &c->flags)) + return -EINVAL; + + if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; - return 0; + if ((arg.flags & ~BCH_BY_INDEX) || + arg.pad[0] || + arg.pad[1] || + arg.pad[2]) + return -EINVAL; + + ca = bch2_device_lookup(c, arg.dev, arg.flags); + if (IS_ERR(ca)) + return PTR_ERR(ca); + + src = bch2_dev_usage_read(ca); + + arg.state = ca->mi.state; + arg.bucket_size = ca->mi.bucket_size; + arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR); + arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; + + ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); + if (ret) + goto err; + + for (unsigned i = 0; i < arg.nr_data_types; i++) { + struct bch_ioctl_dev_usage_type t = { + .buckets = src.d[i].buckets, + .sectors = src.d[i].sectors, + .fragmented = src.d[i].fragmented, + }; + + ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t)); + if (ret) + goto err; + } +err: + percpu_ref_put(&ca->ref); + return ret; } static long bch2_ioctl_read_super(struct bch_fs *c, @@ -561,9 +610,8 @@ static long bch2_ioctl_read_super(struct bch_fs *c, goto err; } - if (copy_to_user((void __user *)(unsigned long)arg.sb, sb, - vstruct_bytes(sb))) - ret = -EFAULT; + ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb, + vstruct_bytes(sb)); err: if (!IS_ERR_OR_NULL(ca)) percpu_ref_put(&ca->ref); @@ -663,6 +711,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) return bch2_ioctl_fs_usage(c, arg); case BCH_IOCTL_DEV_USAGE: return bch2_ioctl_dev_usage(c, arg); + case BCH_IOCTL_DEV_USAGE_V2: + return bch2_ioctl_dev_usage_v2(c, arg); #if 0 case BCH_IOCTL_START: BCH_IOCTL(start, struct bch_ioctl_start); From dafff7e57508e87b473d91bac3e38181e2b829f2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Nov 2023 18:05:18 -0500 Subject: [PATCH 043/226] bcachefs: New bucket sector count helpers This introduces bch2_bucket_sectors() and bch2_bucket_sectors_dirty(), prep work for separately accounting stripe sectors. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 22 ++++++-------- fs/bcachefs/alloc_background.h | 23 +++++++++++++-- fs/bcachefs/buckets.c | 52 ++++++++++++++-------------------- fs/bcachefs/move.c | 2 +- fs/bcachefs/movinggc.c | 2 +- 5 files changed, 53 insertions(+), 48 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index dcfe26fdb500..3b1ddb8397b0 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -261,10 +261,8 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_DATA_free: case BCH_DATA_need_gc_gens: case BCH_DATA_need_discard: - bkey_fsck_err_on(a.v->dirty_sectors || - a.v->cached_sectors || - a.v->stripe, c, err, - alloc_key_empty_but_have_data, + bkey_fsck_err_on(bch2_bucket_sectors(*a.v) || a.v->stripe, + c, err, alloc_key_empty_but_have_data, "empty data type free but have data"); break; case BCH_DATA_sb: @@ -272,22 +270,21 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_DATA_btree: case BCH_DATA_user: case BCH_DATA_parity: - bkey_fsck_err_on(!a.v->dirty_sectors, c, err, - alloc_key_dirty_sectors_0, + bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v), + c, err, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", bch2_data_types[a.v->data_type]); break; case BCH_DATA_cached: bkey_fsck_err_on(!a.v->cached_sectors || - a.v->dirty_sectors || - a.v->stripe, c, err, - alloc_key_cached_inconsistency, + bch2_bucket_sectors_dirty(*a.v) || + a.v->stripe, + c, err, alloc_key_cached_inconsistency, "data type inconsistency"); bkey_fsck_err_on(!a.v->io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, - c, err, - alloc_key_cached_but_read_time_zero, + c, err, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: @@ -790,8 +787,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, new_a->data_type = alloc_data_type(*new_a, new_a->data_type); - if (new_a->dirty_sectors > old_a->dirty_sectors || - new_a->cached_sectors > old_a->cached_sectors) { + if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) { new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 73faf99a222a..96671f166dd8 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -71,6 +71,24 @@ static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type) return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type; } +static inline unsigned bch2_bucket_sectors(struct bch_alloc_v4 a) +{ + return a.dirty_sectors + a.cached_sectors; +} + +static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a) +{ + return a.dirty_sectors; +} + +static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca, + struct bch_alloc_v4 a) +{ + int d = bch2_bucket_sectors_dirty(a); + + return d ? max(0, ca->mi.bucket_size - d) : 0; +} + static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; @@ -90,10 +108,11 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, struct bch_dev *ca) { if (!data_type_movable(a.data_type) || - a.dirty_sectors >= ca->mi.bucket_size) + !bch2_bucket_sectors_fragmented(ca, a)) return 0; - return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size); + u64 d = bch2_bucket_sectors_dirty(a); + return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index ee3a2f5271c3..bb2ded894b7e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -277,14 +277,6 @@ void bch2_dev_usage_init(struct bch_dev *ca) ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket; } -static inline int bucket_sectors_fragmented(struct bch_dev *ca, - struct bch_alloc_v4 a) -{ - return a.dirty_sectors - ? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors) - : 0; -} - static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, struct bch_alloc_v4 old, struct bch_alloc_v4 new, @@ -306,41 +298,40 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, u->d[old.data_type].buckets--; u->d[new.data_type].buckets++; - u->buckets_ec -= (int) !!old.stripe; - u->buckets_ec += (int) !!new.stripe; + u->buckets_ec -= !!old.stripe; + u->buckets_ec += !!new.stripe; - u->d[old.data_type].sectors -= old.dirty_sectors; - u->d[new.data_type].sectors += new.dirty_sectors; + u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old); + u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new); u->d[BCH_DATA_cached].sectors += new.cached_sectors; u->d[BCH_DATA_cached].sectors -= old.cached_sectors; - u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old); - u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new); + u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old); + u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new); preempt_enable(); } +static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b) +{ + return (struct bch_alloc_v4) { + .gen = b.gen, + .data_type = b.data_type, + .dirty_sectors = b.dirty_sectors, + .cached_sectors = b.cached_sectors, + .stripe = b.stripe, + }; +} + static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, struct bucket old, struct bucket new, u64 journal_seq, bool gc) { - struct bch_alloc_v4 old_a = { - .gen = old.gen, - .data_type = old.data_type, - .dirty_sectors = old.dirty_sectors, - .cached_sectors = old.cached_sectors, - .stripe = old.stripe, - }; - struct bch_alloc_v4 new_a = { - .gen = new.gen, - .data_type = new.data_type, - .dirty_sectors = new.dirty_sectors, - .cached_sectors = new.cached_sectors, - .stripe = new.stripe, - }; - - bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc); + bch2_dev_usage_update(c, ca, + bucket_m_to_alloc(old), + bucket_m_to_alloc(new), + journal_seq, gc); } static inline int __update_replicas(struct bch_fs *c, @@ -640,7 +631,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, goto err; } - g->data_type = data_type; g->dirty_sectors += sectors; new = *g; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index d99742fbdb19..615f18a0311a 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -677,7 +677,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, } a = bch2_alloc_to_v4(k, &a_convert); - dirty_sectors = a->dirty_sectors; + dirty_sectors = bch2_bucket_sectors_dirty(*a); bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; fragmentation = a->fragmentation_lru; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index a84e79f79e5e..b8d0542222c3 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -91,7 +91,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, a = bch2_alloc_to_v4(k, &_a); b->k.gen = a->gen; - b->sectors = a->dirty_sectors; + b->sectors = bch2_bucket_sectors_dirty(*a); ret = data_type_movable(a->data_type) && a->fragmentation_lru && From ed0cd515cd8adb052881b371dd3f80cca5995174 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Nov 2023 18:25:31 -0500 Subject: [PATCH 044/226] bcachefs: bch2_dev_usage_to_text() Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 30 ++++++++++++++++++++++++++++++ fs/bcachefs/buckets.h | 1 + fs/bcachefs/sysfs.c | 27 +-------------------------- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index bb2ded894b7e..36ed3c53e81c 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -277,6 +277,36 @@ void bch2_dev_usage_init(struct bch_dev *ca) ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket; } +void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) +{ + prt_tab(out); + prt_str(out, "buckets"); + prt_tab_rjust(out); + prt_str(out, "sectors"); + prt_tab_rjust(out); + prt_str(out, "fragmented"); + prt_tab_rjust(out); + prt_newline(out); + + for (unsigned i = 0; i < BCH_DATA_NR; i++) { + prt_str(out, bch2_data_types[i]); + prt_tab(out); + prt_u64(out, usage->d[i].buckets); + prt_tab_rjust(out); + prt_u64(out, usage->d[i].sectors); + prt_tab_rjust(out); + prt_u64(out, usage->d[i].fragmented); + prt_tab_rjust(out); + prt_newline(out); + } + + prt_str(out, "ec"); + prt_tab(out); + prt_u64(out, usage->buckets_ec); + prt_tab_rjust(out); + prt_newline(out); +} + static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, struct bch_alloc_v4 old, struct bch_alloc_v4 new, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 5574b62e0553..bc088673009b 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -203,6 +203,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) } void bch2_dev_usage_init(struct bch_dev *); +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index f3cb7115b530..0aa097892938 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -786,32 +786,7 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); - prt_tab(out); - prt_str(out, "buckets"); - prt_tab_rjust(out); - prt_str(out, "sectors"); - prt_tab_rjust(out); - prt_str(out, "fragmented"); - prt_tab_rjust(out); - prt_newline(out); - - for (i = 0; i < BCH_DATA_NR; i++) { - prt_str(out, bch2_data_types[i]); - prt_tab(out); - prt_u64(out, stats.d[i].buckets); - prt_tab_rjust(out); - prt_u64(out, stats.d[i].sectors); - prt_tab_rjust(out); - prt_u64(out, stats.d[i].fragmented); - prt_tab_rjust(out); - prt_newline(out); - } - - prt_str(out, "ec"); - prt_tab(out); - prt_u64(out, stats.buckets_ec); - prt_tab_rjust(out); - prt_newline(out); + bch2_dev_usage_to_text(out, &stats); prt_newline(out); From 9b34f02cdcc1000b0147d5751349a3ac1b697590 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 23 Nov 2023 18:43:23 -0500 Subject: [PATCH 045/226] bcachefs: Kill dev_usage->buckets_ec This counter is redundant; it's simply the sum of BCH_DATA_stripe and BCH_DATA_parity buckets. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 4 ++-- fs/bcachefs/btree_gc.c | 3 --- fs/bcachefs/buckets.c | 9 --------- fs/bcachefs/buckets_types.h | 2 -- fs/bcachefs/journal_io.c | 2 -- fs/bcachefs/recovery.c | 2 -- fs/bcachefs/sb-clean.c | 1 - 7 files changed, 2 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index d2eb1bd0c8ee..41fa8b9b69f4 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -2231,8 +2231,8 @@ struct jset_entry_dev_usage { __le32 dev; __u32 pad; - __le64 buckets_ec; - __le64 _buckets_unavailable; /* No longer used */ + __le64 _buckets_ec; /* No longer used */ + __le64 _buckets_unavailable; /* No longer used */ struct jset_entry_dev_usage_type d[]; }; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 2304a6ab0d0c..0db18d0d47e8 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1251,9 +1251,6 @@ static int bch2_gc_done(struct bch_fs *c, copy_dev_field(dev_usage_fragmented_wrong, d[i].fragmented, "%s fragmented", bch2_data_types[i]); } - - copy_dev_field(dev_usage_buckets_ec_wrong, - buckets_ec, "buckets_ec"); } { diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 36ed3c53e81c..4a8c4f0f2bb3 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -299,12 +299,6 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) prt_tab_rjust(out); prt_newline(out); } - - prt_str(out, "ec"); - prt_tab(out); - prt_u64(out, usage->buckets_ec); - prt_tab_rjust(out); - prt_newline(out); } static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, @@ -328,9 +322,6 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, u->d[old.data_type].buckets--; u->d[new.data_type].buckets++; - u->buckets_ec -= !!old.stripe; - u->buckets_ec += !!new.stripe; - u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old); u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new); diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 2a9dab9006ef..783f71017204 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -33,8 +33,6 @@ struct bucket_gens { }; struct bch_dev_usage { - u64 buckets_ec; - struct { u64 buckets; u64 sectors; /* _compressed_ sectors: */ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index c46b3bf09fd5..f2e038116b0f 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -687,8 +687,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs le64_to_cpu(u->d[i].sectors), le64_to_cpu(u->d[i].fragmented)); } - - prt_printf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec)); } static int journal_entry_log_validate(struct bch_fs *c, diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index a588092241ca..d49c0af42623 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -305,8 +305,6 @@ static int journal_replay_entry_early(struct bch_fs *c, struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev)); unsigned i, nr_types = jset_entry_dev_usage_nr_types(u); - ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec); - for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) { ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets); ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors); diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index bc1d9808808d..c100f2ec2b96 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -256,7 +256,6 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, u->entry.type = BCH_JSET_ENTRY_dev_usage; u->dev = cpu_to_le32(dev); - u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec); for (i = 0; i < BCH_DATA_NR; i++) { u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets); From 7541787f58d19b4fab4368681d4ef4ed8f0dbd61 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 24 Nov 2023 23:40:08 -0500 Subject: [PATCH 046/226] bcachefs: Improve sysfs compression_stats Break it out by compression type, and include average extent size. Also, format into a nice table. Signed-off-by: Kent Overstreet --- fs/bcachefs/sysfs.c | 114 +++++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 0aa097892938..67739a116c6c 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -258,15 +258,16 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c struct btree_iter iter; struct bkey_s_c k; enum btree_id id; - u64 nr_uncompressed_extents = 0, - nr_compressed_extents = 0, - nr_incompressible_extents = 0, - uncompressed_sectors = 0, - incompressible_sectors = 0, - compressed_sectors_compressed = 0, - compressed_sectors_uncompressed = 0; + struct compression_type_stats { + u64 nr_extents; + u64 sectors_compressed; + u64 sectors_uncompressed; + } s[BCH_COMPRESSION_TYPE_NR]; + u64 compressed_incompressible = 0; int ret = 0; + memset(s, 0, sizeof(s)); + if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; @@ -279,36 +280,30 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c ret = for_each_btree_key2(trans, iter, id, POS_MIN, BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_extent_crc_unpacked crc; const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - bool compressed = false, uncompressed = false, incompressible = false; + bool compressed = false, incompressible = false; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - switch (p.crc.compression_type) { - case BCH_COMPRESSION_TYPE_none: - uncompressed = true; - uncompressed_sectors += k.k->size; - break; - case BCH_COMPRESSION_TYPE_incompressible: - incompressible = true; - incompressible_sectors += k.k->size; - break; - default: - compressed_sectors_compressed += - p.crc.compressed_size; - compressed_sectors_uncompressed += - p.crc.uncompressed_size; - compressed = true; - break; + bkey_for_each_crc(k.k, ptrs, crc, entry) { + incompressible |= crc.compression_type == BCH_COMPRESSION_TYPE_incompressible; + compressed |= crc_is_compressed(crc); + + if (crc_is_compressed(crc)) { + s[crc.compression_type].nr_extents++; + s[crc.compression_type].sectors_compressed += crc.compressed_size; + s[crc.compression_type].sectors_uncompressed += crc.uncompressed_size; } } - if (incompressible) - nr_incompressible_extents++; - else if (uncompressed) - nr_uncompressed_extents++; - else if (compressed) - nr_compressed_extents++; + compressed_incompressible += compressed && incompressible; + + if (!compressed) { + unsigned t = incompressible ? BCH_COMPRESSION_TYPE_incompressible : 0; + + s[t].nr_extents++; + s[t].sectors_compressed += k.k->size; + s[t].sectors_uncompressed += k.k->size; + } 0; })); } @@ -318,26 +313,45 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c if (ret) return ret; - prt_printf(out, "uncompressed:\n"); - prt_printf(out, " nr extents: %llu\n", nr_uncompressed_extents); - prt_printf(out, " size: "); - prt_human_readable_u64(out, uncompressed_sectors << 9); - prt_printf(out, "\n"); + prt_str(out, "type"); + printbuf_tabstop_push(out, 12); + prt_tab(out); - prt_printf(out, "compressed:\n"); - prt_printf(out, " nr extents: %llu\n", nr_compressed_extents); - prt_printf(out, " compressed size: "); - prt_human_readable_u64(out, compressed_sectors_compressed << 9); - prt_printf(out, "\n"); - prt_printf(out, " uncompressed size: "); - prt_human_readable_u64(out, compressed_sectors_uncompressed << 9); - prt_printf(out, "\n"); + prt_str(out, "compressed"); + printbuf_tabstop_push(out, 16); + prt_tab_rjust(out); + + prt_str(out, "uncompressed"); + printbuf_tabstop_push(out, 16); + prt_tab_rjust(out); + + prt_str(out, "average extent size"); + printbuf_tabstop_push(out, 24); + prt_tab_rjust(out); + prt_newline(out); + + for (unsigned i = 0; i < ARRAY_SIZE(s); i++) { + prt_str(out, bch2_compression_types[i]); + prt_tab(out); + + prt_human_readable_u64(out, s[i].sectors_compressed << 9); + prt_tab_rjust(out); + + prt_human_readable_u64(out, s[i].sectors_uncompressed << 9); + prt_tab_rjust(out); + + prt_human_readable_u64(out, s[i].nr_extents + ? div_u64(s[i].sectors_uncompressed << 9, s[i].nr_extents) + : 0); + prt_tab_rjust(out); + prt_newline(out); + } + + if (compressed_incompressible) { + prt_printf(out, "%llu compressed & incompressible extents", compressed_incompressible); + prt_newline(out); + } - prt_printf(out, "incompressible:\n"); - prt_printf(out, " nr extents: %llu\n", nr_incompressible_extents); - prt_printf(out, " size: "); - prt_human_readable_u64(out, incompressible_sectors << 9); - prt_printf(out, "\n"); return 0; } From d05db12715c989c36e81737a737418af9e6232d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 25 Nov 2023 00:05:30 -0500 Subject: [PATCH 047/226] bcachefs: Print durability in member_to_text() Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index bed0f857fe5b..259af07f4624 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -259,6 +259,11 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "(none)"); prt_newline(out); + prt_str(out, "Durability:"); + prt_tab(out); + prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m)); + prt_newline(out); + prt_printf(out, "Discard:"); prt_tab(out); prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m)); From 25d1e39df0e28494f1e82c68a3967c84dca009b3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 24 Nov 2023 21:52:17 -0500 Subject: [PATCH 048/226] bcachefs: Add a rebalance, data_update tracepoints Add a tracepoint for rebalance, printing out - the target option - the compression option - the key being rebalanced Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 14 ++++++++++++++ fs/bcachefs/rebalance.c | 15 +++++++++++++++ fs/bcachefs/trace.h | 34 +++++++++++++++++++++++----------- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 8ed3a9bde029..488279b3d08e 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -267,6 +267,20 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, goto out; } + if (trace_data_update_enabled()) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "\nold: "); + bch2_bkey_val_to_text(&buf, c, old); + prt_str(&buf, "\nk: "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\nnew: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + + trace_data_update(c, buf.buf); + printbuf_exit(&buf); + } + ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index db2139c0545d..1846f38cea38 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -171,6 +171,21 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, return bkey_s_c_null; } + if (trace_rebalance_extent_enabled()) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "target="); + bch2_target_to_text(&buf, c, r->target); + prt_str(&buf, " compression="); + struct bch_compression_opt opt = __bch2_compression_decode(r->compression); + prt_str(&buf, bch2_compression_opts[opt.type]); + prt_str(&buf, " "); + bch2_bkey_val_to_text(&buf, c, k); + + trace_rebalance_extent(c, buf.buf); + printbuf_exit(&buf); + } + return k; } diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 52db4df67c3a..f2e405c359b5 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -32,19 +32,21 @@ DECLARE_EVENT_CLASS(bpos, TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot) ); -DECLARE_EVENT_CLASS(bkey, - TP_PROTO(struct bch_fs *c, const char *k), - TP_ARGS(c, k), +DECLARE_EVENT_CLASS(str, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str), TP_STRUCT__entry( - __string(k, k ) + __field(dev_t, dev ) + __string(str, str ) ), TP_fast_assign( - __assign_str(k, k); + __entry->dev = c->dev; + __assign_str(str, str); ), - TP_printk("%s", __get_str(k)) + TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str)) ); DECLARE_EVENT_CLASS(btree_node, @@ -736,22 +738,22 @@ TRACE_EVENT(bucket_evacuate, __entry->dev_idx, __entry->bucket) ); -DEFINE_EVENT(bkey, move_extent, +DEFINE_EVENT(str, move_extent, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(bkey, move_extent_read, +DEFINE_EVENT(str, move_extent_read, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(bkey, move_extent_write, +DEFINE_EVENT(str, move_extent_write, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(bkey, move_extent_finish, +DEFINE_EVENT(str, move_extent_finish, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); @@ -773,7 +775,7 @@ TRACE_EVENT(move_extent_fail, TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg)) ); -DEFINE_EVENT(bkey, move_extent_start_fail, +DEFINE_EVENT(str, move_extent_start_fail, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) ); @@ -1349,6 +1351,16 @@ TRACE_EVENT(write_buffer_flush_slowpath, TP_printk("%zu/%zu", __entry->nr, __entry->size) ); +DEFINE_EVENT(str, rebalance_extent, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(str, data_update, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + #endif /* _TRACE_BCACHEFS_H */ /* This part must be outside protection */ From 3f59547e22b9dc3eecca3407528df8b950f4e1c2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 25 Nov 2023 15:46:02 -0500 Subject: [PATCH 049/226] bcachefs: Refactor bch2_check_alloc_to_lru_ref() This code was somewhat convoluted - because originally bch2_lru_set() could modify the LRU index if there was a collision. That's no longer the case, so the "create LRU entry" path has no reason to update the alloc key, so we can separate the handling of the two fsck errors. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 54 ++++++++++++++++------------------ 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 3b1ddb8397b0..fc8555df89c8 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1505,6 +1505,27 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (a->data_type != BCH_DATA_cached) return 0; + if (fsck_err_on(!a->io_time[READ], c, + alloc_key_cached_but_read_time_zero, + "cached bucket with read_time 0\n" + " %s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { + struct bkey_i_alloc_v4 *a_mut = + bch2_alloc_to_v4_mut(trans, alloc_k); + ret = PTR_ERR_OR_ZERO(a_mut); + if (ret) + goto err; + + a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now); + ret = bch2_trans_update(trans, alloc_iter, + &a_mut->k_i, BTREE_TRIGGER_NORUN); + if (ret) + goto err; + + a = &a_mut->v; + } + lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, lru_pos(alloc_k.k->p.inode, bucket_to_u64(alloc_k.k->p), @@ -1513,41 +1534,18 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (ret) return ret; - if (fsck_err_on(!a->io_time[READ], c, - alloc_key_cached_but_read_time_zero, - "cached bucket with read_time 0\n" - " %s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) || - fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, + if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, alloc_key_to_missing_lru_entry, "missing lru entry\n" " %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { - u64 read_time = a->io_time[READ] ?: - atomic64_read(&c->io_clock[READ].now); - ret = bch2_lru_set(trans, alloc_k.k->p.inode, bucket_to_u64(alloc_k.k->p), - read_time); + a->io_time[READ]); if (ret) goto err; - - if (a->io_time[READ] != read_time) { - struct bkey_i_alloc_v4 *a_mut = - bch2_alloc_to_v4_mut(trans, alloc_k); - ret = PTR_ERR_OR_ZERO(a_mut); - if (ret) - goto err; - - a_mut->v.io_time[READ] = read_time; - ret = bch2_trans_update(trans, alloc_iter, - &a_mut->k_i, BTREE_TRIGGER_NORUN); - if (ret) - goto err; - } } err: fsck_err: @@ -1560,15 +1558,13 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) { struct btree_iter iter; struct bkey_s_c k; - int ret = 0; - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, bch2_check_alloc_to_lru_ref(trans, &iter))); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } From 9e243d3cdac173111def205674813af973ead393 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 25 Nov 2023 22:39:21 -0500 Subject: [PATCH 050/226] bcachefs: Kill journal_seq/gc args to bch2_dev_usage_update_m() This is only used by gc (fsck). Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 4a8c4f0f2bb3..312bd0c86623 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -346,13 +346,12 @@ static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b) } static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, - struct bucket old, struct bucket new, - u64 journal_seq, bool gc) + struct bucket old, struct bucket new) { bch2_dev_usage_update(c, ca, bucket_m_to_alloc(old), bucket_m_to_alloc(new), - journal_seq, gc); + 0, true); } static inline int __update_replicas(struct bch_fs *c, @@ -658,7 +657,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, err: bucket_unlock(g); if (!ret) - bch2_dev_usage_update_m(c, ca, old, new, 0, true); + bch2_dev_usage_update_m(c, ca, old, new); percpu_up_read(&c->mark_lock); return ret; } @@ -773,7 +772,6 @@ static int mark_stripe_bucket(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; - u64 journal_seq = trans->journal_res.seq; const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; unsigned nr_data = s->nr_blocks - s->nr_redundant; bool parity = ptr_idx >= nr_data; @@ -820,7 +818,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, err: bucket_unlock(g); if (!ret) - bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true); + bch2_dev_usage_update_m(c, ca, old, new); percpu_up_read(&c->mark_lock); printbuf_exit(&buf); return ret; @@ -859,7 +857,6 @@ static int bch2_mark_pointer(struct btree_trans *trans, s64 sectors, unsigned flags) { - u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct bucket old, new, *g; @@ -886,7 +883,7 @@ static int bch2_mark_pointer(struct btree_trans *trans, new = *g; bucket_unlock(g); if (!ret) - bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true); + bch2_dev_usage_update_m(c, ca, old, new); percpu_up_read(&c->mark_lock); return ret; From 3c471b65889aa22d6ba4f8e3d2e5006bb70fdb58 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 26 Nov 2023 17:05:02 -0500 Subject: [PATCH 051/226] bcachefs: convert bch_fs_flags to x-macro Now we can print out filesystem flags in sysfs, useful for debugging various "what's my filesystem doing" issues. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 58 ++++++++++++++------------ fs/bcachefs/btree_gc.c | 14 +++---- fs/bcachefs/btree_iter.c | 4 +- fs/bcachefs/btree_journal_iter.c | 2 +- fs/bcachefs/btree_key_cache.c | 4 +- fs/bcachefs/btree_trans_commit.c | 8 ++-- fs/bcachefs/btree_update_interior.c | 4 +- fs/bcachefs/chardev.c | 8 ++-- fs/bcachefs/ec.c | 2 +- fs/bcachefs/error.c | 18 ++++----- fs/bcachefs/fs-io-buffered.c | 2 +- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 2 +- fs/bcachefs/inode.c | 2 +- fs/bcachefs/journal_seq_blacklist.c | 2 +- fs/bcachefs/recovery.c | 34 ++++++++-------- fs/bcachefs/snapshot.c | 8 ++-- fs/bcachefs/super-io.c | 4 +- fs/bcachefs/super.c | 63 ++++++++++++++++------------- fs/bcachefs/super.h | 6 ++- fs/bcachefs/sysfs.c | 11 +++-- 21 files changed, 139 insertions(+), 119 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 19bc88f05911..991de12974db 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -566,32 +566,38 @@ struct bch_dev { struct io_count __percpu *io_done; }; -enum { - /* startup: */ - BCH_FS_STARTED, - BCH_FS_MAY_GO_RW, - BCH_FS_RW, - BCH_FS_WAS_RW, +/* + * fsck_done - kill? + * + * replace with something more general from enumated fsck passes/errors: + * initial_gc_unfixed + * error + * topology error + */ - /* shutdown: */ - BCH_FS_STOPPING, - BCH_FS_EMERGENCY_RO, - BCH_FS_GOING_RO, - BCH_FS_WRITE_DISABLE_COMPLETE, - BCH_FS_CLEAN_SHUTDOWN, +#define BCH_FS_FLAGS() \ + x(started) \ + x(may_go_rw) \ + x(rw) \ + x(was_rw) \ + x(stopping) \ + x(emergency_ro) \ + x(going_ro) \ + x(write_disable_complete) \ + x(clean_shutdown) \ + x(fsck_done) \ + x(initial_gc_unfixed) \ + x(need_another_gc) \ + x(need_delete_dead_snapshots) \ + x(error) \ + x(topology_error) \ + x(errors_fixed) \ + x(errors_not_fixed) - /* fsck passes: */ - BCH_FS_FSCK_DONE, - BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ - BCH_FS_NEED_ANOTHER_GC, - - BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, - - /* errors: */ - BCH_FS_ERROR, - BCH_FS_TOPOLOGY_ERROR, - BCH_FS_ERRORS_FIXED, - BCH_FS_ERRORS_NOT_FIXED, +enum bch_fs_flags { +#define x(n) BCH_FS_##n, + BCH_FS_FLAGS() +#undef x }; struct btree_debug { @@ -1070,7 +1076,7 @@ static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref) static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref) { #ifdef BCH_WRITE_REF_DEBUG - return !test_bit(BCH_FS_GOING_RO, &c->flags) && + return !test_bit(BCH_FS_going_ro, &c->flags) && atomic_long_inc_not_zero(&c->writes[ref]); #else return percpu_ref_tryget_live(&c->writes); @@ -1089,7 +1095,7 @@ static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref) if (atomic_long_read(&c->writes[i])) return; - set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); + set_bit(BCH_FS_write_disable_complete, &c->flags); wake_up(&bch2_read_only_wait); #else percpu_ref_put(&c->writes); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 0db18d0d47e8..5a7b72a461df 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -108,7 +108,7 @@ static int bch2_gc_check_topology(struct bch_fs *c, ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); goto err; } else { - set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); + set_bit(BCH_FS_initial_gc_unfixed, &c->flags); } } } @@ -134,7 +134,7 @@ static int bch2_gc_check_topology(struct bch_fs *c, ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); goto err; } else { - set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); + set_bit(BCH_FS_initial_gc_unfixed, &c->flags); } } @@ -619,7 +619,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id g->data_type = 0; g->dirty_sectors = 0; g->cached_sectors = 0; - set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); + set_bit(BCH_FS_need_another_gc, &c->flags); } else { do_update = true; } @@ -664,7 +664,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { if (data_type == BCH_DATA_btree) { g->data_type = data_type; - set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); + set_bit(BCH_FS_need_another_gc, &c->flags); } else { do_update = true; } @@ -996,7 +996,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b /* Continue marking when opted to not * fix the error: */ ret = 0; - set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); + set_bit(BCH_FS_initial_gc_unfixed, &c->flags); continue; } } else if (ret) { @@ -1845,7 +1845,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) #endif c->gc_count++; - if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) || + if (test_bit(BCH_FS_need_another_gc, &c->flags) || (!iter && bch2_test_restart_gc)) { if (iter++ > 2) { bch_info(c, "Unable to fix bucket gens, looping"); @@ -1857,7 +1857,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) * XXX: make sure gens we fixed got saved */ bch_info(c, "Second GC pass needed, restarting:"); - clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); + clear_bit(BCH_FS_need_another_gc, &c->flags); __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); bch2_gc_stripes_reset(c, metadata_only); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 89592a4f3563..ff5448fca788 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -781,7 +781,7 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat struct btree_node_iter node_iter = l->iter; struct bkey_packed *k; struct bkey_buf tmp; - unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) + unsigned nr = test_bit(BCH_FS_started, &c->flags) ? (path->level > 1 ? 0 : 2) : (path->level > 1 ? 1 : 16); bool was_locked = btree_node_locked(path, path->level); @@ -816,7 +816,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p struct bch_fs *c = trans->c; struct bkey_s_c k; struct bkey_buf tmp; - unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) + unsigned nr = test_bit(BCH_FS_started, &c->flags) ? (path->level > 1 ? 0 : 2) : (path->level > 1 ? 1 : 16); bool was_locked = btree_node_locked(path, path->level); diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 7a5e0a893df9..4c084ce493a4 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -177,7 +177,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); - BUG_ON(test_bit(BCH_FS_RW, &c->flags)); + BUG_ON(test_bit(BCH_FS_rw, &c->flags)); if (idx < keys->size && journal_key_cmp(&n, &keys->d[idx]) == 0) { diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 6d98018912be..95314aaf4b58 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -778,7 +778,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, ck->valid = true; if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { - EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); + EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); set_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_inc(&c->btree_key_cache.nr_dirty); @@ -1005,7 +1005,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) if (atomic_long_read(&bc->nr_dirty) && !bch2_journal_error(&c->journal) && - test_bit(BCH_FS_WAS_RW, &c->flags)) + test_bit(BCH_FS_was_rw, &c->flags)) panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n", atomic_long_read(&bc->nr_dirty)); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 4bdb847dd688..578521216b6a 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -287,7 +287,7 @@ inline void bch2_btree_insert_key_leaf(struct btree_trans *trans, bch2_btree_add_journal_pin(c, b, journal_seq); if (unlikely(!btree_node_dirty(b))) { - EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); + EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); set_btree_node_dirty_acct(c, b); } @@ -995,7 +995,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags) int ret; if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) || - test_bit(BCH_FS_STARTED, &c->flags)) + test_bit(BCH_FS_started, &c->flags)) return -BCH_ERR_erofs_trans_commit; ret = drop_locks_do(trans, bch2_fs_read_write_early(c)); @@ -1060,7 +1060,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) return ret; } - if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) { + if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { ret = do_bch2_trans_commit_to_journal_replay(trans); goto out_reset; } @@ -1086,7 +1086,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out; } - EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); + EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); trans->journal_u64s = trans->extra_journal_entries.nr; trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index cafa26723b54..81e443dee714 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2080,7 +2080,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) a->seq = b->data->keys.seq; INIT_WORK(&a->work, async_btree_node_rewrite_work); - if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) { + if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { mutex_lock(&c->pending_node_rewrites_lock); list_add(&a->list, &c->pending_node_rewrites); mutex_unlock(&c->pending_node_rewrites_lock); @@ -2088,7 +2088,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) } if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { - if (test_bit(BCH_FS_STARTED, &c->flags)) { + if (test_bit(BCH_FS_started, &c->flags)) { bch_err(c, "%s: error getting c->writes ref", __func__); kfree(a); return; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 118f0c0c4e30..ba0436ae6b05 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -418,7 +418,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c, unsigned i; int ret = 0; - if (!test_bit(BCH_FS_STARTED, &c->flags)) + if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes)) @@ -492,7 +492,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_dev *ca; unsigned i; - if (!test_bit(BCH_FS_STARTED, &c->flags)) + if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; if (copy_from_user(&arg, user_arg, sizeof(arg))) @@ -533,7 +533,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, struct bch_dev *ca; int ret = 0; - if (!test_bit(BCH_FS_STARTED, &c->flags)) + if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; if (copy_from_user(&arg, user_arg, sizeof(arg))) @@ -725,7 +725,7 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx); } - if (!test_bit(BCH_FS_STARTED, &c->flags)) + if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; switch (cmd) { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index c730f0933d29..24ec634077a3 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1415,7 +1415,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, if (ret) return ERR_PTR(ret); - if (test_bit(BCH_FS_GOING_RO, &c->flags)) { + if (test_bit(BCH_FS_going_ro, &c->flags)) { h = ERR_PTR(-BCH_ERR_erofs_no_writes); goto found; } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 25cf78a7b946..aa4f7f4925f6 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -7,7 +7,7 @@ bool bch2_inconsistent_error(struct bch_fs *c) { - set_bit(BCH_FS_ERROR, &c->flags); + set_bit(BCH_FS_error, &c->flags); switch (c->opts.errors) { case BCH_ON_ERROR_continue: @@ -26,8 +26,8 @@ bool bch2_inconsistent_error(struct bch_fs *c) void bch2_topology_error(struct bch_fs *c) { - set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags); - if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) + set_bit(BCH_FS_topology_error, &c->flags); + if (test_bit(BCH_FS_fsck_done, &c->flags)) bch2_inconsistent_error(c); } @@ -114,7 +114,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) { struct fsck_err_state *s; - if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) + if (test_bit(BCH_FS_fsck_done, &c->flags)) return NULL; list_for_each_entry(s, &c->fsck_error_msgs, list) @@ -196,7 +196,7 @@ int bch2_fsck_err(struct bch_fs *c, prt_printf(out, bch2_log_msg(c, "")); #endif - if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) { + if (test_bit(BCH_FS_fsck_done, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { prt_str(out, ", shutting down"); @@ -256,7 +256,7 @@ int bch2_fsck_err(struct bch_fs *c, if (print) bch2_print_string_as_lines(KERN_ERR, out->buf); - if (!test_bit(BCH_FS_FSCK_DONE, &c->flags) && + if (!test_bit(BCH_FS_fsck_done, &c->flags) && (ret != -BCH_ERR_fsck_fix && ret != -BCH_ERR_fsck_ignore)) bch_err(c, "Unable to continue, halting"); @@ -274,10 +274,10 @@ int bch2_fsck_err(struct bch_fs *c, bch2_inconsistent_error(c); if (ret == -BCH_ERR_fsck_fix) { - set_bit(BCH_FS_ERRORS_FIXED, &c->flags); + set_bit(BCH_FS_errors_fixed, &c->flags); } else { - set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags); - set_bit(BCH_FS_ERROR, &c->flags); + set_bit(BCH_FS_errors_not_fixed, &c->flags); + set_bit(BCH_FS_error, &c->flags); } return ret; diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 52f0e7acda3d..637a83e4d961 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -638,7 +638,7 @@ static int __bch2_writepage(struct folio *folio, /* Check for writing past i_size: */ WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > round_up(i_size, block_bytes(c)) && - !test_bit(BCH_FS_EMERGENCY_RO, &c->flags), + !test_bit(BCH_FS_emergency_ro, &c->flags), "writing past i_size: %llu > %llu (unrounded %llu)\n", bio_end_sector(&w->io->op.wbio.bio) << 9, round_up(i_size, block_bytes(c)), diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 50976ca0a5f0..886c0f30e681 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1770,7 +1770,7 @@ static int bch2_unfreeze(struct super_block *sb) struct bch_fs *c = sb->s_fs_info; int ret; - if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) + if (test_bit(BCH_FS_emergency_ro, &c->flags)) return 0; down_write(&c->state_lock); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 654309a1a5d5..24b2e2834cfc 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -448,7 +448,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, bch2_btree_id_str(btree_id), pos.inode, pos.offset, i->id, n.id, n.equiv); - set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots); } } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 4001b56c14d0..abd6cacafe93 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1182,7 +1182,7 @@ int bch2_delete_dead_inodes(struct bch_fs *c) break; if (ret) { - if (!test_bit(BCH_FS_RW, &c->flags)) { + if (!test_bit(BCH_FS_rw, &c->flags)) { bch2_trans_unlock(trans); bch2_fs_lazy_rw(c); } diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index f9d9aa95bf3a..0200e299cfbb 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -267,7 +267,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work) while (!(ret = PTR_ERR_OR_ZERO(b)) && b && - !test_bit(BCH_FS_STOPPING, &c->flags)) + !test_bit(BCH_FS_stopping, &c->flags)) b = bch2_btree_iter_next_node(&iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d49c0af42623..643369c8c512 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -533,7 +533,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); keys->gap = keys->nr; - set_bit(BCH_FS_MAY_GO_RW, &c->flags); + set_bit(BCH_FS_may_go_rw, &c->flags); if (keys->nr) return bch2_fs_read_write_early(c); return 0; @@ -961,13 +961,13 @@ int bch2_fs_recovery(struct bch_fs *c) /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && - test_bit(BCH_FS_ERRORS_FIXED, &c->flags) && - !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags) && - !test_bit(BCH_FS_ERROR, &c->flags)) { + test_bit(BCH_FS_errors_fixed, &c->flags) && + !test_bit(BCH_FS_errors_not_fixed, &c->flags) && + !test_bit(BCH_FS_error, &c->flags)) { bch2_flush_fsck_errs(c); bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); - clear_bit(BCH_FS_ERRORS_FIXED, &c->flags); + clear_bit(BCH_FS_errors_fixed, &c->flags); c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; @@ -975,13 +975,13 @@ int bch2_fs_recovery(struct bch_fs *c) if (ret) goto err; - if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags) || - test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) { + if (test_bit(BCH_FS_errors_fixed, &c->flags) || + test_bit(BCH_FS_errors_not_fixed, &c->flags)) { bch_err(c, "Second fsck run was not clean"); - set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags); + set_bit(BCH_FS_errors_not_fixed, &c->flags); } - set_bit(BCH_FS_ERRORS_FIXED, &c->flags); + set_bit(BCH_FS_errors_fixed, &c->flags); } if (enabled_qtypes(c)) { @@ -1000,13 +1000,13 @@ int bch2_fs_recovery(struct bch_fs *c) write_sb = true; } - if (!test_bit(BCH_FS_ERROR, &c->flags) && + if (!test_bit(BCH_FS_error, &c->flags) && !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) { c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); write_sb = true; } - if (!test_bit(BCH_FS_ERROR, &c->flags)) { + if (!test_bit(BCH_FS_error, &c->flags)) { struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (ext && (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) || @@ -1018,8 +1018,8 @@ int bch2_fs_recovery(struct bch_fs *c) } if (c->opts.fsck && - !test_bit(BCH_FS_ERROR, &c->flags) && - !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) { + !test_bit(BCH_FS_error, &c->flags) && + !test_bit(BCH_FS_errors_not_fixed, &c->flags)) { SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0); write_sb = true; @@ -1053,7 +1053,7 @@ int bch2_fs_recovery(struct bch_fs *c) ret = 0; out: - set_bit(BCH_FS_FSCK_DONE, &c->flags); + set_bit(BCH_FS_fsck_done, &c->flags); bch2_flush_fsck_errs(c); if (!c->opts.keep_journal && @@ -1061,7 +1061,7 @@ int bch2_fs_recovery(struct bch_fs *c) bch2_journal_keys_put_initial(c); kfree(clean); - if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) { + if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) { bch2_fs_read_write_early(c); bch2_delete_dead_snapshots_async(c); } @@ -1100,8 +1100,8 @@ int bch2_fs_initialize(struct bch_fs *c) mutex_unlock(&c->sb_lock); c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns); - set_bit(BCH_FS_MAY_GO_RW, &c->flags); - set_bit(BCH_FS_FSCK_DONE, &c->flags); + set_bit(BCH_FS_may_go_rw, &c->flags); + set_bit(BCH_FS_fsck_done, &c->flags); for (i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b23550b44098..e473c788fd64 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -318,7 +318,7 @@ int bch2_mark_snapshot(struct btree_trans *trans, __set_is_ancestor_bitmap(c, id); if (BCH_SNAPSHOT_DELETED(s.v)) { - set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots) bch2_delete_dead_snapshots_async(c); } @@ -1376,10 +1376,10 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) u32 *i, id; int ret = 0; - if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) + if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) return 0; - if (!test_bit(BCH_FS_STARTED, &c->flags)) { + if (!test_bit(BCH_FS_started, &c->flags)) { ret = bch2_fs_read_write_early(c); if (ret) { bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); @@ -1680,7 +1680,7 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct if (BCH_SNAPSHOT_DELETED(snap.v) || bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { - set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); return 0; } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 320a04a42e5b..55e560c198de 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -950,9 +950,9 @@ int bch2_write_super(struct bch_fs *c) le64_add_cpu(&c->disk_sb.sb->seq, 1); - if (test_bit(BCH_FS_ERROR, &c->flags)) + if (test_bit(BCH_FS_error, &c->flags)) SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); - if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags)) + if (test_bit(BCH_FS_topology_error, &c->flags)) SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1); SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 00db787a3883..8e566c3afcfd 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -79,6 +79,13 @@ MODULE_SOFTDEP("pre: chacha20"); MODULE_SOFTDEP("pre: poly1305"); MODULE_SOFTDEP("pre: xxhash"); +const char * const bch2_fs_flag_strs[] = { +#define x(n) #n, + BCH_FS_FLAGS() +#undef x + NULL +}; + #define KTYPE(type) \ static const struct attribute_group type ## _group = { \ .attrs = type ## _files \ @@ -246,8 +253,8 @@ static void __bch2_fs_read_only(struct bch_fs *c) journal_cur_seq(&c->journal)); if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) && - !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) - set_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags); + !test_bit(BCH_FS_emergency_ro, &c->flags)) + set_bit(BCH_FS_clean_shutdown, &c->flags); bch2_fs_journal_stop(&c->journal); /* @@ -262,19 +269,19 @@ static void bch2_writes_disabled(struct percpu_ref *writes) { struct bch_fs *c = container_of(writes, struct bch_fs, writes); - set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); + set_bit(BCH_FS_write_disable_complete, &c->flags); wake_up(&bch2_read_only_wait); } #endif void bch2_fs_read_only(struct bch_fs *c) { - if (!test_bit(BCH_FS_RW, &c->flags)) { + if (!test_bit(BCH_FS_rw, &c->flags)) { bch2_journal_reclaim_stop(&c->journal); return; } - BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); + BUG_ON(test_bit(BCH_FS_write_disable_complete, &c->flags)); bch_verbose(c, "going read-only"); @@ -282,7 +289,7 @@ void bch2_fs_read_only(struct bch_fs *c) * Block new foreground-end write operations from starting - any new * writes will return -EROFS: */ - set_bit(BCH_FS_GOING_RO, &c->flags); + set_bit(BCH_FS_going_ro, &c->flags); #ifndef BCH_WRITE_REF_DEBUG percpu_ref_kill(&c->writes); #else @@ -302,30 +309,30 @@ void bch2_fs_read_only(struct bch_fs *c) * that going RO is complete: */ wait_event(bch2_read_only_wait, - test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) || - test_bit(BCH_FS_EMERGENCY_RO, &c->flags)); + test_bit(BCH_FS_write_disable_complete, &c->flags) || + test_bit(BCH_FS_emergency_ro, &c->flags)); - bool writes_disabled = test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); + bool writes_disabled = test_bit(BCH_FS_write_disable_complete, &c->flags); if (writes_disabled) bch_verbose(c, "finished waiting for writes to stop"); __bch2_fs_read_only(c); wait_event(bch2_read_only_wait, - test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); + test_bit(BCH_FS_write_disable_complete, &c->flags)); if (!writes_disabled) bch_verbose(c, "finished waiting for writes to stop"); - clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); - clear_bit(BCH_FS_GOING_RO, &c->flags); - clear_bit(BCH_FS_RW, &c->flags); + clear_bit(BCH_FS_write_disable_complete, &c->flags); + clear_bit(BCH_FS_going_ro, &c->flags); + clear_bit(BCH_FS_rw, &c->flags); if (!bch2_journal_error(&c->journal) && - !test_bit(BCH_FS_ERROR, &c->flags) && - !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && - test_bit(BCH_FS_STARTED, &c->flags) && - test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags) && + !test_bit(BCH_FS_error, &c->flags) && + !test_bit(BCH_FS_emergency_ro, &c->flags) && + test_bit(BCH_FS_started, &c->flags) && + test_bit(BCH_FS_clean_shutdown, &c->flags) && !c->opts.norecovery) { BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal)); BUG_ON(atomic_read(&c->btree_cache.dirty)); @@ -356,7 +363,7 @@ static void bch2_fs_read_only_async(struct bch_fs *c) bool bch2_fs_emergency_read_only(struct bch_fs *c) { - bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags); + bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags); bch2_journal_halt(&c->journal); bch2_fs_read_only_async(c); @@ -397,12 +404,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) unsigned i; int ret; - if (test_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags)) { + if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) { bch_err(c, "cannot go rw, unfixed btree errors"); return -BCH_ERR_erofs_unfixed_errors; } - if (test_bit(BCH_FS_RW, &c->flags)) + if (test_bit(BCH_FS_rw, &c->flags)) return 0; if (c->opts.norecovery) @@ -425,7 +432,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) if (ret) goto err; - clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags); + clear_bit(BCH_FS_clean_shutdown, &c->flags); /* * First journal write must be a flush write: after a clean shutdown we @@ -439,8 +446,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); - set_bit(BCH_FS_RW, &c->flags); - set_bit(BCH_FS_WAS_RW, &c->flags); + set_bit(BCH_FS_rw, &c->flags); + set_bit(BCH_FS_was_rw, &c->flags); #ifndef BCH_WRITE_REF_DEBUG percpu_ref_reinit(&c->writes); @@ -473,7 +480,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch2_do_pending_node_rewrites(c); return 0; err: - if (test_bit(BCH_FS_RW, &c->flags)) + if (test_bit(BCH_FS_rw, &c->flags)) bch2_fs_read_only(c); else __bch2_fs_read_only(c); @@ -573,7 +580,7 @@ void __bch2_fs_stop(struct bch_fs *c) bch_verbose(c, "shutting down"); - set_bit(BCH_FS_STOPPING, &c->flags); + set_bit(BCH_FS_stopping, &c->flags); cancel_work_sync(&c->journal_seq_blacklist_gc_work); @@ -966,7 +973,7 @@ int bch2_fs_start(struct bch_fs *c) down_write(&c->state_lock); - BUG_ON(test_bit(BCH_FS_STARTED, &c->flags)); + BUG_ON(test_bit(BCH_FS_started, &c->flags)); mutex_lock(&c->sb_lock); @@ -1001,12 +1008,12 @@ int bch2_fs_start(struct bch_fs *c) goto err; } - set_bit(BCH_FS_STARTED, &c->flags); + set_bit(BCH_FS_started, &c->flags); if (c->opts.read_only || c->opts.nochanges) { bch2_fs_read_only(c); } else { - ret = !test_bit(BCH_FS_RW, &c->flags) + ret = !test_bit(BCH_FS_rw, &c->flags) ? bch2_fs_read_write(c) : bch2_fs_read_write_late(c); if (ret) diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index bf762df18012..dada09331d2e 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -8,6 +8,8 @@ #include +extern const char * const bch2_fs_flag_strs[]; + struct bch_fs *bch2_dev_to_fs(dev_t); struct bch_fs *bch2_uuid_to_fs(__uuid_t); @@ -37,8 +39,8 @@ int bch2_fs_read_write_early(struct bch_fs *); */ static inline void bch2_fs_lazy_rw(struct bch_fs *c) { - if (!test_bit(BCH_FS_RW, &c->flags) && - !test_bit(BCH_FS_WAS_RW, &c->flags)) + if (!test_bit(BCH_FS_rw, &c->flags) && + !test_bit(BCH_FS_was_rw, &c->flags)) bch2_fs_read_write_early(c); } diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 67739a116c6c..1b82a3a93d14 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -145,6 +145,7 @@ rw_attribute(gc_gens_pos); read_attribute(uuid); read_attribute(minor); +read_attribute(flags); read_attribute(bucket_size); read_attribute(first_bucket); read_attribute(nbuckets); @@ -268,7 +269,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c memset(s, 0, sizeof(s)); - if (!test_bit(BCH_FS_STARTED, &c->flags)) + if (!test_bit(BCH_FS_started, &c->flags)) return -EPERM; trans = bch2_trans_get(c); @@ -384,6 +385,9 @@ SHOW(bch2_fs) sysfs_print(minor, c->minor); sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b); + if (attr == &sysfs_flags) + prt_bitflags(out, bch2_fs_flag_strs, c->flags); + sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c)); if (attr == &sysfs_btree_write_stats) @@ -497,12 +501,12 @@ STORE(bch2_fs) /* Debugging: */ - if (!test_bit(BCH_FS_STARTED, &c->flags)) + if (!test_bit(BCH_FS_started, &c->flags)) return -EPERM; /* Debugging: */ - if (!test_bit(BCH_FS_RW, &c->flags)) + if (!test_bit(BCH_FS_rw, &c->flags)) return -EROFS; if (attr == &sysfs_prune_cache) { @@ -634,6 +638,7 @@ STORE(bch2_fs_internal) SYSFS_OPS(bch2_fs_internal); struct attribute *bch2_fs_internal_files[] = { + &sysfs_flags, &sysfs_journal_debug, &sysfs_btree_updates, &sysfs_btree_cache, From c259bd95d1df6a95d6a78b7f6ebc69c8d172126f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 26 Nov 2023 20:18:16 -0500 Subject: [PATCH 052/226] bcachefs: No need to allocate keys for write buffer Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 17 ++++++++++++++++- fs/bcachefs/backpointers.h | 27 ++++++++++----------------- fs/bcachefs/btree_update.c | 30 +++++------------------------- fs/bcachefs/btree_update.h | 7 ++++++- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 69bd6c281d1a..2b6ab5ce95f2 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -136,15 +136,30 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, } int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, - struct bkey_i_backpointer *bp_k, + struct bpos bucket, struct bch_backpointer bp, struct bkey_s_c orig_k, bool insert) { struct btree_iter bp_iter; struct bkey_s_c k; + struct bkey_i_backpointer *bp_k; int ret; + bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); + ret = PTR_ERR_OR_ZERO(bp_k); + if (ret) + return ret; + + bkey_backpointer_init(&bp_k->k_i); + bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset); + bp_k->v = bp; + + if (!insert) { + bp_k->k.type = KEY_TYPE_deleted; + set_bkey_val_u64s(&bp_k->k, 0); + } + k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp_k->k.p, BTREE_ITER_INTENT| diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index ab866feeaf66..737e2396ade7 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -63,7 +63,7 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c, return ret; } -int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bkey_i_backpointer *, +int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bpos bucket, struct bch_backpointer, struct bkey_s_c, bool); static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, @@ -72,28 +72,21 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, struct bkey_s_c orig_k, bool insert) { - struct bch_fs *c = trans->c; - struct bkey_i_backpointer *bp_k; - int ret; + if (unlikely(bch2_backpointers_no_use_write_buffer)) + return bch2_bucket_backpointer_mod_nowritebuffer(trans, bucket, bp, orig_k, insert); - bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); - ret = PTR_ERR_OR_ZERO(bp_k); - if (ret) - return ret; + struct bkey_i_backpointer bp_k; - bkey_backpointer_init(&bp_k->k_i); - bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset); - bp_k->v = bp; + bkey_backpointer_init(&bp_k.k_i); + bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset); + bp_k.v = bp; if (!insert) { - bp_k->k.type = KEY_TYPE_deleted; - set_bkey_val_u64s(&bp_k->k, 0); + bp_k.k.type = KEY_TYPE_deleted; + set_bkey_val_u64s(&bp_k.k, 0); } - if (unlikely(bch2_backpointers_no_use_write_buffer)) - return bch2_bucket_backpointer_mod_nowritebuffer(trans, bp_k, bp, orig_k, insert); - - return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i); + return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i); } static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level, diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 0d6830ec73c4..3642e2629716 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -710,20 +710,6 @@ int bch2_btree_delete_at(struct btree_trans *trans, return bch2_btree_delete_extent_at(trans, iter, 0, update_flags); } -int bch2_btree_delete_at_buffered(struct btree_trans *trans, - enum btree_id btree, struct bpos pos) -{ - struct bkey_i *k; - - k = bch2_trans_kmalloc(trans, sizeof(*k)); - if (IS_ERR(k)) - return PTR_ERR(k); - - bkey_init(&k->k); - k->k.p = pos; - return bch2_trans_update_buffered(trans, btree, k); -} - int bch2_btree_delete(struct btree_trans *trans, enum btree_id btree, struct bpos pos, unsigned update_flags) @@ -828,19 +814,13 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, struct bpos pos, bool set) { - struct bkey_i *k; - int ret = 0; + struct bkey_i k; - k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); - ret = PTR_ERR_OR_ZERO(k); - if (unlikely(ret)) - return ret; + bkey_init(&k.k); + k.k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; + k.k.p = pos; - bkey_init(&k->k); - k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; - k->k.p = pos; - - return bch2_trans_update_buffered(trans, btree, k); + return bch2_trans_update_buffered(trans, btree, &k); } __printf(2, 0) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 14a2315aa88e..fa19f3212b05 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -47,7 +47,6 @@ enum bch_trans_commit_flags { int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *, unsigned, unsigned); int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); -int bch2_btree_delete_at_buffered(struct btree_trans *, enum btree_id, struct bpos); int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id, @@ -65,6 +64,12 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id, int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); +static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans, + enum btree_id btree, struct bpos pos) +{ + return bch2_btree_bit_mod(trans, btree, pos, false); +} + int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id, struct bpos, struct bpos); From 56db2429511e14291bdb0105899e02f074885d4d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 22:31:16 -0400 Subject: [PATCH 053/226] bcachefs: Improve btree write buffer tracepoints - add a tracepoint for write_buffer_flush_sync; this is expensive - fix the write_buffer_flush_slowpath tracepoint Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 4 +++- fs/bcachefs/btree_write_buffer.c | 8 ++++++-- fs/bcachefs/trace.h | 33 ++++++++++++++++++++++++-------- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 41fa8b9b69f4..545df77bcd46 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1578,7 +1578,9 @@ struct bch_sb_field_disk_groups { x(write_super, 73) \ x(trans_restart_would_deadlock_recursion_limit, 74) \ x(trans_restart_write_buffer_flush, 75) \ - x(trans_restart_split_race, 76) + x(trans_restart_split_race, 76) \ + x(write_buffer_flush_slowpath, 77) \ + x(write_buffer_flush_sync, 78) enum bch_persistent_counters { #define x(t, n, ...) BCH_COUNTER_##t, diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index a6bf6ed37ced..02ed0f2c5df5 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -241,7 +241,7 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f mutex_unlock(&wb->flush_lock); return ret; slowpath: - trace_write_buffer_flush_slowpath(trans, i - keys, nr); + trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr); /* * Now sort the rest by journal seq and bump the journal pin as we go. @@ -277,8 +277,12 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) { + struct bch_fs *c = trans->c; + + trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_); + bch2_trans_unlock(trans); - mutex_lock(&trans->c->btree_write_buffer.flush_lock); + mutex_lock(&c->btree_write_buffer.flush_lock); return __bch2_btree_write_buffer_flush(trans, 0, true); } diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index f2e405c359b5..6eced95ce374 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1334,21 +1334,38 @@ TRACE_EVENT(write_buffer_flush, __entry->nr, __entry->size, __entry->skipped, __entry->fast) ); -TRACE_EVENT(write_buffer_flush_slowpath, - TP_PROTO(struct btree_trans *trans, size_t nr, size_t size), - TP_ARGS(trans, nr, size), +TRACE_EVENT(write_buffer_flush_sync, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), + TP_ARGS(trans, caller_ip), TP_STRUCT__entry( - __field(size_t, nr ) - __field(size_t, size ) + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) ), TP_fast_assign( - __entry->nr = nr; - __entry->size = size; + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; ), - TP_printk("%zu/%zu", __entry->nr, __entry->size) + TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip) +); + +TRACE_EVENT(write_buffer_flush_slowpath, + TP_PROTO(struct btree_trans *trans, size_t slowpath, size_t total), + TP_ARGS(trans, slowpath, total), + + TP_STRUCT__entry( + __field(size_t, slowpath ) + __field(size_t, total ) + ), + + TP_fast_assign( + __entry->slowpath = slowpath; + __entry->total = total; + ), + + TP_printk("%zu/%zu", __entry->slowpath, __entry->total) ); DEFINE_EVENT(str, rebalance_extent, From 8ab3fa96396955bc7a0669f81007e2dbfe1e7da0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 27 Nov 2023 01:42:42 -0500 Subject: [PATCH 054/226] bcachefs: kill journal->preres_wait Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.h | 1 - fs/bcachefs/journal_types.h | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 2f768e11aec9..a1384433d193 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -119,7 +119,6 @@ static inline void journal_wake(struct journal *j) { wake_up(&j->wait); closure_wake_up(&j->async_wait); - closure_wake_up(&j->preres_wait); } static inline struct journal_buf *journal_cur_buf(struct journal *j) diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 2427cce64fed..4ffae252e01e 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -195,7 +195,6 @@ struct journal { /* Used when waiting because the journal was full */ wait_queue_head_t wait; struct closure_waitlist async_wait; - struct closure_waitlist preres_wait; struct closure io; struct delayed_work write_work; From cf5bacb6a5213cd7f59c1dbf11531ff96445027a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 27 Nov 2023 01:46:18 -0500 Subject: [PATCH 055/226] bcachefs: delete useless commit_do() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 95314aaf4b58..b39b28b4ae73 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -737,7 +737,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, } six_unlock_read(&ck->c.lock); - ret = commit_do(trans, NULL, NULL, 0, + ret = lockrestart_do(trans, btree_key_cache_flush_pos(trans, key, seq, BCH_TRANS_COMMIT_journal_reclaim, false)); unlock: From 183bcc89b855c412bfefa545b799006d66f689a6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 19:37:15 -0400 Subject: [PATCH 056/226] bcachefs: Clean up btree write buffer write ref handling __bch2_btree_write_buffer_flush() now assumes a write ref is already held (as called by the transaction commit path); and the wrappers bch2_write_buffer_flush() and flush_sync() take an explicit write ref. This means internally the write buffer code can always use BTREE_INSERT_NOCHECK_RW, instead of in the previous code passing flags around and hoping the NOCHECK_RW flag was always carried around correctly. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 3 ++- fs/bcachefs/btree_trans_commit.c | 6 ++--- fs/bcachefs/btree_write_buffer.c | 38 ++++++++++++++++++++++---------- fs/bcachefs/btree_write_buffer.h | 3 ++- fs/bcachefs/ec.c | 2 +- fs/bcachefs/inode.c | 10 ++++----- fs/bcachefs/movinggc.c | 3 +++ 7 files changed, 41 insertions(+), 24 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 991de12974db..4186779f1e44 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -672,7 +672,8 @@ struct btree_trans_buf { x(invalidate) \ x(delete_dead_snapshots) \ x(snapshot_delete_pagecache) \ - x(sysfs) + x(sysfs) \ + x(btree_write_buffer) enum bch_write_ref { #define x(n) BCH_WRITE_REF_##n, diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 578521216b6a..a2d0494b58f4 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -961,8 +961,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, if (wb->state.nr > wb->size * 3 / 4) { bch2_trans_begin(trans); - ret = __bch2_btree_write_buffer_flush(trans, - flags|BCH_TRANS_COMMIT_no_check_rw, true); + ret = __bch2_btree_write_buffer_flush(trans, true); if (!ret) { trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); @@ -1077,8 +1076,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) bch2_trans_begin(trans); bch2_trans_unlock(trans); - ret = __bch2_btree_write_buffer_flush(trans, - flags|BCH_TRANS_COMMIT_no_check_rw, true); + ret = __bch2_btree_write_buffer_flush(trans, true); if (!ret) { trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 02ed0f2c5df5..eae8d161e984 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -137,8 +137,7 @@ btree_write_buffered_insert(struct btree_trans *trans, return ret; } -int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags, - bool locked) +int __bch2_btree_write_buffer_flush(struct btree_trans *trans, bool locked) { struct bch_fs *c = trans->c; struct journal *j = &c->journal; @@ -210,8 +209,8 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f iter.path->preserve = false; do { - ret = bch2_btree_write_buffer_flush_one(trans, &iter, i, - commit_flags, &write_locked, &fast); + ret = bch2_btree_write_buffer_flush_one(trans, &iter, i, 0, + &write_locked, &fast); if (!write_locked) bch2_trans_begin(trans); } while (bch2_err_matches(ret, BCH_ERR_transaction_restart)); @@ -252,9 +251,6 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f btree_write_buffered_journal_cmp, NULL); - commit_flags &= ~BCH_WATERMARK_MASK; - commit_flags |= BCH_WATERMARK_reclaim; - for (i = keys; i < keys + nr; i++) { if (!i->journal_seq) continue; @@ -263,7 +259,8 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f bch2_btree_write_buffer_journal_flush); ret = commit_do(trans, NULL, NULL, - commit_flags| + BCH_WATERMARK_reclaim| + BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_journal_res| BCH_TRANS_COMMIT_journal_reclaim, @@ -279,16 +276,33 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) { struct bch_fs *c = trans->c; + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer)) + return -BCH_ERR_erofs_no_writes; + trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_); bch2_trans_unlock(trans); - mutex_lock(&c->btree_write_buffer.flush_lock); - return __bch2_btree_write_buffer_flush(trans, 0, true); + mutex_lock(&trans->c->btree_write_buffer.flush_lock); + int ret = __bch2_btree_write_buffer_flush(trans, true); + bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); + return ret; +} + +int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans) +{ + return __bch2_btree_write_buffer_flush(trans, false); } int bch2_btree_write_buffer_flush(struct btree_trans *trans) { - return __bch2_btree_write_buffer_flush(trans, 0, false); + struct bch_fs *c = trans->c; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer)) + return -BCH_ERR_erofs_no_writes; + + int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans); + bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); + return ret; } static int bch2_btree_write_buffer_journal_flush(struct journal *j, @@ -300,7 +314,7 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j, mutex_lock(&wb->flush_lock); return bch2_trans_run(c, - __bch2_btree_write_buffer_flush(trans, BCH_TRANS_COMMIT_no_check_rw, true)); + __bch2_btree_write_buffer_flush(trans, true)); } static inline u64 btree_write_buffer_ref(int idx) diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index 322df1c8304e..45388e636639 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -2,7 +2,8 @@ #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H #define _BCACHEFS_BTREE_WRITE_BUFFER_H -int __bch2_btree_write_buffer_flush(struct btree_trans *, unsigned, bool); +int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); +int __bch2_btree_write_buffer_flush(struct btree_trans *, bool); int bch2_btree_write_buffer_flush_sync(struct btree_trans *); int bch2_btree_write_buffer_flush(struct btree_trans *); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 24ec634077a3..bc8b556f19a9 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1005,7 +1005,7 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s) unsigned i, nr_data = v->nr_blocks - v->nr_redundant; int ret = 0; - ret = bch2_btree_write_buffer_flush(trans); + ret = bch2_btree_write_buffer_flush_nocheck_rw(trans); if (ret) goto err; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index abd6cacafe93..0d2bdc7575a8 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1162,10 +1162,6 @@ int bch2_delete_dead_inodes(struct bch_fs *c) again: need_another_pass = false; - ret = bch2_btree_write_buffer_flush_sync(trans); - if (ret) - goto err; - /* * Weird transaction restart handling here because on successful delete, * bch2_inode_rm_snapshot() will return a nested transaction restart, @@ -1196,8 +1192,12 @@ int bch2_delete_dead_inodes(struct bch_fs *c) } bch2_trans_iter_exit(trans, &iter); - if (!ret && need_another_pass) + if (!ret && need_another_pass) { + ret = bch2_btree_write_buffer_flush_sync(trans); + if (ret) + goto err; goto again; + } err: bch2_trans_put(trans); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index b8d0542222c3..ba3a323a4843 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -154,6 +154,9 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, move_buckets_wait(ctxt, buckets_in_flight, false); ret = bch2_btree_write_buffer_flush(trans); + if (bch2_err_matches(ret, EROFS)) + return ret; + if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()", __func__, bch2_err_str(ret))) return ret; From d3083cf28d54991c299d5c05790e40e52cf75df0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 20:32:19 -0400 Subject: [PATCH 057/226] bcachefs: bch2_btree_write_buffer_flush_locked() Minor refactoring - improved naming, and move the responsibility for flush_lock to the caller instead of having it be shared. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 6 ++++-- fs/bcachefs/btree_write_buffer.c | 27 +++++++++++++++++---------- fs/bcachefs/btree_write_buffer.h | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index a2d0494b58f4..336350bd9048 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -961,7 +961,8 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, if (wb->state.nr > wb->size * 3 / 4) { bch2_trans_begin(trans); - ret = __bch2_btree_write_buffer_flush(trans, true); + ret = bch2_btree_write_buffer_flush_locked(trans); + mutex_unlock(&wb->flush_lock); if (!ret) { trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); @@ -1076,7 +1077,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) bch2_trans_begin(trans); bch2_trans_unlock(trans); - ret = __bch2_btree_write_buffer_flush(trans, true); + ret = bch2_btree_write_buffer_flush_locked(trans); + mutex_unlock(&c->btree_write_buffer.flush_lock); if (!ret) { trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index eae8d161e984..77d86db344b7 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -137,7 +137,7 @@ btree_write_buffered_insert(struct btree_trans *trans, return ret; } -int __bch2_btree_write_buffer_flush(struct btree_trans *trans, bool locked) +int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) { struct bch_fs *c = trans->c; struct journal *j = &c->journal; @@ -152,9 +152,6 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, bool locked) memset(&pin, 0, sizeof(pin)); - if (!locked && !mutex_trylock(&wb->flush_lock)) - return 0; - bch2_journal_pin_copy(j, &pin, &wb->journal_pin, bch2_btree_write_buffer_journal_flush); bch2_journal_pin_drop(j, &wb->journal_pin); @@ -237,7 +234,6 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, bool locked) bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)); out: bch2_journal_pin_drop(j, &pin); - mutex_unlock(&wb->flush_lock); return ret; slowpath: trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr); @@ -282,15 +278,25 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_); bch2_trans_unlock(trans); - mutex_lock(&trans->c->btree_write_buffer.flush_lock); - int ret = __bch2_btree_write_buffer_flush(trans, true); + mutex_lock(&c->btree_write_buffer.flush_lock); + int ret = bch2_btree_write_buffer_flush_locked(trans); + mutex_unlock(&c->btree_write_buffer.flush_lock); bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); return ret; } int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans) { - return __bch2_btree_write_buffer_flush(trans, false); + struct bch_fs *c = trans->c; + struct btree_write_buffer *wb = &c->btree_write_buffer; + int ret = 0; + + if (mutex_trylock(&wb->flush_lock)) { + ret = bch2_btree_write_buffer_flush_locked(trans); + mutex_unlock(&wb->flush_lock); + } + + return ret; } int bch2_btree_write_buffer_flush(struct btree_trans *trans) @@ -312,9 +318,10 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j, struct btree_write_buffer *wb = &c->btree_write_buffer; mutex_lock(&wb->flush_lock); + int ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans)); + mutex_unlock(&wb->flush_lock); - return bch2_trans_run(c, - __bch2_btree_write_buffer_flush(trans, true)); + return ret; } static inline u64 btree_write_buffer_ref(int idx) diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index 45388e636639..1705b36d5875 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -2,8 +2,8 @@ #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H #define _BCACHEFS_BTREE_WRITE_BUFFER_H +int bch2_btree_write_buffer_flush_locked(struct btree_trans *); int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); -int __bch2_btree_write_buffer_flush(struct btree_trans *, bool); int bch2_btree_write_buffer_flush_sync(struct btree_trans *); int bch2_btree_write_buffer_flush(struct btree_trans *); From cb13f471390ce646a3d5aa9c599f7eec43ddb2ac Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 20:36:00 -0400 Subject: [PATCH 058/226] bcachefs: bch2_btree_write_buffer_flush() -> bch2_btree_write_buffer_tryflush() More accurate naming. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- fs/bcachefs/btree_write_buffer.c | 2 +- fs/bcachefs/btree_write_buffer.h | 2 +- fs/bcachefs/move.c | 7 +++---- fs/bcachefs/movinggc.c | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index fc8555df89c8..ccc3779dd366 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1817,7 +1817,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) unsigned i; int ret = 0; - ret = bch2_btree_write_buffer_flush(trans); + ret = bch2_btree_write_buffer_tryflush(trans); if (ret) goto err; diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 77d86db344b7..6a1915680407 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -299,7 +299,7 @@ int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans) return ret; } -int bch2_btree_write_buffer_flush(struct btree_trans *trans) +int bch2_btree_write_buffer_tryflush(struct btree_trans *trans) { struct bch_fs *c = trans->c; diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index 1705b36d5875..dec2c9a8bab2 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -5,7 +5,7 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *); int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); int bch2_btree_write_buffer_flush_sync(struct btree_trans *); -int bch2_btree_write_buffer_flush(struct btree_trans *); +int bch2_btree_write_buffer_tryflush(struct btree_trans *); int bch2_btree_insert_keys_write_buffer(struct btree_trans *); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 615f18a0311a..eafa80c18ba4 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -681,11 +681,10 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; fragmentation = a->fragmentation_lru; - ret = bch2_btree_write_buffer_flush(trans); - if (ret) { - bch_err_msg(c, ret, "flushing btree write buffer"); + ret = bch2_btree_write_buffer_tryflush(trans); + bch_err_msg(c, ret, "flushing btree write buffer"); + if (ret) goto err; - } while (!(ret = bch2_move_ratelimit(ctxt))) { if (is_kthread && kthread_should_stop()) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index ba3a323a4843..7cffcf6cfa64 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -153,11 +153,11 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, move_buckets_wait(ctxt, buckets_in_flight, false); - ret = bch2_btree_write_buffer_flush(trans); + ret = bch2_btree_write_buffer_tryflush(trans); if (bch2_err_matches(ret, EROFS)) return ret; - if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()", + if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()", __func__, bch2_err_str(ret))) return ret; From 74644030098a0c4932e194fa1b2fa052226f3868 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 27 Nov 2023 22:37:27 -0500 Subject: [PATCH 059/226] bcachefs: count_event() Small helper for event counters. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- fs/bcachefs/bcachefs.h | 4 +++- fs/bcachefs/data_update.c | 2 +- fs/bcachefs/move.c | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ccc3779dd366..6e719dd09e31 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1674,7 +1674,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, if (ret) goto out; - this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]); + count_event(c, bucket_discard); (*discarded)++; out: (*seen)++; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 4186779f1e44..06b5cea9980e 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -223,9 +223,11 @@ #define race_fault(...) dynamic_fault("bcachefs:race") +#define count_event(_c, _name) this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]) + #define trace_and_count(_c, _name, ...) \ do { \ - this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]); \ + count_event(_c, _name); \ trace_##_name(__VA_ARGS__); \ } while (0) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 488279b3d08e..59b558c25977 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -321,7 +321,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, &m->stats->sectors_raced); } - this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]); + count_event(c, move_extent_fail); bch2_btree_iter_advance(&iter); goto next; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index eafa80c18ba4..a6db46a70f15 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -349,7 +349,8 @@ int bch2_move_extent(struct moving_context *ctxt, bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; - this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]); + count_event(c, move_extent_start_fail); + if (trace_move_extent_start_fail_enabled()) { struct printbuf buf = PRINTBUF; From e153a0d70b31b605282e2dd16c5fb924f79f5e93 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 26 Nov 2023 17:02:06 -0500 Subject: [PATCH 060/226] bcachefs: Improve trace_trans_restart_too_many_iters() We now include the list of paths in use. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 16 ++++++++++++++ fs/bcachefs/btree_iter.h | 8 +++---- fs/bcachefs/trace.h | 46 ++++++++++++++++++++++++++++++---------- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index ff5448fca788..f83fab9e62fc 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1492,6 +1492,22 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans) trans->nr_max_paths = hweight64(trans->paths_allocated); } +noinline __cold +int __bch2_btree_trans_too_many_iters(struct btree_trans *trans) +{ + if (trace_trans_restart_too_many_iters_enabled()) { + struct printbuf buf = PRINTBUF; + + bch2_trans_paths_to_text(&buf, trans); + trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf); + printbuf_exit(&buf); + } + + count_event(trans->c, trans_restart_too_many_iters); + + return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); +} + static noinline void btree_path_overflow(struct btree_trans *trans) { bch2_dump_trans_paths_updates(trans); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 9ef5021a6d37..3859c0b27d2b 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -627,12 +627,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter * return bch2_btree_iter_peek_slot(iter); } +int __bch2_btree_trans_too_many_iters(struct btree_trans *); + static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) { - trace_and_count(trans->c, trans_restart_too_many_iters, trans, _THIS_IP_); - return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); - } + if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) + return __bch2_btree_trans_too_many_iters(trans); return 0; } diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 6eced95ce374..e0c8db352bff 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(bpos, TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot) ); -DECLARE_EVENT_CLASS(str, +DECLARE_EVENT_CLASS(fs_str, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str), @@ -49,6 +49,29 @@ DECLARE_EVENT_CLASS(str, TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str)) ); +DECLARE_EVENT_CLASS(trans_str, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str), + TP_ARGS(trans, caller_ip, str), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + __string(str, str ) + ), + + TP_fast_assign( + __entry->dev = trans->c->dev; + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __assign_str(str, str); + ), + + TP_printk("%d,%d %s %pS %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str)) +); + DECLARE_EVENT_CLASS(btree_node, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b), @@ -738,22 +761,22 @@ TRACE_EVENT(bucket_evacuate, __entry->dev_idx, __entry->bucket) ); -DEFINE_EVENT(str, move_extent, +DEFINE_EVENT(fs_str, move_extent, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(str, move_extent_read, +DEFINE_EVENT(fs_str, move_extent_read, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(str, move_extent_write, +DEFINE_EVENT(fs_str, move_extent_write, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(str, move_extent_finish, +DEFINE_EVENT(fs_str, move_extent_finish, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); @@ -775,7 +798,7 @@ TRACE_EVENT(move_extent_fail, TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg)) ); -DEFINE_EVENT(str, move_extent_start_fail, +DEFINE_EVENT(fs_str, move_extent_start_fail, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) ); @@ -1008,10 +1031,11 @@ DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced, TP_ARGS(trans, caller_ip) ); -DEFINE_EVENT(transaction_event, trans_restart_too_many_iters, +DEFINE_EVENT(trans_str, trans_restart_too_many_iters, TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + unsigned long caller_ip, + const char *paths), + TP_ARGS(trans, caller_ip, paths) ); DECLARE_EVENT_CLASS(transaction_restart_iter, @@ -1368,12 +1392,12 @@ TRACE_EVENT(write_buffer_flush_slowpath, TP_printk("%zu/%zu", __entry->slowpath, __entry->total) ); -DEFINE_EVENT(str, rebalance_extent, +DEFINE_EVENT(fs_str, rebalance_extent, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) ); -DEFINE_EVENT(str, data_update, +DEFINE_EVENT(fs_str, data_update, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) ); From 3398124444b90079a09374ab10444cd937b72ae1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 26 May 2023 16:59:07 -0400 Subject: [PATCH 061/226] bcachefs: Improve trace_trans_restart_would_deadlock In the CI, we're seeing tests failing due to excessive would_deadlock transaction restarts - the tracepoint now includes the lock cycle that occured. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 3 ++- fs/bcachefs/btree_locking.c | 25 ++++++++++++++++++++++--- fs/bcachefs/trace.h | 7 ++++--- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index f83fab9e62fc..b304c7fc58b1 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3054,6 +3054,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) struct btree_path *path; struct btree_bkey_cached_common *b; static char lock_types[] = { 'r', 'i', 'w' }; + struct task_struct *task = READ_ONCE(trans->locking_wait.task); unsigned l, idx; if (!out->nr_tabstops) { @@ -3061,7 +3062,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) printbuf_tabstop_push(out, 32); } - prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn); + prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn); trans_for_each_path_safe(trans, path, idx) { if (!path->nodes_locked) diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 6039278121dc..1eca320e7574 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -142,10 +142,28 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g) return false; } +static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans, + unsigned long ip) +{ + struct bch_fs *c = trans->c; + + count_event(c, trans_restart_would_deadlock); + + if (trace_trans_restart_would_deadlock_enabled()) { + struct printbuf buf = PRINTBUF; + + buf.atomic++; + print_cycle(&buf, g); + + trace_trans_restart_would_deadlock(trans, ip, buf.buf); + printbuf_exit(&buf); + } +} + static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i) { if (i == g->g) { - trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_); + trace_would_deadlock(g, i->trans, _RET_IP_); return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock); } else { i->trans->lock_must_abort = true; @@ -266,15 +284,16 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) unsigned path_idx; int ret; + g.nr = 0; + if (trans->lock_must_abort) { if (cycle) return -1; - trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_); + trace_would_deadlock(&g, trans, _RET_IP_); return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); } - g.nr = 0; lock_graph_down(&g, trans); next: if (!g.nr) diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index e0c8db352bff..6e2ad6f3db98 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1205,10 +1205,11 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_event, trans_restart_would_deadlock, +DEFINE_EVENT(trans_str, trans_restart_would_deadlock, TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + unsigned long caller_ip, + const char *cycle), + TP_ARGS(trans, caller_ip, cycle) ); DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit, From a276132c2d2bc4baaa9bcfd86419885bd9b8567e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 28 Nov 2023 16:30:45 -0500 Subject: [PATCH 062/226] bcachefs: Don't open code bch2_dev_exists2() Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 1dad2a351b71..264d8fcef176 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -403,8 +403,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, BUG_ON(c->opts.nochanges); bkey_for_each_ptr(ptrs, ptr) { - BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || - !c->devs[ptr->dev]); + BUG_ON(!bch2_dev_exists2(c, ptr->dev)); ca = bch_dev_bkey_exists(c, ptr->dev); From 48dade81760ed551898ec20f59ace7ac2f5620b3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 28 Nov 2023 19:47:26 -0500 Subject: [PATCH 063/226] bcachefs: ONLY_SPECIFIED_DEVS doesn't mean ignore durability anymore Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 0e6157982607..a7e6554cc7dd 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -697,11 +697,9 @@ static int add_new_bucket(struct bch_fs *c, bch_dev_bkey_exists(c, ob->dev)->mi.durability; BUG_ON(*nr_effective >= nr_replicas); - BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS); __clear_bit(ob->dev, devs_may_alloc->d); - *nr_effective += (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) - ? durability : 1; + *nr_effective += durability; *have_cache |= !durability; ob_push(c, ptrs, ob); From ab4fb4b678c37837c6ba4e7a89f1afea4671a3ff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 26 Nov 2023 21:58:11 -0500 Subject: [PATCH 064/226] bcachefs: wb_flush_one_slowpath() A bit of refactoring for better inlining in the main btree write buffer flush path. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_write_buffer.c | 57 ++++++++++++++++---------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 6a1915680407..9470d4657182 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -33,17 +33,32 @@ static int btree_write_buffered_journal_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq, r->journal_seq); } -static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, - struct btree_iter *iter, - struct btree_write_buffered_key *wb, - unsigned commit_flags, - bool *write_locked, - size_t *fast) +static noinline int wb_flush_one_slowpath(struct btree_trans *trans, + struct btree_iter *iter, + struct btree_write_buffered_key *wb) +{ + bch2_btree_node_unlock_write(trans, iter->path, iter->path->l[0].b); + + trans->journal_res.seq = wb->journal_seq; + + return bch2_trans_update(trans, iter, &wb->k, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_journal_res| + BCH_TRANS_COMMIT_journal_reclaim); +} + +static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter, + struct btree_write_buffered_key *wb, + bool *write_locked, size_t *fast) { struct bch_fs *c = trans->c; struct btree_path *path; int ret; + EBUG_ON(!wb->journal_seq); ret = bch2_btree_iter_traverse(iter); if (ret) return ret; @@ -66,26 +81,14 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, *write_locked = true; } - if (!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s)) { - bch2_btree_node_unlock_write(trans, path, path->l[0].b); + if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) { *write_locked = false; - goto trans_commit; + return wb_flush_one_slowpath(trans, iter, wb); } bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq); (*fast)++; return 0; -trans_commit: - trans->journal_res.seq = wb->journal_seq; - - return bch2_trans_update(trans, iter, &wb->k, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(trans, NULL, NULL, - commit_flags| - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_no_journal_res| - BCH_TRANS_COMMIT_journal_reclaim); } static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb) @@ -206,20 +209,18 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) iter.path->preserve = false; do { - ret = bch2_btree_write_buffer_flush_one(trans, &iter, i, 0, - &write_locked, &fast); + ret = wb_flush_one(trans, &iter, i, &write_locked, &fast); if (!write_locked) bch2_trans_begin(trans); } while (bch2_err_matches(ret, BCH_ERR_transaction_restart)); - if (ret == -BCH_ERR_journal_reclaim_would_deadlock) { + if (!ret) { + i->journal_seq = 0; + } else if (ret == -BCH_ERR_journal_reclaim_would_deadlock) { slowpath++; - continue; - } - if (ret) + ret = 0; + } else break; - - i->journal_seq = 0; } if (write_locked) From 8a4b4c52c0031f7f40261e41d66922a1049f0407 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 26 Nov 2023 22:06:48 -0500 Subject: [PATCH 065/226] bcachefs: more write buffer refactoring prep work for big rewrite - no functional changes in this patch. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_write_buffer.c | 77 ++++++++++++++++---------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 9470d4657182..6ab26576252c 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -163,9 +163,6 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) keys = wb->keys[s.idx]; nr = s.nr; - if (race_fault()) - goto slowpath; - /* * We first sort so that we can detect and skip redundant updates, and * then we attempt to flush in sorted btree order, as this is most @@ -209,6 +206,11 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) iter.path->preserve = false; do { + if (race_fault()) { + ret = -BCH_ERR_journal_reclaim_would_deadlock; + break; + } + ret = wb_flush_one(trans, &iter, i, &write_locked, &fast); if (!write_locked) bch2_trans_begin(trans); @@ -227,46 +229,45 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b); bch2_trans_iter_exit(trans, &iter); - trace_write_buffer_flush(trans, nr, skipped, fast, wb->size); + if (ret) + goto err; - if (slowpath) - goto slowpath; + if (slowpath) { + /* + * Flush in the order they were present in the journal, so that + * we can release journal pins: + * The fastpath zapped the seq of keys that were successfully flushed so + * we can skip those here. + */ + trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr); + sort(keys, nr, sizeof(keys[0]), + btree_write_buffered_journal_cmp, + NULL); + + for (i = keys; i < keys + nr; i++) { + if (!i->journal_seq) + continue; + + bch2_journal_pin_update(j, i->journal_seq, &pin, + bch2_btree_write_buffer_journal_flush); + + ret = commit_do(trans, NULL, NULL, + BCH_WATERMARK_reclaim| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_journal_res| + BCH_TRANS_COMMIT_journal_reclaim, + btree_write_buffered_insert(trans, i)); + if (ret) + goto err; + } + } +err: bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)); -out: + trace_write_buffer_flush(trans, nr, skipped, fast, wb->size); bch2_journal_pin_drop(j, &pin); return ret; -slowpath: - trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr); - - /* - * Now sort the rest by journal seq and bump the journal pin as we go. - * The slowpath zapped the seq of keys that were successfully flushed so - * we can skip those here. - */ - sort(keys, nr, sizeof(keys[0]), - btree_write_buffered_journal_cmp, - NULL); - - for (i = keys; i < keys + nr; i++) { - if (!i->journal_seq) - continue; - - bch2_journal_pin_update(j, i->journal_seq, &pin, - bch2_btree_write_buffer_journal_flush); - - ret = commit_do(trans, NULL, NULL, - BCH_WATERMARK_reclaim| - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_no_journal_res| - BCH_TRANS_COMMIT_journal_reclaim, - btree_write_buffered_insert(trans, i)); - if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret))) - break; - } - - goto out; } int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) From 62286a08c3f352d2c19c08da3b2a5b9d23f34d61 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 28 Nov 2023 12:22:55 -0600 Subject: [PATCH 066/226] bcachefs: Replace zero-length arrays with flexible-array members Fake flexible arrays (zero-length and one-element arrays) are deprecated, and should be replaced by flexible-array members. So, replace zero-length arrays with flexible-array members in multiple structures. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 4 ++-- fs/bcachefs/io_read.c | 2 +- fs/bcachefs/move.c | 2 +- fs/bcachefs/replicas_types.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 44ba7a87aea7..43822c17297c 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -276,7 +276,7 @@ struct bch_ioctl_fs_usage { __u32 replica_entries_bytes; __u32 pad; - struct bch_replicas_usage replicas[0]; + struct bch_replicas_usage replicas[]; }; /* @@ -313,7 +313,7 @@ struct bch_ioctl_dev_usage_v2 { __u32 bucket_size; __u64 nr_buckets; - struct bch_ioctl_dev_usage_type d[0]; + struct bch_ioctl_dev_usage_type d[]; }; /* diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 3281c4dd1d52..4c9eaf7cea8d 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -80,7 +80,7 @@ struct promote_op { struct bpos pos; struct data_update write; - struct bio_vec bi_inline_vecs[0]; /* must be last */ + struct bio_vec bi_inline_vecs[]; /* must be last */ }; static const struct rhashtable_params bch_promote_params = { diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index a6db46a70f15..bc545613f793 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -70,7 +70,7 @@ struct moving_io { struct data_update write; /* Must be last since it is variable size */ - struct bio_vec bi_inline_vecs[0]; + struct bio_vec bi_inline_vecs[]; }; static void move_free(struct moving_io *io) diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h index 030324078bba..ac90d142c4e8 100644 --- a/fs/bcachefs/replicas_types.h +++ b/fs/bcachefs/replicas_types.h @@ -21,7 +21,7 @@ struct replicas_delta_list { u64 nr_inodes; u64 persistent_reserved[BCH_REPLICAS_MAX]; struct {} memset_end; - struct replicas_delta d[0]; + struct replicas_delta d[]; }; #endif /* _BCACHEFS_REPLICAS_TYPES_H */ From 74529338805d514a07ba4e6feb25997c4be3bddd Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Sun, 26 Nov 2023 20:26:07 +1300 Subject: [PATCH 067/226] bcachefs: remove dead bch2_evacuate_bucket() Signed-off-by: Daniel Hill Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 20 +------------------- fs/bcachefs/move.h | 8 +------- fs/bcachefs/movinggc.c | 2 +- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index bc545613f793..638603d774b5 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -636,7 +636,7 @@ int bch2_move_data(struct bch_fs *c, return ret; } -int __bch2_evacuate_bucket(struct moving_context *ctxt, +int bch2_evacuate_bucket(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, struct bpos bucket, int gen, struct data_update_opts _data_opts) @@ -796,24 +796,6 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, return ret; } -int bch2_evacuate_bucket(struct bch_fs *c, - struct bpos bucket, int gen, - struct data_update_opts data_opts, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc) -{ - struct moving_context ctxt; - int ret; - - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(&ctxt, NULL, bucket, gen, data_opts); - bch2_moving_ctxt_exit(&ctxt); - - return ret; -} - typedef bool (*move_btree_pred)(struct bch_fs *, void *, struct btree *, struct bch_io_opts *, struct data_update_opts *); diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 531965674a31..6a38fed85738 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -135,16 +135,10 @@ int bch2_move_data(struct bch_fs *, bool, move_pred_fn, void *); -int __bch2_evacuate_bucket(struct moving_context *, +int bch2_evacuate_bucket(struct moving_context *, struct move_bucket_in_flight *, struct bpos, int, struct data_update_opts); -int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int, - struct data_update_opts, - struct bch_ratelimit *, - struct bch_move_stats *, - struct write_point_specifier, - bool); int bch2_data_job(struct bch_fs *, struct bch_move_stats *, struct bch_ioctl_data); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 7cffcf6cfa64..dcb163a68e47 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -224,7 +224,7 @@ static int bch2_copygc(struct moving_context *ctxt, break; } - ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket, + ret = bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket, f->bucket.k.gen, data_opts); if (ret) goto err; From 0c069781ddfa4e31c169a8eced1ee90b8929d522 Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Sun, 26 Nov 2023 19:33:31 +1300 Subject: [PATCH 068/226] bcachefs: rebalance should wakeup on shutdown if disabled Signed-off-by: Daniel Hill Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 2 +- fs/bcachefs/move.h | 1 + fs/bcachefs/rebalance.c | 12 ++++++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 638603d774b5..847ab0eba2da 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -159,7 +159,7 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) atomic_read(&ctxt->write_sectors) != sectors_pending); } -static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt) +void bch2_moving_ctxt_flush_all(struct moving_context *ctxt) { move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); bch2_trans_unlock_long(ctxt->trans); diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 6a38fed85738..9baf3093a678 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -83,6 +83,7 @@ void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, struct write_point_specifier, bool); struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *); void bch2_moving_ctxt_do_pending_writes(struct moving_context *); +void bch2_moving_ctxt_flush_all(struct moving_context *); void bch2_move_ctxt_wait_for_io(struct moving_context *); int bch2_move_ratelimit(struct moving_context *); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 1846f38cea38..9c415e14ff9c 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -332,8 +332,16 @@ static int do_rebalance(struct moving_context *ctxt) BTREE_ID_rebalance_work, POS_MIN, BTREE_ITER_ALL_SNAPSHOTS); - while (!bch2_move_ratelimit(ctxt) && - !kthread_wait_freezable(r->enabled)) { + while (!bch2_move_ratelimit(ctxt)) { + if (!r->enabled) { + bch2_moving_ctxt_flush_all(ctxt); + kthread_wait_freezable(r->enabled || + kthread_should_stop()); + } + + if (kthread_should_stop()) + break; + bch2_trans_begin(trans); ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter)); From 3ec3758a814840619ceb3446a37501cfca29bdae Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Tue, 28 Nov 2023 19:24:47 +1300 Subject: [PATCH 069/226] bcachefs: copygc should wakeup on shutdown if disabled Signed-off-by: Daniel Hill Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index dcb163a68e47..7155e2060d29 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -337,7 +337,8 @@ static int bch2_copygc_thread(void *arg) if (!c->copy_gc_enabled) { move_buckets_wait(&ctxt, buckets, true); - kthread_wait_freezable(c->copy_gc_enabled); + kthread_wait_freezable(c->copy_gc_enabled || + kthread_should_stop()); } if (unlikely(freezing(current))) { From 3f0e297d8677c0f4465225ecb6d30d0edbc6e519 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 28 Nov 2023 16:36:54 -0500 Subject: [PATCH 070/226] bcachefs: Explicity go RW for fsck This eliminates a lot of BCH_TRANS_COMMIT_lazy_rw flags, and is less error prone. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 17 +++++------- fs/bcachefs/backpointers.c | 6 ++--- fs/bcachefs/fsck.c | 47 +++++++++++----------------------- fs/bcachefs/recovery.c | 2 +- fs/bcachefs/snapshot.c | 4 +-- fs/bcachefs/subvolume.c | 4 +-- 6 files changed, 27 insertions(+), 53 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 6e719dd09e31..ad4ad795b3bc 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -558,8 +558,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + BCH_TRANS_COMMIT_no_enospc, bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); if (ret) break; @@ -578,8 +577,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) if (have_bucket_gens_key && !ret) ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + BCH_TRANS_COMMIT_no_enospc, bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); bch2_trans_put(trans); @@ -1276,7 +1274,7 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran ret = bch2_btree_delete_extent_at(trans, iter, iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw); + BCH_TRANS_COMMIT_no_enospc); goto out; } @@ -1413,8 +1411,7 @@ int bch2_check_alloc_info(struct bch_fs *c) } ret = bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw); + BCH_TRANS_COMMIT_no_enospc); if (ret) goto bkey_err; @@ -1472,7 +1469,7 @@ int bch2_check_alloc_info(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_check_bucket_gens_key(trans, &iter, k)); err: bch2_trans_put(trans); @@ -1562,7 +1559,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_check_alloc_to_lru_ref(trans, &iter))); bch_err_fn(c, ret); return ret; @@ -1899,7 +1896,6 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, ret = bch2_bucket_do_index(trans, k, a, true) ?: bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc); if (ret) goto bkey_err; @@ -1920,7 +1916,6 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc); if (ret) goto bkey_err; diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 2b6ab5ce95f2..2fb96fa99f42 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -397,7 +397,7 @@ int bch2_check_btree_backpointers(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, 0, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_check_btree_backpointer(trans, &iter, k))); if (ret) bch_err_fn(c, ret); @@ -623,7 +623,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc, check_btree_root_to_backpointers(trans, btree_id, bucket_start, bucket_end, @@ -645,7 +644,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, bucket_start, bucket_end, &last_flushed, k) ?: bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ret = 0; @@ -821,7 +819,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_one_backpointer(trans, start, end, bkey_s_c_to_backpointer(k), &last_flushed_pos)); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 24b2e2834cfc..bc6b56628fdf 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -207,10 +207,8 @@ static int fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 snapshot) { - int ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, - __write_inode(trans, inode, snapshot)); + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __write_inode(trans, inode, snapshot)); if (ret) bch_err_fn(trans->c, ret); return ret; @@ -353,9 +351,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 inode_snapshot) { - int ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| - BCH_TRANS_COMMIT_no_enospc, + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, __reattach_inode(trans, inode, inode_snapshot)); bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum); return ret; @@ -756,9 +752,7 @@ static int hash_redo_key(struct btree_trans *trans, k.k->p.snapshot, tmp, BCH_HASH_SET_MUST_CREATE, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw); + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); } static int hash_check_key(struct btree_trans *trans, @@ -919,9 +913,6 @@ static int check_inode(struct btree_trans *trans, fsck_err(c, inode_unlinked_but_clean, "filesystem marked clean, but inode %llu unlinked", u.bi_inum))) { - bch2_trans_unlock(trans); - bch2_fs_lazy_rw(c); - ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck deleting inode"); return ret; @@ -934,9 +925,6 @@ static int check_inode(struct btree_trans *trans, u.bi_inum))) { bch_verbose(c, "truncating inode %llu", u.bi_inum); - bch2_trans_unlock(trans); - bch2_fs_lazy_rw(c); - /* * XXX: need to truncate partial blocks too here - or ideally * just switch units to bytes and that issue goes away @@ -1015,7 +1003,7 @@ int bch2_check_inodes(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_inode(trans, &iter, k, &prev, &s, full)); snapshots_seen_exit(&s); @@ -1229,8 +1217,7 @@ static int overlapping_extents_found(struct btree_trans *trans, ret = bch2_trans_update_extent_overwrite(trans, old_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE, k1, k2) ?: - bch2_trans_commit(trans, &res, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc); + bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); bch2_disk_reservation_put(c, &res); if (ret) @@ -1469,7 +1456,7 @@ int bch2_check_extents(struct bch_fs *c) POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, &res, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, ({ + BCH_TRANS_COMMIT_no_enospc, ({ bch2_disk_reservation_put(c, &res); check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: check_extent_overbig(trans, &iter, k); @@ -1498,7 +1485,7 @@ int bch2_check_indirect_extents(struct bch_fs *c) POS_MIN, BTREE_ITER_PREFETCH, k, &res, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, ({ + BCH_TRANS_COMMIT_no_enospc, ({ bch2_disk_reservation_put(c, &res); check_extent_overbig(trans, &iter, k); })); @@ -1871,7 +1858,7 @@ int bch2_check_dirents(struct bch_fs *c) BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + BCH_TRANS_COMMIT_no_enospc, check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)); bch2_trans_put(trans); @@ -1935,7 +1922,7 @@ int bch2_check_xattrs(struct bch_fs *c) BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + BCH_TRANS_COMMIT_no_enospc, check_xattr(trans, &iter, k, &hash_info, &inode))); bch_err_fn(c, ret); return ret; @@ -1966,8 +1953,7 @@ static int check_root_trans(struct btree_trans *trans) root_subvol.v.snapshot = cpu_to_le32(snapshot); root_subvol.v.inode = cpu_to_le64(inum); ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + BCH_TRANS_COMMIT_no_enospc, bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0)); bch_err_msg(c, ret, "writing root subvol"); @@ -2002,9 +1988,7 @@ int bch2_check_root(struct bch_fs *c) { int ret; - ret = bch2_trans_do(c, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_root_trans(trans)); bch_err_fn(c, ret); return ret; @@ -2133,8 +2117,7 @@ static int check_path(struct btree_trans *trans, return 0; ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + BCH_TRANS_COMMIT_no_enospc, remove_backpointer(trans, inode)); if (ret) { bch_err(c, "error removing dirent: %i", ret); @@ -2415,7 +2398,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS(0, range_start), BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); if (ret < 0) { bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); @@ -2500,7 +2483,7 @@ int bch2_fix_reflink_p(struct bch_fs *c) BTREE_ID_extents, POS_MIN, BTREE_ITER_INTENT|BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, fix_reflink_p_key(trans, &iter, k))); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 643369c8c512..fec74086c9c5 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -534,7 +534,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) keys->gap = keys->nr; set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr) + if (keys->nr || c->opts.fsck) return bch2_fs_read_write_early(c); return 0; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index e473c788fd64..b2d216fa7182 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -590,7 +590,7 @@ int bch2_check_snapshot_trees(struct bch_fs *c) for_each_btree_key_commit(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_snapshot_tree(trans, &iter, k))); if (ret) @@ -868,7 +868,7 @@ int bch2_check_snapshots(struct bch_fs *c) for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_snapshots, POS_MAX, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_snapshot(trans, &iter, k))); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 9a21fe86af78..f0f91211f757 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -37,8 +37,6 @@ static int check_subvol(struct btree_trans *trans, return ret; if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { - bch2_fs_lazy_rw(c); - ret = bch2_subvolume_delete(trans, iter->pos.offset); if (ret) bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); @@ -89,7 +87,7 @@ int bch2_check_subvols(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_subvol(trans, &iter, k))); if (ret) bch_err_fn(c, ret); From a79e1b6dea0b98a66dc0d05fd8e470c18b164b52 Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Mon, 27 Nov 2023 23:37:44 +1300 Subject: [PATCH 071/226] bcachefs: copygc shouldn't try moving buckets on error Co-developed-by: Kent Overstreet Signed-off-by: Daniel Hill Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 7155e2060d29..fd239a261aca 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -170,15 +170,23 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, saw++; - if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p))) + ret2 = bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)); + if (ret2 < 0) + goto err; + + if (!ret2) not_movable++; else if (bucket_in_flight(buckets_in_flight, b.k)) in_flight++; else { - ret2 = darray_push(buckets, b) ?: buckets->nr >= nr_to_get; - if (ret2 >= 0) - sectors += b.sectors; + ret2 = darray_push(buckets, b); + if (ret2) + goto err; + sectors += b.sectors; } + + ret2 = buckets->nr >= nr_to_get; +err: ret2; })); From 21e07cc966aab2400144f08eb85cb7f42df923ad Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Mon, 27 Nov 2023 21:52:33 +1300 Subject: [PATCH 072/226] bcachefs: remove redundant condition from data_update_index_update Signed-off-by: Daniel Hill Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 59b558c25977..632e8ed5bbda 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -314,7 +314,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, } continue; nowork: - if (m->stats && m->stats) { + if (m->stats) { BUG_ON(k.k->p.offset <= iter.pos.offset); atomic64_inc(&m->stats->keys_raced); atomic64_add(k.k->p.offset - iter.pos.offset, From 0f64a6daaa4852db735a2df754b37d187f5480d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Nov 2023 02:11:15 -0500 Subject: [PATCH 073/226] bcachefs: On missing backpointer to interior node, flush interior updates Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 2fb96fa99f42..eb826457ff6b 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -441,6 +441,11 @@ static int check_bp_exists(struct btree_trans *trans, memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { if (last_flushed->level != bp.level || !bpos_eq(last_flushed->pos, orig_k.k->p)) { + if (bp.level) { + bch2_trans_unlock(trans); + bch2_btree_interior_updates_flush(c); + } + ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) goto err; From a56c61714a2d2e548f5c3792130149159e0657ed Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Nov 2023 02:16:19 -0500 Subject: [PATCH 074/226] bcachefs: Make backpointer fsck wb flush check more rigorous backpointers fsck now always runs in rw mode - the btree is being modified while it runs, by e.g. copygc, rebalance, the discard worker, the invalidate worker. We could find a missing backpointer, flush the btree write buffer, and then on the next iteration find a new key at the exact same position - which will most likely need another write buffer flush. Hence, we have to check for an exact match on last_flushed, not just the pos. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index eb826457ff6b..d0299361e8d2 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -3,6 +3,7 @@ #include "bbpos.h" #include "alloc_background.h" #include "backpointers.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_update.h" #include "btree_update_interior.h" @@ -404,25 +405,23 @@ int bch2_check_btree_backpointers(struct bch_fs *c) return ret; } -struct bpos_level { - unsigned level; - struct bpos pos; -}; - static int check_bp_exists(struct btree_trans *trans, struct bpos bucket, struct bch_backpointer bp, struct bkey_s_c orig_k, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed) + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct btree_iter bp_iter = { NULL }; struct printbuf buf = PRINTBUF; struct bkey_s_c bp_k; + struct bkey_buf tmp; int ret; + bch2_bkey_buf_init(&tmp); + if (bpos_lt(bucket, bucket_start) || bpos_gt(bucket, bucket_end)) return 0; @@ -439,8 +438,11 @@ static int check_bp_exists(struct btree_trans *trans, if (bp_k.k->type != KEY_TYPE_backpointer || memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { - if (last_flushed->level != bp.level || - !bpos_eq(last_flushed->pos, orig_k.k->p)) { + if (!bpos_eq(orig_k.k->p, last_flushed->k->k.p) || + bkey_bytes(orig_k.k) != bkey_bytes(&last_flushed->k->k) || + memcmp(orig_k.v, &last_flushed->k->v, bkey_val_bytes(orig_k.k))) { + bch2_bkey_buf_reassemble(&tmp, c, orig_k); + if (bp.level) { bch2_trans_unlock(trans); bch2_btree_interior_updates_flush(c); @@ -450,8 +452,7 @@ static int check_bp_exists(struct btree_trans *trans, if (ret) goto err; - last_flushed->level = bp.level; - last_flushed->pos = orig_k.k->p; + bch2_bkey_buf_copy(last_flushed, c, tmp.k); ret = -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } @@ -461,6 +462,7 @@ static int check_bp_exists(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &bp_iter); + bch2_bkey_buf_exit(&tmp, c); printbuf_exit(&buf); return ret; missing: @@ -482,7 +484,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed, + struct bkey_buf *last_flushed, struct bkey_s_c k) { struct bch_fs *c = trans->c; @@ -516,7 +518,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans, enum btree_id btree_id, struct bpos bucket_start, struct bpos bucket_end, - struct bpos_level *last_flushed, + struct bkey_buf *last_flushed, int *level) { struct bch_fs *c = trans->c; @@ -621,10 +623,13 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct btree_iter iter; enum btree_id btree_id; struct bkey_s_c k; + struct bkey_buf last_flushed; int ret = 0; + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { - struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, @@ -669,6 +674,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, } } + bch2_bkey_buf_exit(&last_flushed, c); return 0; } From d9e14a4eb9902bac1e6fb779eb6c41629652cbd8 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 30 Nov 2023 14:17:11 -0500 Subject: [PATCH 075/226] bcachefs: remove sb lock and flags update on explicit shutdown bcachefs grabs s_umount and sets SB_RDONLY when the fs is shutdown via the ioctl() interface. This has a couple issues related to interactions between shutdown and freeze: 1. The flags == FSOP_GOING_FLAGS_DEFAULT case is a deadlock vector because freeze_bdev() calls into freeze_super(), which also acquires s_umount. 2. If an explicit shutdown occurs while the sb is frozen, SB_RDONLY alters the thaw path as if the sb was read-only at freeze time. This effectively leaks the frozen state and leaves the sb frozen indefinitely. The usage of SB_RDONLY here goes back to the initial bcachefs commit and AFAICT is simply historical behavior. This behavior is unique to bcachefs relative to the handful of other filesystems that support the shutdown ioctl(). Typically, SB_RDONLY is reserved for the proper remount path, which itself is restricted from modifying frozen superblocks in reconfigure_super(). Drop the unnecessary sb lock and flags update bch2_ioc_goingdown() to address both of these issues. Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 14d5cc6f90d7..e0a19a73c8e1 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -285,34 +285,26 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) bch_notice(c, "shutdown by ioctl type %u", flags); - down_write(&c->vfs_sb->s_umount); - switch (flags) { case FSOP_GOING_FLAGS_DEFAULT: ret = freeze_bdev(c->vfs_sb->s_bdev); if (ret) - goto err; - + break; bch2_journal_flush(&c->journal); - c->vfs_sb->s_flags |= SB_RDONLY; bch2_fs_emergency_read_only(c); thaw_bdev(c->vfs_sb->s_bdev); break; - case FSOP_GOING_FLAGS_LOGFLUSH: bch2_journal_flush(&c->journal); fallthrough; - case FSOP_GOING_FLAGS_NOLOGFLUSH: - c->vfs_sb->s_flags |= SB_RDONLY; bch2_fs_emergency_read_only(c); break; default: ret = -EINVAL; break; } -err: - up_write(&c->vfs_sb->s_umount); + return ret; } From a564c9fad581972a4bf02d598ec3ad3f53dcbae2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 2 Dec 2023 03:36:27 -0500 Subject: [PATCH 076/226] bcachefs: Include btree_trans in more tracepoints This gives us more context information - e.g. which codepath is invoking btree node reads. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 22 +++--- fs/bcachefs/btree_cache.h | 4 +- fs/bcachefs/btree_io.c | 11 +-- fs/bcachefs/btree_io.h | 2 +- fs/bcachefs/btree_iter.c | 4 +- fs/bcachefs/btree_update_interior.c | 35 +++++----- fs/bcachefs/trace.h | 104 ++++++++++++++++++++-------- 7 files changed, 115 insertions(+), 67 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 79495cd7a794..9574c8c4d708 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -500,19 +500,21 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc) * cannibalize_bucket() will take. This means every time we unlock the root of * the btree, we need to release this lock if we have it held. */ -void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c) +void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans) { + struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; if (bc->alloc_lock == current) { - trace_and_count(c, btree_cache_cannibalize_unlock, c); + trace_and_count(c, btree_cache_cannibalize_unlock, trans); bc->alloc_lock = NULL; closure_wake_up(&bc->alloc_wait); } } -int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) +int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl) { + struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct task_struct *old; @@ -521,7 +523,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) goto success; if (!cl) { - trace_and_count(c, btree_cache_cannibalize_lock_fail, c); + trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock; } @@ -535,11 +537,11 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) goto success; } - trace_and_count(c, btree_cache_cannibalize_lock_fail, c); + trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); return -BCH_ERR_btree_cache_cannibalize_lock_blocked; success: - trace_and_count(c, btree_cache_cannibalize_lock, c); + trace_and_count(c, btree_cache_cannibalize_lock, trans); return 0; } @@ -673,7 +675,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea mutex_unlock(&bc->lock); - trace_and_count(c, btree_cache_cannibalize, c); + trace_and_count(c, btree_cache_cannibalize, trans); goto out; } @@ -749,7 +751,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, if (path && sync) bch2_trans_unlock_noassert(trans); - bch2_btree_node_read(c, b, sync); + bch2_btree_node_read(trans, b, sync); if (!sync) return NULL; @@ -1039,7 +1041,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, goto retry; if (IS_ERR(b) && - !bch2_btree_cache_cannibalize_lock(c, NULL)) + !bch2_btree_cache_cannibalize_lock(trans, NULL)) goto retry; if (IS_ERR(b)) @@ -1087,7 +1089,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); out: - bch2_btree_cache_cannibalize_unlock(c); + bch2_btree_cache_cannibalize_unlock(trans); return b; } diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index cfb80b201d61..4e1af5882052 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -17,8 +17,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, unsigned, enum btree_id); -void bch2_btree_cache_cannibalize_unlock(struct bch_fs *); -int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *); +void bch2_btree_cache_cannibalize_unlock(struct btree_trans *); +int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index cf19621304f7..a6ac68fe90fb 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1575,16 +1575,17 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool return 0; } -void bch2_btree_node_read(struct bch_fs *c, struct btree *b, +void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bool sync) { + struct bch_fs *c = trans->c; struct extent_ptr_decoded pick; struct btree_read_bio *rb; struct bch_dev *ca; struct bio *bio; int ret; - trace_and_count(c, btree_node_read, c, b); + trace_and_count(c, btree_node_read, trans, b); if (bch2_verify_all_btree_replicas && !btree_node_read_all_replicas(c, b, sync)) @@ -1663,12 +1664,12 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, closure_init_stack(&cl); do { - ret = bch2_btree_cache_cannibalize_lock(c, &cl); + ret = bch2_btree_cache_cannibalize_lock(trans, &cl); closure_sync(&cl); } while (ret); b = bch2_btree_node_mem_alloc(trans, level != 0); - bch2_btree_cache_cannibalize_unlock(c); + bch2_btree_cache_cannibalize_unlock(trans); BUG_ON(IS_ERR(b)); @@ -1677,7 +1678,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, set_btree_node_read_in_flight(b); - bch2_btree_node_read(c, b, true); + bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { bch2_btree_node_hash_remove(&c->btree_cache, b); diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index e0d7fa5b1dfb..e251cb6b965f 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -130,7 +130,7 @@ void bch2_btree_init_next(struct btree_trans *, struct btree *); int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *, struct btree *, bool, bool *); -void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); +void bch2_btree_node_read(struct btree_trans *, struct btree *, bool); int bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index b304c7fc58b1..1a1e8331976d 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -977,7 +977,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) closure_init_stack(&cl); do { - ret = bch2_btree_cache_cannibalize_lock(c, &cl); + ret = bch2_btree_cache_cannibalize_lock(trans, &cl); closure_sync(&cl); } while (ret); } @@ -1013,7 +1013,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) * then failed to relock a path - that's fine. */ err: - bch2_btree_cache_cannibalize_unlock(c); + bch2_btree_cache_cannibalize_unlock(trans); trans->in_traverse_all = false; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 81e443dee714..6858fca7abe1 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -164,9 +164,11 @@ static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, /* Btree node freeing/allocation: */ -static void __btree_node_free(struct bch_fs *c, struct btree *b) +static void __btree_node_free(struct btree_trans *trans, struct btree *b) { - trace_and_count(c, btree_node_free, c, b); + struct bch_fs *c = trans->c; + + trace_and_count(c, btree_node_free, trans, b); BUG_ON(btree_node_write_blocked(b)); BUG_ON(btree_node_dirty(b)); @@ -192,7 +194,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, bch2_btree_node_lock_write_nofail(trans, path, &b->c); bch2_btree_node_hash_remove(&c->btree_cache, b); - __btree_node_free(c, b); + __btree_node_free(trans, b); six_unlock_write(&b->c.lock); mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED); @@ -363,7 +365,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id); BUG_ON(ret); - trace_and_count(c, btree_node_alloc, c, b); + trace_and_count(c, btree_node_alloc, trans, b); bch2_increment_clock(c, btree_sectors(c), WRITE); return b; } @@ -453,7 +455,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans * btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); - __btree_node_free(c, b); + __btree_node_free(trans, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } @@ -466,7 +468,6 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, unsigned flags, struct closure *cl) { - struct bch_fs *c = as->c; struct btree *b; unsigned interior; int ret = 0; @@ -477,7 +478,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, * Protects reaping from the btree node cache and using the btree node * open bucket reserve: */ - ret = bch2_btree_cache_cannibalize_lock(c, cl); + ret = bch2_btree_cache_cannibalize_lock(trans, cl); if (ret) return ret; @@ -496,7 +497,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, } } err: - bch2_btree_cache_cannibalize_unlock(c); + bch2_btree_cache_cannibalize_unlock(trans); return ret; } @@ -1223,7 +1224,7 @@ static void bch2_btree_set_root(struct btree_update *as, struct bch_fs *c = as->c; struct btree *old; - trace_and_count(c, btree_node_set_root, c, b); + trace_and_count(c, btree_node_set_root, trans, b); old = btree_node_root(c, b); @@ -1489,7 +1490,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { struct btree *n[2]; - trace_and_count(c, btree_node_split, c, b); + trace_and_count(c, btree_node_split, trans, b); n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level); n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level); @@ -1547,7 +1548,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); } } else { - trace_and_count(c, btree_node_compact, c, b); + trace_and_count(c, btree_node_compact, trans, b); n1 = bch2_btree_node_alloc_replacement(as, trans, b); @@ -1867,7 +1868,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, if (ret) goto err; - trace_and_count(c, btree_node_merge, c, b); + trace_and_count(c, btree_node_merge, trans, b); bch2_btree_interior_update_will_free_node(as, b); bch2_btree_interior_update_will_free_node(as, m); @@ -1970,7 +1971,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED); bch2_btree_path_level_init(trans, new_path, n); - trace_and_count(c, btree_node_rewrite, c, b); + trace_and_count(c, btree_node_rewrite, trans, b); if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); @@ -2252,7 +2253,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite * btree_iter_traverse(): */ if (btree_ptr_hash_val(new_key) != b->hash_val) { - ret = bch2_btree_cache_cannibalize_lock(c, &cl); + ret = bch2_btree_cache_cannibalize_lock(trans, &cl); if (ret) { ret = drop_locks_do(trans, (closure_sync(&cl), 0)); if (ret) @@ -2276,7 +2277,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite six_unlock_intent(&new_hash->c.lock); } closure_sync(&cl); - bch2_btree_cache_cannibalize_unlock(c); + bch2_btree_cache_cannibalize_unlock(trans); return ret; } @@ -2337,12 +2338,12 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id) closure_init_stack(&cl); do { - ret = bch2_btree_cache_cannibalize_lock(c, &cl); + ret = bch2_btree_cache_cannibalize_lock(trans, &cl); closure_sync(&cl); } while (ret); b = bch2_btree_node_mem_alloc(trans, false); - bch2_btree_cache_cannibalize_unlock(c); + bch2_btree_cache_cannibalize_unlock(trans); set_btree_node_fake(b); set_btree_node_need_rewrite(b); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 6e2ad6f3db98..cfa7ee780fd4 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(trans_str, __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str)) ); -DECLARE_EVENT_CLASS(btree_node, +DECLARE_EVENT_CLASS(btree_node_nofs, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b), @@ -97,6 +97,33 @@ DECLARE_EVENT_CLASS(btree_node, __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ); +DECLARE_EVENT_CLASS(btree_node, + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __array(char, trans_fn, 32 ) + __field(u8, level ) + __field(u8, btree_id ) + TRACE_BPOS_entries(pos) + ), + + TP_fast_assign( + __entry->dev = trans->c->dev; + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->level = b->c.level; + __entry->btree_id = b->c.btree_id; + TRACE_BPOS_assign(pos, b->key.k.p); + ), + + TP_printk("%d,%d %s %u %s %llu:%llu:%u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn, + __entry->level, + bch2_btree_id_str(__entry->btree_id), + __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) +); + DECLARE_EVENT_CLASS(bch_fs, TP_PROTO(struct bch_fs *c), TP_ARGS(c), @@ -112,6 +139,23 @@ DECLARE_EVENT_CLASS(bch_fs, TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev)) ); +DECLARE_EVENT_CLASS(btree_trans, + TP_PROTO(struct btree_trans *trans), + TP_ARGS(trans), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __array(char, trans_fn, 32 ) + ), + + TP_fast_assign( + __entry->dev = trans->c->dev; + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + ), + + TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn) +); + DECLARE_EVENT_CLASS(bio, TP_PROTO(struct bio *bio), TP_ARGS(bio), @@ -330,36 +374,36 @@ TRACE_EVENT(btree_cache_scan, __entry->nr_to_scan, __entry->can_free, __entry->ret) ); -DEFINE_EVENT(btree_node, btree_cache_reap, +DEFINE_EVENT(btree_node_nofs, btree_cache_reap, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b) ); -DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock_fail, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) +DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock_fail, + TP_PROTO(struct btree_trans *trans), + TP_ARGS(trans) ); -DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) +DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock, + TP_PROTO(struct btree_trans *trans), + TP_ARGS(trans) ); -DEFINE_EVENT(bch_fs, btree_cache_cannibalize, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) +DEFINE_EVENT(btree_trans, btree_cache_cannibalize, + TP_PROTO(struct btree_trans *trans), + TP_ARGS(trans) ); -DEFINE_EVENT(bch_fs, btree_cache_cannibalize_unlock, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c) +DEFINE_EVENT(btree_trans, btree_cache_cannibalize_unlock, + TP_PROTO(struct btree_trans *trans), + TP_ARGS(trans) ); /* Btree */ DEFINE_EVENT(btree_node, btree_node_read, - TP_PROTO(struct bch_fs *c, struct btree *b), - TP_ARGS(c, b) + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b) ); TRACE_EVENT(btree_node_write, @@ -383,13 +427,13 @@ TRACE_EVENT(btree_node_write, ); DEFINE_EVENT(btree_node, btree_node_alloc, - TP_PROTO(struct bch_fs *c, struct btree *b), - TP_ARGS(c, b) + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b) ); DEFINE_EVENT(btree_node, btree_node_free, - TP_PROTO(struct bch_fs *c, struct btree *b), - TP_ARGS(c, b) + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b) ); TRACE_EVENT(btree_reserve_get_fail, @@ -421,28 +465,28 @@ TRACE_EVENT(btree_reserve_get_fail, ); DEFINE_EVENT(btree_node, btree_node_compact, - TP_PROTO(struct bch_fs *c, struct btree *b), - TP_ARGS(c, b) + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b) ); DEFINE_EVENT(btree_node, btree_node_merge, - TP_PROTO(struct bch_fs *c, struct btree *b), - TP_ARGS(c, b) + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b) ); DEFINE_EVENT(btree_node, btree_node_split, - TP_PROTO(struct bch_fs *c, struct btree *b), - TP_ARGS(c, b) + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b) ); DEFINE_EVENT(btree_node, btree_node_rewrite, - TP_PROTO(struct bch_fs *c, struct btree *b), - TP_ARGS(c, b) + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b) ); DEFINE_EVENT(btree_node, btree_node_set_root, - TP_PROTO(struct bch_fs *c, struct btree *b), - TP_ARGS(c, b) + TP_PROTO(struct btree_trans *trans, struct btree *b), + TP_ARGS(trans, b) ); TRACE_EVENT(btree_path_relock_fail, From d4e4d8b98b839e1b384d5b58982888b3af1beb84 Mon Sep 17 00:00:00 2001 From: Richard Davies Date: Sun, 3 Dec 2023 14:10:27 +0000 Subject: [PATCH 077/226] bcachefs: Remove obsolete comment about zstd Remove obsolete comment about zstd, since approach changed during development of commit bbc3a46065d08f9ab3412b1f26bbfa778c444833 Signed-off-by: Richard Davies Signed-off-by: Kent Overstreet --- fs/bcachefs/compress.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 51af8ea230ed..33df8cf86bd8 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -572,10 +572,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), c->opts.encoded_extent_max); - /* - * ZSTD is lying: if we allocate the size of the workspace it says it - * requires, it returns memory allocation errors - */ c->zstd_workspace_size = zstd_cctx_workspace_bound(¶ms.cParams); struct { From 0d963a635d20a3a48e5a1b12e91925f868f5f9de Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 3 Dec 2023 15:54:45 -0500 Subject: [PATCH 078/226] bcachefs: Move reflink_p triggers into reflink.c Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 200 ----------------------------------------- fs/bcachefs/buckets.h | 4 - fs/bcachefs/reflink.c | 201 ++++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/reflink.h | 4 + 4 files changed, 205 insertions(+), 204 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 312bd0c86623..27c743882b63 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1164,107 +1164,6 @@ int bch2_mark_reservation(struct btree_trans *trans, return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); } -static s64 __bch2_mark_reflink_p(struct btree_trans *trans, - struct bkey_s_c_reflink_p p, - u64 start, u64 end, - u64 *idx, unsigned flags, size_t r_idx) -{ - struct bch_fs *c = trans->c; - struct reflink_gc *r; - int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; - u64 next_idx = end; - s64 ret = 0; - struct printbuf buf = PRINTBUF; - - if (r_idx >= c->reflink_gc_nr) - goto not_found; - - r = genradix_ptr(&c->reflink_gc_table, r_idx); - next_idx = min(next_idx, r->offset - r->size); - if (*idx < next_idx) - goto not_found; - - BUG_ON((s64) r->refcount + add < 0); - - r->refcount += add; - *idx = r->offset; - return 0; -not_found: - if (fsck_err(c, reflink_p_to_missing_reflink_v, - "pointer to missing indirect extent\n" - " %s\n" - " missing range %llu-%llu", - (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), - *idx, next_idx)) { - struct bkey_i_error *new; - - new = bch2_trans_kmalloc(trans, sizeof(*new)); - ret = PTR_ERR_OR_ZERO(new); - if (ret) - goto err; - - bkey_init(&new->k); - new->k.type = KEY_TYPE_error; - new->k.p = bkey_start_pos(p.k); - new->k.p.offset += *idx - start; - bch2_key_resize(&new->k, next_idx - *idx); - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, - BTREE_TRIGGER_NORUN); - } - - *idx = next_idx; -err: -fsck_err: - printbuf_exit(&buf); - return ret; -} - -static int __mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - struct reflink_gc *ref; - size_t l, r, m; - u64 idx = le64_to_cpu(p.v->idx), start = idx; - u64 end = le64_to_cpu(p.v->idx) + p.k->size; - int ret = 0; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) { - idx -= le32_to_cpu(p.v->front_pad); - end += le32_to_cpu(p.v->back_pad); - } - - l = 0; - r = c->reflink_gc_nr; - while (l < r) { - m = l + (r - l) / 2; - - ref = genradix_ptr(&c->reflink_gc_table, m); - if (ref->offset <= idx) - l = m + 1; - else - r = m; - } - - while (idx < end && !ret) - ret = __bch2_mark_reflink_p(trans, p, start, end, - &idx, flags, l++); - - return ret; -} - -int bch2_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); -} - void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -1732,105 +1631,6 @@ int bch2_trans_mark_reservation(struct btree_trans *trans, return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags); } -static int trans_mark_reflink_p_segment(struct btree_trans *trans, - struct bkey_s_c_reflink_p p, - u64 *idx, unsigned flags) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_i *k; - __le64 *refcount; - int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; - struct printbuf buf = PRINTBUF; - int ret; - - k = bch2_bkey_get_mut_noupdate(trans, &iter, - BTREE_ID_reflink, POS(0, *idx), - BTREE_ITER_WITH_UPDATES); - ret = PTR_ERR_OR_ZERO(k); - if (ret) - goto err; - - refcount = bkey_refcount(k); - if (!refcount) { - bch2_bkey_val_to_text(&buf, c, p.s_c); - bch2_trans_inconsistent(trans, - "nonexistent indirect extent at %llu while marking\n %s", - *idx, buf.buf); - ret = -EIO; - goto err; - } - - if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { - bch2_bkey_val_to_text(&buf, c, p.s_c); - bch2_trans_inconsistent(trans, - "indirect extent refcount underflow at %llu while marking\n %s", - *idx, buf.buf); - ret = -EIO; - goto err; - } - - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; - u64 pad; - - pad = max_t(s64, le32_to_cpu(v->front_pad), - le64_to_cpu(v->idx) - bkey_start_offset(&k->k)); - BUG_ON(pad > U32_MAX); - v->front_pad = cpu_to_le32(pad); - - pad = max_t(s64, le32_to_cpu(v->back_pad), - k->k.p.offset - p.k->size - le64_to_cpu(v->idx)); - BUG_ON(pad > U32_MAX); - v->back_pad = cpu_to_le32(pad); - } - - le64_add_cpu(refcount, add); - - bch2_btree_iter_set_pos_to_extent_start(&iter); - ret = bch2_trans_update(trans, &iter, k, 0); - if (ret) - goto err; - - *idx = k->k.p.offset; -err: - bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); - return ret; -} - -static int __trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - u64 idx, end_idx; - int ret = 0; - - idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); - end_idx = le64_to_cpu(p.v->idx) + p.k->size + - le32_to_cpu(p.v->back_pad); - - while (idx < end_idx && !ret) - ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); - return ret; -} - -int bch2_trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) -{ - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; - - v->front_pad = v->back_pad = 0; - } - - return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); -} - static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, size_t b, enum bch_data_type type, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index bc088673009b..379101d7e585 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -335,14 +335,10 @@ int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); -int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); - #define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ ({ \ int ret = 0; \ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index c39f172e7a64..1d56470e1849 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -3,6 +3,7 @@ #include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" +#include "error.h" #include "extents.h" #include "inode.h" #include "io_misc.h" @@ -73,6 +74,206 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r return true; } +static int trans_mark_reflink_p_segment(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, + u64 *idx, unsigned flags) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_i *k; + __le64 *refcount; + int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; + struct printbuf buf = PRINTBUF; + int ret; + + k = bch2_bkey_get_mut_noupdate(trans, &iter, + BTREE_ID_reflink, POS(0, *idx), + BTREE_ITER_WITH_UPDATES); + ret = PTR_ERR_OR_ZERO(k); + if (ret) + goto err; + + refcount = bkey_refcount(k); + if (!refcount) { + bch2_bkey_val_to_text(&buf, c, p.s_c); + bch2_trans_inconsistent(trans, + "nonexistent indirect extent at %llu while marking\n %s", + *idx, buf.buf); + ret = -EIO; + goto err; + } + + if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { + bch2_bkey_val_to_text(&buf, c, p.s_c); + bch2_trans_inconsistent(trans, + "indirect extent refcount underflow at %llu while marking\n %s", + *idx, buf.buf); + ret = -EIO; + goto err; + } + + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; + u64 pad; + + pad = max_t(s64, le32_to_cpu(v->front_pad), + le64_to_cpu(v->idx) - bkey_start_offset(&k->k)); + BUG_ON(pad > U32_MAX); + v->front_pad = cpu_to_le32(pad); + + pad = max_t(s64, le32_to_cpu(v->back_pad), + k->k.p.offset - p.k->size - le64_to_cpu(v->idx)); + BUG_ON(pad > U32_MAX); + v->back_pad = cpu_to_le32(pad); + } + + le64_add_cpu(refcount, add); + + bch2_btree_iter_set_pos_to_extent_start(&iter); + ret = bch2_trans_update(trans, &iter, k, 0); + if (ret) + goto err; + + *idx = k->k.p.offset; +err: + bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); + return ret; +} + +static int __trans_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) +{ + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + u64 idx, end_idx; + int ret = 0; + + idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); + end_idx = le64_to_cpu(p.v->idx) + p.k->size + + le32_to_cpu(p.v->back_pad); + + while (idx < end_idx && !ret) + ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); + return ret; +} + +int bch2_trans_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) +{ + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; + + v->front_pad = v->back_pad = 0; + } + + return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); +} + +static s64 __bch2_mark_reflink_p(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, + u64 start, u64 end, + u64 *idx, unsigned flags, size_t r_idx) +{ + struct bch_fs *c = trans->c; + struct reflink_gc *r; + int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; + u64 next_idx = end; + s64 ret = 0; + struct printbuf buf = PRINTBUF; + + if (r_idx >= c->reflink_gc_nr) + goto not_found; + + r = genradix_ptr(&c->reflink_gc_table, r_idx); + next_idx = min(next_idx, r->offset - r->size); + if (*idx < next_idx) + goto not_found; + + BUG_ON((s64) r->refcount + add < 0); + + r->refcount += add; + *idx = r->offset; + return 0; +not_found: + if (fsck_err(c, reflink_p_to_missing_reflink_v, + "pointer to missing indirect extent\n" + " %s\n" + " missing range %llu-%llu", + (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), + *idx, next_idx)) { + struct bkey_i_error *new; + + new = bch2_trans_kmalloc(trans, sizeof(*new)); + ret = PTR_ERR_OR_ZERO(new); + if (ret) + goto err; + + bkey_init(&new->k); + new->k.type = KEY_TYPE_error; + new->k.p = bkey_start_pos(p.k); + new->k.p.offset += *idx - start; + bch2_key_resize(&new->k, next_idx - *idx); + ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, + BTREE_TRIGGER_NORUN); + } + + *idx = next_idx; +err: +fsck_err: + printbuf_exit(&buf); + return ret; +} + +static int __mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + struct reflink_gc *ref; + size_t l, r, m; + u64 idx = le64_to_cpu(p.v->idx), start = idx; + u64 end = le64_to_cpu(p.v->idx) + p.k->size; + int ret = 0; + + BUG_ON(!(flags & BTREE_TRIGGER_GC)); + + if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) { + idx -= le32_to_cpu(p.v->front_pad); + end += le32_to_cpu(p.v->back_pad); + } + + l = 0; + r = c->reflink_gc_nr; + while (l < r) { + m = l + (r - l) / 2; + + ref = genradix_ptr(&c->reflink_gc_table, m); + if (ref->offset <= idx) + l = m + 1; + else + r = m; + } + + while (idx < end && !ret) + ret = __bch2_mark_reflink_p(trans, p, start, end, + &idx, flags, l++); + + return ret; +} + +int bch2_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); +} + /* indirect extents */ int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index 8ccf3f9c4939..6cc9c4a77265 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -9,6 +9,10 @@ int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); +int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s_c, unsigned); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ .key_invalid = bch2_reflink_p_invalid, \ From 6e92d15546944d9e2af8e7248a3a399dbd6fce0e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 Dec 2023 00:20:42 -0500 Subject: [PATCH 079/226] bcachefs: Refactor trans->paths_allocated to be standard bitmap Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 23 +++++++++++------------ fs/bcachefs/btree_iter.h | 23 +++++------------------ fs/bcachefs/btree_locking.h | 2 +- fs/bcachefs/btree_types.h | 2 +- 4 files changed, 18 insertions(+), 32 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1a1e8331976d..59815c0d0db6 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1298,7 +1298,7 @@ static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path { __bch2_btree_path_unlock(trans, path); btree_path_list_remove(trans, path); - trans->paths_allocated &= ~(1ULL << path->idx); + __clear_bit(path->idx, trans->paths_allocated); } void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) @@ -1471,6 +1471,7 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans) { struct btree_transaction_stats *s = btree_trans_stats(trans); struct printbuf buf = PRINTBUF; + size_t nr = bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX); if (!s) return; @@ -1479,9 +1480,8 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans) if (!buf.allocation_failure) { mutex_lock(&s->lock); - if (s->nr_max_paths < hweight64(trans->paths_allocated)) { - s->nr_max_paths = trans->nr_max_paths = - hweight64(trans->paths_allocated); + if (nr > s->nr_max_paths) { + s->nr_max_paths = nr; swap(s->max_paths_text, buf.buf); } mutex_unlock(&s->lock); @@ -1489,7 +1489,7 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans) printbuf_exit(&buf); - trans->nr_max_paths = hweight64(trans->paths_allocated); + trans->nr_max_paths = nr; } noinline __cold @@ -1518,13 +1518,12 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, struct btree_path *pos) { struct btree_path *path; - unsigned idx; + size_t idx = find_first_zero_bit(trans->paths_allocated, BTREE_ITER_MAX); - if (unlikely(trans->paths_allocated == - ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) + if (unlikely(idx == BTREE_ITER_MAX)) btree_path_overflow(trans); - idx = __ffs64(~trans->paths_allocated); + BUG_ON(idx > BTREE_ITER_MAX); /* * Do this before marking the new path as allocated, since it won't be @@ -1533,7 +1532,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, if (unlikely(idx > trans->nr_max_paths)) bch2_trans_update_max_paths(trans); - trans->paths_allocated |= 1ULL << idx; + __set_bit(idx, trans->paths_allocated); path = &trans->paths[idx]; path->idx = idx; @@ -2516,7 +2515,7 @@ static void btree_trans_verify_sorted_refs(struct btree_trans *trans) struct btree_path *path; unsigned i; - BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated)); + BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX)); trans_for_each_path(trans, path) { BUG_ON(path->sorted_idx >= trans->nr_sorted); @@ -2526,7 +2525,7 @@ static void btree_trans_verify_sorted_refs(struct btree_trans *trans) for (i = 0; i < trans->nr_sorted; i++) { unsigned idx = trans->sorted[i]; - EBUG_ON(!(trans->paths_allocated & (1ULL << idx))); + BUG_ON(!test_bit(idx, trans->paths_allocated)); BUG_ON(trans->paths[idx].sorted_idx != i); } } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 3859c0b27d2b..2a657205572a 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -66,17 +66,10 @@ static inline void btree_trans_sort_paths(struct btree_trans *trans) static inline struct btree_path * __trans_next_path(struct btree_trans *trans, unsigned idx) { - u64 l; - + idx = find_next_bit(trans->paths_allocated, BTREE_ITER_MAX, idx); if (idx == BTREE_ITER_MAX) return NULL; - - l = trans->paths_allocated >> idx; - if (!l) - return NULL; - - idx += __ffs64(l); - EBUG_ON(idx >= BTREE_ITER_MAX); + EBUG_ON(idx > BTREE_ITER_MAX); EBUG_ON(trans->paths[idx].idx != idx); return &trans->paths[idx]; } @@ -92,17 +85,11 @@ __trans_next_path(struct btree_trans *trans, unsigned idx) static inline struct btree_path * __trans_next_path_safe(struct btree_trans *trans, unsigned *idx) { - u64 l; - + *idx = find_next_bit(trans->paths_allocated, BTREE_ITER_MAX, *idx); if (*idx == BTREE_ITER_MAX) return NULL; - l = trans->paths_allocated >> *idx; - if (!l) - return NULL; - - *idx += __ffs64(l); - EBUG_ON(*idx >= BTREE_ITER_MAX); + EBUG_ON(*idx > BTREE_ITER_MAX); return &trans->paths[*idx]; } @@ -631,7 +618,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *); static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) + if (bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX) > BTREE_ITER_MAX - 8) return __bch2_btree_trans_too_many_iters(trans); return 0; diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 11b0a2c8cd69..a49f1dd1d223 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -263,7 +263,7 @@ static inline int btree_node_lock(struct btree_trans *trans, int ret = 0; EBUG_ON(level >= BTREE_MAX_DEPTH); - EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); + EBUG_ON(!test_bit(path->idx, trans->paths_allocated)); if (likely(six_trylock_type(&b->lock, type)) || btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) || diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index ca7526603d06..78d9f585db45 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -414,7 +414,7 @@ struct btree_trans { unsigned extra_journal_res; unsigned nr_max_paths; - u64 paths_allocated; + unsigned long paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; unsigned mem_top; unsigned mem_max; From 56ec287d30ba2f8dec70b642e9b2c108116cbfd9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 Dec 2023 13:03:24 -0500 Subject: [PATCH 080/226] bcachefs: BCH_ERR_opt_parse_error Continuing the project of replacing generic error codes with more specific ones. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/opts.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index d49a4994666d..ac6201403fbe 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -175,6 +175,7 @@ x(EINVAL, insufficient_devices_to_start) \ x(EINVAL, invalid) \ x(EINVAL, internal_fsck_err) \ + x(EINVAL, opt_parse_error) \ x(EROFS, erofs_trans_commit) \ x(EROFS, erofs_no_writes) \ x(EROFS, erofs_journal_err) \ diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 8dd4046cca41..8e6f230eac38 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -279,14 +279,14 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) if (err) prt_printf(err, "%s: not a multiple of 512", opt->attr.name); - return -EINVAL; + return -BCH_ERR_opt_parse_error; } if ((opt->flags & OPT_MUST_BE_POW_2) && !is_power_of_2(v)) { if (err) prt_printf(err, "%s: must be a power of two", opt->attr.name); - return -EINVAL; + return -BCH_ERR_opt_parse_error; } if (opt->fn.validate) From 5a11b5fe79e9f781db8aec6222d03dc92e2978d9 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 5 Dec 2023 08:24:39 -0500 Subject: [PATCH 081/226] bcachefs: return from fsync on writeback error to avoid early shutdown When investigating transient failures of generic/441 on bcachefs, it was determined that the cause of the failure was a combination of unconditional emergency shutdown and racing between background journal activity and the test switchover from a working device mapper table to an error injecting table. Part of the reason for this sequence of events is that bcachefs aggressively flushes as much as possible during fsync(), regardless of errors. While this is reasonable behavior, it is technically unnecessary because once an error is returned from fsync(), the caller cannot make any assumptions about the resilience of data. Tweak the bch2_fsync() logic to return an error on failure of any of the steps involved in the flush. Note that this change alone does not prevent generic/441 failure, but in combination with a test tweak to avoid racing during the dm-error table switchover it avoids the unnecessary shutdowns and allows the test to pass reliably on bcachefs. Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 31f40e587a4f..98bd5babab19 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -192,13 +192,17 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret, ret2, ret3; + int ret; ret = file_write_and_wait_range(file, start, end); - ret2 = sync_inode_metadata(&inode->v, 1); - ret3 = bch2_flush_inode(c, inode); - - return bch2_err_class(ret ?: ret2 ?: ret3); + if (ret) + goto out; + ret = sync_inode_metadata(&inode->v, 1); + if (ret) + goto out; + ret = bch2_flush_inode(c, inode); +out: + return bch2_err_class(ret); } /* truncate: */ From 483dea4431240dbd6503dc1b2801e391068ba737 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 5 Dec 2023 15:22:25 -0500 Subject: [PATCH 082/226] bcachefs: Improve error message when finding wrong btree node single_device.merge_torture_flakey is, very rarely, finding a btree node that doesn't match the key that points to it: this patch improves the error message to print out more fields from the btree node header, so that we can see what else does or does not match the key. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index a6ac68fe90fb..d99c237c8873 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -968,12 +968,20 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; + bch2_bpos_to_text(&buf, b->data->min_key); + prt_str(&buf, "-"); + bch2_bpos_to_text(&buf, b->data->max_key); + btree_err_on(b->data->keys.seq != bp->seq, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, btree_node_bad_seq, - "got wrong btree node (seq %llx want %llx)", - b->data->keys.seq, bp->seq); + "got wrong btree node (want %llx got %llx)\n" + "got btree %s level %llu pos %s", + bp->seq, b->data->keys.seq, + bch2_btree_id_str(BTREE_NODE_ID(b->data)), + BTREE_NODE_LEVEL(b->data), + buf.buf); } else { btree_err_on(!b->data->keys.seq, -BCH_ERR_btree_node_read_err_must_retry, From 63508b756443cb38e432dea01fec36248cb25cd7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Dec 2023 16:26:18 -0500 Subject: [PATCH 083/226] bcachefs: c->ro_ref Add a new refcount for async ops that don't necessarily need the fs to be RW, with similar lifetime/rules otherwise as c->writes. To be used by online fsck. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 21 +++++++++++++++++++++ fs/bcachefs/super.c | 6 ++++++ 2 files changed, 27 insertions(+) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 06b5cea9980e..ca7c9dea83dd 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -710,6 +710,13 @@ struct bch_fs { #else struct percpu_ref writes; #endif + /* + * Analagous to c->writes, for asynchronous ops that don't necessarily + * need fs to be read-write + */ + refcount_t ro_ref; + wait_queue_head_t ro_ref_wait; + struct work_struct read_only_work; struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; @@ -1105,6 +1112,20 @@ static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref) #endif } +static inline bool bch2_ro_ref_tryget(struct bch_fs *c) +{ + if (test_bit(BCH_FS_stopping, &c->flags)) + return false; + + return refcount_inc_not_zero(&c->ro_ref); +} + +static inline void bch2_ro_ref_put(struct bch_fs *c) +{ + if (refcount_dec_and_test(&c->ro_ref)) + wake_up(&c->ro_ref_wait); +} + static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) { #ifndef NO_BCACHEFS_FS diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8e566c3afcfd..1ec66bb8a63f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -599,6 +599,9 @@ void __bch2_fs_stop(struct bch_fs *c) bch2_fs_debug_exit(c); bch2_fs_chardev_exit(c); + bch2_ro_ref_put(c); + wait_event(c->ro_ref_wait, !refcount_read(&c->ro_ref)); + kobject_put(&c->counters_kobj); kobject_put(&c->time_stats); kobject_put(&c->opts_dir); @@ -729,6 +732,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mutex_init(&c->btree_root_lock); INIT_WORK(&c->read_only_work, bch2_fs_read_only_work); + refcount_set(&c->ro_ref, 1); + init_waitqueue_head(&c->ro_ref_wait); + init_rwsem(&c->gc_lock); mutex_init(&c->gc_gens_lock); atomic_set(&c->journal_keys.ref, 1); From bbefcd910d9f992db2d8ba4b8f96a77d8fb131d7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Jul 2023 00:20:22 -0400 Subject: [PATCH 084/226] bcachefs: thread_with_file Abstract out a new helper from the data job code, for connecting a kthread to a file descriptor. Signed-off-by: Kent Overstreet --- fs/bcachefs/chardev.c | 114 ++++++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 44 deletions(-) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index ba0436ae6b05..30a70cef1174 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -29,6 +29,62 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long return copy_to_user(to, from, n) ? -EFAULT : 0; } +struct thread_with_file { + struct task_struct *task; + int ret; +}; + +static void thread_with_file_exit(struct thread_with_file *thr) +{ + kthread_stop(thr->task); + put_task_struct(thr->task); +} + +__printf(4, 0) +static int run_thread_with_file(struct thread_with_file *thr, + const struct file_operations *fops, + int (*fn)(void *), const char *fmt, ...) +{ + va_list args; + struct file *file = NULL; + int ret, fd = -1; + struct printbuf name = PRINTBUF; + unsigned fd_flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK; + + va_start(args, fmt); + prt_vprintf(&name, fmt, args); + va_end(args); + + thr->ret = 0; + thr->task = kthread_create(fn, thr, name.buf); + ret = PTR_ERR_OR_ZERO(thr->task); + if (ret) + goto err; + + ret = get_unused_fd_flags(fd_flags); + if (ret < 0) + goto err_stop_task; + fd = ret; + + file = anon_inode_getfile(name.buf, fops, thr, fd_flags); + ret = PTR_ERR_OR_ZERO(file); + if (ret) + goto err_put_fd; + + fd_install(fd, file); + get_task_struct(thr->task); + wake_up_process(thr->task); + printbuf_exit(&name); + return fd; +err_put_fd: + put_unused_fd(fd); +err_stop_task: + kthread_stop(thr->task); +err: + printbuf_exit(&name); + return ret; +} + /* returns with ref on ca->ref */ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, unsigned flags) @@ -299,31 +355,27 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c, } struct bch_data_ctx { + struct thread_with_file thr; + struct bch_fs *c; struct bch_ioctl_data arg; struct bch_move_stats stats; - - int ret; - - struct task_struct *thread; }; static int bch2_data_thread(void *arg) { - struct bch_data_ctx *ctx = arg; - - ctx->ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); + struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr); + ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); ctx->stats.data_type = U8_MAX; return 0; } static int bch2_data_job_release(struct inode *inode, struct file *file) { - struct bch_data_ctx *ctx = file->private_data; + struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr); - kthread_stop(ctx->thread); - put_task_struct(ctx->thread); + thread_with_file_exit(&ctx->thr); kfree(ctx); return 0; } @@ -331,7 +383,7 @@ static int bch2_data_job_release(struct inode *inode, struct file *file) static ssize_t bch2_data_job_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - struct bch_data_ctx *ctx = file->private_data; + struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr); struct bch_fs *c = ctx->c; struct bch_ioctl_data_event e = { .type = BCH_DATA_EVENT_PROGRESS, @@ -357,10 +409,8 @@ static const struct file_operations bcachefs_data_ops = { static long bch2_ioctl_data(struct bch_fs *c, struct bch_ioctl_data arg) { - struct bch_data_ctx *ctx = NULL; - struct file *file = NULL; - unsigned flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK; - int ret, fd = -1; + struct bch_data_ctx *ctx; + int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -375,36 +425,12 @@ static long bch2_ioctl_data(struct bch_fs *c, ctx->c = c; ctx->arg = arg; - ctx->thread = kthread_create(bch2_data_thread, ctx, - "bch-data/%s", c->name); - if (IS_ERR(ctx->thread)) { - ret = PTR_ERR(ctx->thread); - goto err; - } - - ret = get_unused_fd_flags(flags); + ret = run_thread_with_file(&ctx->thr, + &bcachefs_data_ops, + bch2_data_thread, + "bch-data/%s", c->name); if (ret < 0) - goto err; - fd = ret; - - file = anon_inode_getfile("[bcachefs]", &bcachefs_data_ops, ctx, flags); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto err; - } - - fd_install(fd, file); - - get_task_struct(ctx->thread); - wake_up_process(ctx->thread); - - return fd; -err: - if (fd >= 0) - put_unused_fd(fd); - if (!IS_ERR_OR_NULL(ctx->thread)) - kthread_stop(ctx->thread); - kfree(ctx); + kfree(ctx); return ret; } From 2b41226d7f4b3b836a2a3e4ce08ab18ba03f0cd0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 Dec 2023 20:15:23 -0500 Subject: [PATCH 085/226] bcachefs: Add ability to redirect log output Upcoming patches are going to add two new ioctls for running fsck in the kernel, but pretending that we're running our normal userspace fsck. This patch adds some plumbing for redirecting our normal log messages away from the dmesg log to a thread_with_file file descriptor - via a struct log_output, which will be consumed by the fsck f_op's read method. The new ioctls will allow for running fsck in the kernel against an offline filesystem (without mounting it), and an online filesystem. For an offline filesystem we need a way to pass in a pointer to the log_output, which is done via a new hidden opts.h option. For online fsck, we can set c->output directly, but only want to redirect log messages from the thread running fsck - hence the new c->output_filter method. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 55 +++++++++++++++++++++++++++++++----------- fs/bcachefs/opts.h | 5 ++++ fs/bcachefs/recovery.c | 6 ++--- fs/bcachefs/super.c | 28 +++++++++++++++++++++ 4 files changed, 77 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index ca7c9dea83dd..3c9d24e42ff7 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -264,36 +264,55 @@ do { \ #define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n") +__printf(2, 3) +void __bch2_print(struct bch_fs *c, const char *fmt, ...); + +#define maybe_dev_to_fs(_c) _Generic((_c), \ + struct bch_dev *: ((struct bch_dev *) (_c))->fs, \ + struct bch_fs *: (_c)) + +#define bch2_print(_c, ...) __bch2_print(maybe_dev_to_fs(_c), __VA_ARGS__) + +#define bch2_print_ratelimited(_c, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + \ + if (__ratelimit(&_rs)) \ + bch2_print(_c, __VA_ARGS__); \ +} while (0) + #define bch_info(c, fmt, ...) \ - printk(KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) + bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_notice(c, fmt, ...) \ - printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) + bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_warn(c, fmt, ...) \ - printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) + bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_warn_ratelimited(c, fmt, ...) \ - printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) + bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_err(c, fmt, ...) \ - printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) + bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_err_dev(ca, fmt, ...) \ - printk(KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) + bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) #define bch_err_dev_offset(ca, _offset, fmt, ...) \ - printk(KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__) + bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__) #define bch_err_inum(c, _inum, fmt, ...) \ - printk(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__) + bch2_print(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__) #define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \ - printk(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) + bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) #define bch_err_ratelimited(c, fmt, ...) \ - printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) + bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_err_dev_ratelimited(ca, fmt, ...) \ - printk_ratelimited(KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) + bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) #define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \ - printk_ratelimited(KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__) + bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__) #define bch_err_inum_ratelimited(c, _inum, fmt, ...) \ - printk_ratelimited(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__) + bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__) #define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \ - printk_ratelimited(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) + bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) #define bch_err_fn(_c, _ret) \ do { \ @@ -446,6 +465,12 @@ enum bch_time_stats { struct btree; +struct log_output { + spinlock_t lock; + wait_queue_head_t wait; + struct printbuf buf; +}; + enum gc_phase { GC_PHASE_NOT_RUNNING, GC_PHASE_START, @@ -700,6 +725,8 @@ struct bch_fs { struct super_block *vfs_sb; dev_t dev; char name[40]; + struct log_output *output; + struct task_struct *output_filter; /* ro/rw, add/remove/resize devices: */ struct rw_semaphore state_lock; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 8526f177450a..7f9e3001bf55 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -419,6 +419,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Allocate the buckets_nouse bitmap") \ + x(log_output, u64, \ + 0, \ + OPT_UINT(0, S64_MAX), \ + BCH2_NO_SB_OPT, false, \ + NULL, "Pointer to a struct log_output") \ x(project, u8, \ OPT_INODE, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index fec74086c9c5..b296a8fc2fdc 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -690,13 +690,13 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) struct recovery_pass_fn *p = recovery_pass_fns + pass; if (!(p->when & PASS_SILENT)) - printk(KERN_INFO bch2_log_msg(c, "%s..."), - bch2_recovery_passes[pass]); + bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), + bch2_recovery_passes[pass]); ret = p->fn(c); if (ret) return ret; if (!(p->when & PASS_SILENT)) - printk(KERN_CONT " done\n"); + bch2_print(c, KERN_CONT " done\n"); c->recovery_passes_complete |= BIT_ULL(pass); } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 1ec66bb8a63f..83e5423fd005 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -86,6 +86,32 @@ const char * const bch2_fs_flag_strs[] = { NULL }; +void __bch2_print(struct bch_fs *c, const char *fmt, ...) +{ + struct log_output *output = c->output; + va_list args; + + if (c->output_filter && c->output_filter != current) + output = NULL; + + va_start(args, fmt); + if (likely(!output)) { + vprintk(fmt, args); + } else { + unsigned long flags; + + if (fmt[0] == KERN_SOH[0]) + fmt += 2; + + spin_lock_irqsave(&output->lock, flags); + prt_vprintf(&output->buf, fmt, args); + spin_unlock_irqrestore(&output->lock, flags); + + wake_up(&output->wait); + } + va_end(args); +} + #define KTYPE(type) \ static const struct attribute_group type ## _group = { \ .attrs = type ## _files \ @@ -712,6 +738,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto out; } + c->output = (void *)(unsigned long) opts.log_output; + __module_get(THIS_MODULE); closure_init(&c->cl, NULL); From 0953450af79e9e814f64b89464e78237ceeccf54 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Dec 2023 14:24:26 -0500 Subject: [PATCH 086/226] bcachefs: Mark recovery passses that are safe to run online Online fsck is coming, and many of our recovery/fsck passes are already safe to run while the filesystem is in use - mark which ones. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery_types.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index d37c6fd30e38..6a7debe663b9 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -6,6 +6,7 @@ #define PASS_FSCK BIT(1) #define PASS_UNCLEAN BIT(2) #define PASS_ALWAYS BIT(3) +#define PASS_ONLINE BIT(4) /* * Passes may be reordered, but the second field is a persistent identifier and @@ -22,18 +23,18 @@ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ x(journal_replay, 9, PASS_ALWAYS) \ - x(check_alloc_info, 10, PASS_FSCK) \ - x(check_lrus, 11, PASS_FSCK) \ - x(check_btree_backpointers, 12, PASS_FSCK) \ - x(check_backpointers_to_extents, 13, PASS_FSCK) \ - x(check_extents_to_backpointers, 14, PASS_FSCK) \ - x(check_alloc_to_lru_refs, 15, PASS_FSCK) \ + x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ + x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ + x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ + x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK) \ + x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ + x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ x(bucket_gens_init, 17, 0) \ - x(check_snapshot_trees, 18, PASS_FSCK) \ - x(check_snapshots, 19, PASS_FSCK) \ - x(check_subvols, 20, PASS_FSCK) \ - x(delete_dead_snapshots, 21, PASS_FSCK) \ + x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ + x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ + x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ + x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ x(fs_upgrade_for_subvolumes, 22, 0) \ x(resume_logged_ops, 23, PASS_ALWAYS) \ x(check_inodes, 24, PASS_FSCK) \ From 7f391b2f8edc29a63913b6a2d42ea8cbb5a36ee8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Dec 2023 14:36:18 -0500 Subject: [PATCH 087/226] bcachefs: bch2_run_online_recovery_passes() Add a new helper for running online recovery passes - i.e. online fsck. This is a subset of our normal recovery passes, and does not - for now - use or follow c->curr_recovery_pass. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 7 +++++ fs/bcachefs/recovery.c | 59 ++++++++++++++++++++++++++++-------------- fs/bcachefs/recovery.h | 1 + 3 files changed, 47 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 3c9d24e42ff7..18bc1bbef918 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1047,6 +1047,13 @@ struct bch_fs { /* RECOVERY */ u64 journal_replay_seq_start; u64 journal_replay_seq_end; + /* + * Two different uses: + * "Has this fsck pass?" - i.e. should this type of error be an + * emergency read-only + * And, in certain situations fsck will rewind to an earlier pass: used + * for signaling to the toplevel code which pass we want to run now. + */ enum bch_recovery_pass curr_recovery_pass; /* bitmap of explicitly enabled recovery passes: */ u64 recovery_passes_explicit; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b296a8fc2fdc..629ddbb5850f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -665,7 +665,7 @@ u64 bch2_fsck_recovery_passes(void) static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { - struct recovery_pass_fn *p = recovery_pass_fns + c->curr_recovery_pass; + struct recovery_pass_fn *p = recovery_pass_fns + pass; if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) return false; @@ -682,24 +682,17 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { + struct recovery_pass_fn *p = recovery_pass_fns + pass; int ret; - c->curr_recovery_pass = pass; - - if (should_run_recovery_pass(c, pass)) { - struct recovery_pass_fn *p = recovery_pass_fns + pass; - - if (!(p->when & PASS_SILENT)) - bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), - bch2_recovery_passes[pass]); - ret = p->fn(c); - if (ret) - return ret; - if (!(p->when & PASS_SILENT)) - bch2_print(c, KERN_CONT " done\n"); - - c->recovery_passes_complete |= BIT_ULL(pass); - } + if (!(p->when & PASS_SILENT)) + bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), + bch2_recovery_passes[pass]); + ret = p->fn(c); + if (ret) + return ret; + if (!(p->when & PASS_SILENT)) + bch2_print(c, KERN_CONT " done\n"); return 0; } @@ -709,12 +702,38 @@ static int bch2_run_recovery_passes(struct bch_fs *c) int ret = 0; while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { - ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) + if (should_run_recovery_pass(c, c->curr_recovery_pass)) { + ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); + if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) + continue; + if (ret) + break; + + c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); + } + c->curr_recovery_pass++; + } + + return ret; +} + +int bch2_run_online_recovery_passes(struct bch_fs *c) +{ + int ret = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { + struct recovery_pass_fn *p = recovery_pass_fns + i; + + if (!(p->when & PASS_ONLINE)) continue; + + ret = bch2_run_recovery_pass(c, i); + if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { + i = c->curr_recovery_pass; + continue; + } if (ret) break; - c->curr_recovery_pass++; } return ret; diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index 3a554b0751d0..4e9d24719b2e 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -31,6 +31,7 @@ static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c, } } +int bch2_run_online_recovery_passes(struct bch_fs *); u64 bch2_fsck_recovery_passes(void); int bch2_fs_recovery(struct bch_fs *); From 8408fa570ef9b8c35720369bad6b13828ae6b001 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 11 Jul 2023 23:23:40 -0400 Subject: [PATCH 088/226] bcachefs: BCH_IOCTL_FSCK_OFFLINE This adds a new ioctl for running fsck on a list of devices. Normally, if we wish to use the kernel's implementation of fsck we'd run it at mount time with -o fsck. This ioctl lets us run fsck without mounting, so that userspace bcachefs-tools can transparently switch to the kernel's implementation of fsck when appropriate - primarily if the kernel version of bcachefs better matches the filesystem on disk. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 13 +++ fs/bcachefs/chardev.c | 221 ++++++++++++++++++++++++++++++++++- 2 files changed, 230 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 43822c17297c..07c490851742 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -83,6 +83,8 @@ struct bch_ioctl_incremental { #define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2) +#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline) + /* ioctl below act on a particular file, not the filesystem as a whole: */ #define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) @@ -386,4 +388,15 @@ struct bch_ioctl_subvolume { #define BCH_SUBVOL_SNAPSHOT_CREATE (1U << 0) #define BCH_SUBVOL_SNAPSHOT_RO (1U << 1) +/* + * BCH_IOCTL_FSCK_OFFLINE: run fsck from the 'bcachefs fsck' userspace command, + * but with the kernel's implementation of fsck: + */ +struct bch_ioctl_fsck_offline { + __u64 flags; + __u64 opts; /* string */ + __u64 nr_devs; + __u64 devs[0]; +}; + #endif /* _BCACHEFS_IOCTL_H */ diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 30a70cef1174..78bcfa4cb48f 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -32,12 +33,15 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long struct thread_with_file { struct task_struct *task; int ret; + bool done; }; static void thread_with_file_exit(struct thread_with_file *thr) { - kthread_stop(thr->task); - put_task_struct(thr->task); + if (thr->task) { + kthread_stop(thr->task); + put_task_struct(thr->task); + } } __printf(4, 0) @@ -194,8 +198,208 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg } #endif +struct fsck_thread { + struct thread_with_file thr; + struct printbuf buf; + char **devs; + size_t nr_devs; + struct bch_opts opts; + + struct log_output output; + DARRAY(char) output2; +}; + +static void bch2_fsck_thread_free(struct fsck_thread *thr) +{ + thread_with_file_exit(&thr->thr); + if (thr->devs) + for (size_t i = 0; i < thr->nr_devs; i++) + kfree(thr->devs[i]); + darray_exit(&thr->output2); + printbuf_exit(&thr->output.buf); + kfree(thr->devs); + kfree(thr); +} + +static int bch2_fsck_thread_release(struct inode *inode, struct file *file) +{ + struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); + + bch2_fsck_thread_free(thr); + return 0; +} + +static bool fsck_thread_ready(struct fsck_thread *thr) +{ + return thr->output.buf.pos || + thr->output2.nr || + thr->thr.done; +} + +static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); + size_t copied = 0, b; + int ret = 0; + + if ((file->f_flags & O_NONBLOCK) && + !fsck_thread_ready(thr)) + return -EAGAIN; + + ret = wait_event_interruptible(thr->output.wait, + fsck_thread_ready(thr)); + if (ret) + return ret; + + if (thr->thr.done) + return 0; + + while (len) { + ret = darray_make_room(&thr->output2, thr->output.buf.pos); + if (ret) + break; + + spin_lock_irq(&thr->output.lock); + b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos); + + memcpy(&darray_top(thr->output2), thr->output.buf.buf, b); + memmove(thr->output.buf.buf, + thr->output.buf.buf + b, + thr->output.buf.pos - b); + + thr->output2.nr += b; + thr->output.buf.pos -= b; + spin_unlock_irq(&thr->output.lock); + + b = min(len, thr->output2.nr); + if (!b) + break; + + b -= copy_to_user(buf, thr->output2.data, b); + if (!b) { + ret = -EFAULT; + break; + } + + copied += b; + buf += b; + len -= b; + + memmove(thr->output2.data, + thr->output2.data + b, + thr->output2.nr - b); + thr->output2.nr -= b; + } + + return copied ?: ret; +} + +static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait) +{ + struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); + + poll_wait(file, &thr->output.wait, wait); + + return fsck_thread_ready(thr) + ? EPOLLIN|EPOLLHUP + : 0; +} + +static const struct file_operations fsck_thread_ops = { + .release = bch2_fsck_thread_release, + .read = bch2_fsck_thread_read, + .poll = bch2_fsck_thread_poll, + .llseek = no_llseek, +}; + +static int bch2_fsck_offline_thread_fn(void *arg) +{ + struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr); + struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts); + + thr->thr.ret = PTR_ERR_OR_ZERO(c); + if (!thr->thr.ret) + bch2_fs_stop(c); + + thr->thr.done = true; + wake_up(&thr->output.wait); + return 0; +} + +static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) +{ + struct bch_ioctl_fsck_offline arg; + struct fsck_thread *thr = NULL; + u64 *devs = NULL; + long ret = 0; + + if (copy_from_user(&arg, user_arg, sizeof(arg))) + return -EFAULT; + + if (arg.flags) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) || + !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) || + !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) { + ret = -ENOMEM; + goto err; + } + + thr->nr_devs = arg.nr_devs; + thr->output.buf = PRINTBUF; + thr->output.buf.atomic++; + spin_lock_init(&thr->output.lock); + init_waitqueue_head(&thr->output.wait); + darray_init(&thr->output2); + + if (copy_from_user(devs, &user_arg->devs[0], sizeof(user_arg->devs[0]) * arg.nr_devs)) { + ret = -EINVAL; + goto err; + } + + for (size_t i = 0; i < arg.nr_devs; i++) { + thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX); + ret = PTR_ERR_OR_ZERO(thr->devs[i]); + if (ret) + goto err; + } + + if (arg.opts) { + char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); + + ret = PTR_ERR_OR_ZERO(optstr) ?: + bch2_parse_mount_opts(NULL, &thr->opts, optstr); + kfree(optstr); + + if (ret) + goto err; + } + + opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output); + + ret = run_thread_with_file(&thr->thr, + &fsck_thread_ops, + bch2_fsck_offline_thread_fn, + "bch-fsck"); +err: + if (ret < 0) { + if (thr) + bch2_fsck_thread_free(thr); + pr_err("ret %s", bch2_err_str(ret)); + } + kfree(devs); + return ret; +} + static long bch2_global_ioctl(unsigned cmd, void __user *arg) { + long ret; + switch (cmd) { #if 0 case BCH_IOCTL_ASSEMBLE: @@ -203,9 +407,18 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg) case BCH_IOCTL_INCREMENTAL: return bch2_ioctl_incremental(arg); #endif - default: - return -ENOTTY; + case BCH_IOCTL_FSCK_OFFLINE: { + ret = bch2_ioctl_fsck_offline(arg); + break; } + default: + ret = -ENOTTY; + break; + } + + if (ret < 0) + ret = bch2_err_class(ret); + return ret; } static long bch2_ioctl_query_uuid(struct bch_fs *c, From 267b801fda10b70eca4001a819fcac07f023df6b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 Dec 2023 13:45:33 -0500 Subject: [PATCH 089/226] bcachefs: BCH_IOCTL_FSCK_ONLINE This adds a new ioctl for running fsck on a mounted, in use filesystem. This reuses the fsck_thread code from the previous patch for running fsck on an offline, unmounted filesystem, so that log messages for the fsck thread are redirected to userspace. Only one running fsck instance is allowed at a time; a new semaphore (since the lock will be taken by one thread and released by another) is added for this. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/bcachefs_ioctl.h | 12 ++++- fs/bcachefs/chardev.c | 95 +++++++++++++++++++++++++++++++++++- fs/bcachefs/super.c | 1 + 4 files changed, 107 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 18bc1bbef918..b42e3854c51f 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1058,6 +1058,7 @@ struct bch_fs { /* bitmap of explicitly enabled recovery passes: */ u64 recovery_passes_explicit; u64 recovery_passes_complete; + struct semaphore online_fsck_mutex; /* DEBUG JUNK */ struct dentry *fs_debug_dir; diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 07c490851742..21f81b16f24e 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -83,7 +83,8 @@ struct bch_ioctl_incremental { #define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2) -#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline) +#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline) +#define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online) /* ioctl below act on a particular file, not the filesystem as a whole: */ @@ -399,4 +400,13 @@ struct bch_ioctl_fsck_offline { __u64 devs[0]; }; +/* + * BCH_IOCTL_FSCK_ONLINE: run fsck from the 'bcachefs fsck' userspace command, + * but with the kernel's implementation of fsck: + */ +struct bch_ioctl_fsck_online { + __u64 flags; + __u64 opts; /* string */ +}; + #endif /* _BCACHEFS_IOCTL_H */ diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 78bcfa4cb48f..08922f7e380a 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -7,6 +7,7 @@ #include "chardev.h" #include "journal.h" #include "move.h" +#include "recovery.h" #include "replicas.h" #include "super.h" #include "super-io.h" @@ -201,6 +202,7 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg struct fsck_thread { struct thread_with_file thr; struct printbuf buf; + struct bch_fs *c; char **devs; size_t nr_devs; struct bch_opts opts; @@ -350,6 +352,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a goto err; } + thr->opts = bch2_opts_empty(); thr->nr_devs = arg.nr_devs; thr->output.buf = PRINTBUF; thr->output.buf.atomic++; @@ -929,6 +932,95 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, return ret; } +static int bch2_fsck_online_thread_fn(void *arg) +{ + struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr); + struct bch_fs *c = thr->c; + + c->output_filter = current; + c->output = &thr->output; + + /* + * XXX: can we figure out a way to do this without mucking with c->opts? + */ + if (opt_defined(thr->opts, fix_errors)) + c->opts.fix_errors = thr->opts.fix_errors; + c->opts.fsck = true; + + c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; + bch2_run_online_recovery_passes(c); + + c->output = NULL; + c->output_filter = NULL; + + thr->thr.done = true; + wake_up(&thr->output.wait); + + up(&c->online_fsck_mutex); + bch2_ro_ref_put(c); + return 0; +} + +static long bch2_ioctl_fsck_online(struct bch_fs *c, + struct bch_ioctl_fsck_online arg) +{ + struct fsck_thread *thr = NULL; + long ret = 0; + + if (arg.flags) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!bch2_ro_ref_tryget(c)) + return -EROFS; + + if (down_trylock(&c->online_fsck_mutex)) { + bch2_ro_ref_put(c); + return -EAGAIN; + } + + thr = kzalloc(sizeof(*thr), GFP_KERNEL); + if (!thr) { + ret = -ENOMEM; + goto err; + } + + thr->c = c; + thr->opts = bch2_opts_empty(); + thr->output.buf = PRINTBUF; + thr->output.buf.atomic++; + spin_lock_init(&thr->output.lock); + init_waitqueue_head(&thr->output.wait); + darray_init(&thr->output2); + + if (arg.opts) { + char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); + + ret = PTR_ERR_OR_ZERO(optstr) ?: + bch2_parse_mount_opts(c, &thr->opts, optstr); + kfree(optstr); + + if (ret) + goto err; + } + + ret = run_thread_with_file(&thr->thr, + &fsck_thread_ops, + bch2_fsck_online_thread_fn, + "bch-fsck"); +err: + if (ret < 0) { + bch_err_fn(c, ret); + if (thr) + bch2_fsck_thread_free(thr); + up(&c->online_fsck_mutex); + bch2_ro_ref_put(c); + } + return ret; +} + #define BCH_IOCTL(_name, _argtype) \ do { \ _argtype i; \ @@ -984,7 +1076,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize); case BCH_IOCTL_DISK_RESIZE_JOURNAL: BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal); - + case BCH_IOCTL_FSCK_ONLINE: + BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online); default: return -ENOTTY; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 83e5423fd005..c7c7d4a11eb9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -762,6 +762,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) refcount_set(&c->ro_ref, 1); init_waitqueue_head(&c->ro_ref_wait); + sema_init(&c->online_fsck_mutex, 1); init_rwsem(&c->gc_lock); mutex_init(&c->gc_gens_lock); From a0acc24fedbe067b30f4320daa8d63b404f6ccd9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Dec 2023 17:53:59 -0500 Subject: [PATCH 090/226] bcachefs: Fix open coded set_btree_iter_dontneed() Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index a7e6554cc7dd..d99f38c1e7f9 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -377,7 +377,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl); if (!ob) - iter.path->preserve = false; + set_btree_iter_dontneed(&iter); err: if (iter.trans && iter.path) set_btree_iter_dontneed(&iter); @@ -447,7 +447,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans, ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); next: - citer.path->preserve = false; + set_btree_iter_dontneed(&citer); bch2_trans_iter_exit(trans, &citer); if (ob) break; @@ -502,7 +502,7 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, ob = try_alloc_bucket(trans, ca, watermark, alloc_cursor, s, k, cl); if (ob) { - iter.path->preserve = false; + set_btree_iter_dontneed(&iter); break; } } From be1fa63de867aa26f2f57e6f5db2a358f6da5ad1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Dec 2023 23:50:38 -0500 Subject: [PATCH 091/226] bcachefs: Fix bch2_read_btree() In the debugfs code, we had an incorrect use of drop_locks_do(); on transaction restart we don't want to restart the current loop iteration, since we've already emitted the current key to the buffer for userspace. Signed-off-by: Kent Overstreet --- fs/bcachefs/debug.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 57c5128db173..c45421a2f408 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -385,7 +385,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, BTREE_ITER_ALL_SNAPSHOTS, k, ({ bch2_bkey_val_to_text(&i->buf, i->c, k); prt_newline(&i->buf); - drop_locks_do(trans, flush_buf(i)); + bch2_trans_unlock(trans); + flush_buf(i); })); i->from = iter.pos; @@ -490,7 +491,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, } bch2_bfloat_to_text(&i->buf, l->b, _k); - drop_locks_do(trans, flush_buf(i)); + bch2_trans_unlock(trans); + flush_buf(i); })); i->from = iter.pos; From 8c066edeb43b067badac984b6a0493d07d15c00a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Dec 2023 00:10:25 -0500 Subject: [PATCH 092/226] bcachefs: continue now works in for_each_btree_key2() continue now works as in any other loop Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 102 +++++++++++++-------------------------- 1 file changed, 34 insertions(+), 68 deletions(-) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 2a657205572a..83a4ff0b91e1 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -656,19 +656,24 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, return k; } +/* + * goto instead of loop, so that when used inside for_each_btree_key2() + * break/continue work correctly + */ #define lockrestart_do(_trans, _do) \ ({ \ + __label__ transaction_restart; \ u32 _restart_count; \ int _ret2; \ +transaction_restart: \ + _restart_count = bch2_trans_begin(_trans); \ + _ret2 = (_do); \ \ - do { \ - _restart_count = bch2_trans_begin(_trans); \ - _ret2 = (_do); \ - } while (bch2_err_matches(_ret2, BCH_ERR_transaction_restart)); \ + if (bch2_err_matches(_ret2, BCH_ERR_transaction_restart)) \ + goto transaction_restart; \ \ if (!_ret2) \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ - \ _ret2; \ }) @@ -697,65 +702,33 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, _ret2 ?: trans_was_restarted(_trans, _restart_count); \ }) -#define for_each_btree_key2(_trans, _iter, _btree_id, \ - _start, _flags, _k, _do) \ +#define for_each_btree_key2_upto(_trans, _iter, _btree_id, \ + _start, _end, _flags, _k, _do) \ ({ \ int _ret3 = 0; \ \ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ \ - while (1) { \ - u32 _restart_count = bch2_trans_begin(_trans); \ + do { \ + _ret3 = lockrestart_do(_trans, ({ \ + (_k) = bch2_btree_iter_peek_upto_type(&(_iter), \ + _end, (_flags)); \ + if (!(_k).k) \ + break; \ \ - _ret3 = 0; \ - (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \ - if (!(_k).k) \ - break; \ - \ - _ret3 = bkey_err(_k) ?: (_do); \ - if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ - continue; \ - if (_ret3) \ - break; \ - bch2_trans_verify_not_restarted(_trans, _restart_count);\ - if (!bch2_btree_iter_advance(&(_iter))) \ - break; \ - } \ + bkey_err(_k) ?: (_do); \ + })); \ + } while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ _ret3; \ }) -#define for_each_btree_key2_upto(_trans, _iter, _btree_id, \ - _start, _end, _flags, _k, _do) \ -({ \ - int _ret3 = 0; \ - \ - bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ - \ - while (1) { \ - u32 _restart_count = bch2_trans_begin(_trans); \ - \ - _ret3 = 0; \ - (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, (_flags));\ - if (!(_k).k) \ - break; \ - \ - _ret3 = bkey_err(_k) ?: (_do); \ - if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ - continue; \ - if (_ret3) \ - break; \ - bch2_trans_verify_not_restarted(_trans, _restart_count);\ - if (!bch2_btree_iter_advance(&(_iter))) \ - break; \ - } \ - \ - bch2_trans_iter_exit((_trans), &(_iter)); \ - _ret3; \ -}) +#define for_each_btree_key2(_trans, _iter, _btree_id, \ + _start, _flags, _k, _do) \ + for_each_btree_key2_upto(_trans, _iter, _btree_id, _start, \ + SPOS_MAX, _flags, _k, _do) #define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ _start, _flags, _k, _do) \ @@ -765,23 +738,16 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ \ - while (1) { \ - u32 _restart_count = bch2_trans_begin(_trans); \ - (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\ - if (!(_k).k) { \ - _ret3 = 0; \ - break; \ - } \ + do { \ + _ret3 = lockrestart_do(_trans, ({ \ + (_k) = bch2_btree_iter_peek_prev_type(&(_iter), \ + (_flags)); \ + if (!(_k).k) \ + break; \ \ - _ret3 = bkey_err(_k) ?: (_do); \ - if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ - continue; \ - if (_ret3) \ - break; \ - bch2_trans_verify_not_restarted(_trans, _restart_count);\ - if (!bch2_btree_iter_rewind(&(_iter))) \ - break; \ - } \ + bkey_err(_k) ?: (_do); \ + })); \ + } while (!_ret3 && bch2_btree_iter_rewind(&(_iter))); \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ _ret3; \ From 27b2df982fa3343552e8a98a744581da69064207 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Dec 2023 23:28:26 -0500 Subject: [PATCH 093/226] bcachefs: Kill for_each_btree_key() for_each_btree_key() handles transaction restarts, like for_each_btree_key2(), but only calls bch2_trans_begin() after a transaction restart - for_each_btree_key2() wraps every loop iteration in a transaction. The for_each_btree_key() behaviour is problematic when it leads to holding the SRCU lock that prevents key cache reclaim for an unbounded amount of time - there's no real need to keep it around. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 31 ++++---- fs/bcachefs/btree_gc.c | 37 +++++----- fs/bcachefs/btree_iter.h | 2 +- fs/bcachefs/ec.c | 46 ++++++------ fs/bcachefs/fsck.c | 130 +++++++++++++++------------------ fs/bcachefs/inode.c | 38 +++++----- fs/bcachefs/move.c | 11 +-- fs/bcachefs/recovery.c | 3 +- fs/bcachefs/snapshot.c | 31 ++++---- 9 files changed, 152 insertions(+), 177 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ad4ad795b3bc..7ebf5516266e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -544,8 +544,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) u8 gen; int ret; - for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ /* * Not a fsck error because this is checked/repaired by * bch2_check_alloc_key() which runs later: @@ -572,8 +572,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) } g.v.gens[offset] = gen; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); if (have_bucket_gens_key && !ret) ret = commit_do(trans, NULL, NULL, @@ -582,8 +582,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) bch2_trans_put(trans); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -601,8 +600,8 @@ int bch2_alloc_read(struct bch_fs *c) const struct bch_bucket_gens *g; u64 b; - for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_bucket_gens, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; @@ -624,13 +623,13 @@ int bch2_alloc_read(struct bch_fs *c) b < min_t(u64, ca->mi.nbuckets, end); b++) *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } else { struct bch_alloc_v4 a; - for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ /* * Not a fsck error because this is checked/repaired by * bch2_check_alloc_key() which runs later: @@ -641,16 +640,14 @@ int bch2_alloc_read(struct bch_fs *c) ca = bch_dev_bkey_exists(c, k.k->p.inode); *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } bch2_trans_put(trans); up_read(&c->gc_lock); - if (ret) - bch_err_fn(c, ret); - + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 5a7b72a461df..6a44427d2ab2 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1665,7 +1665,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) static int bch2_gc_reflink_start(struct bch_fs *c, bool metadata_only) { - struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct reflink_gc *r; @@ -1674,30 +1673,30 @@ static int bch2_gc_reflink_start(struct bch_fs *c, if (metadata_only) return 0; - trans = bch2_trans_get(c); c->reflink_gc_nr = 0; - for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - const __le64 *refcount = bkey_refcount_c(k); + ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, BTREE_ID_reflink, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ + const __le64 *refcount = bkey_refcount_c(k); - if (!refcount) - continue; + if (!refcount) + continue; - r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, - GFP_KERNEL); - if (!r) { - ret = -BCH_ERR_ENOMEM_gc_reflink_start; - break; - } + r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, + GFP_KERNEL); + if (!r) { + ret = -BCH_ERR_ENOMEM_gc_reflink_start; + break; + } - r->offset = k.k->p.offset; - r->size = k.k->size; - r->refcount = 0; - } - bch2_trans_iter_exit(trans, &iter); + r->offset = k.k->p.offset; + r->size = k.k->size; + r->refcount = 0; + 0; + }))); - bch2_trans_put(trans); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 83a4ff0b91e1..10d059cfd36d 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -777,7 +777,7 @@ transaction_restart: \ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_commit_flags))) -#define for_each_btree_key(_trans, _iter, _btree_id, \ +#define for_each_btree_key_old(_trans, _iter, _btree_id, \ _start, _flags, _k, _ret) \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index bc8b556f19a9..8e5237661cce 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1833,7 +1833,6 @@ void bch2_fs_ec_flush(struct bch_fs *c) int bch2_stripes_read(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; const struct bch_stripe *s; @@ -1841,36 +1840,33 @@ int bch2_stripes_read(struct bch_fs *c) unsigned i; int ret; - for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - if (k.k->type != KEY_TYPE_stripe) - continue; + ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, BTREE_ID_stripes, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ + if (k.k->type != KEY_TYPE_stripe) + continue; - ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL); - if (ret) - break; + ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL); + if (ret) + break; - s = bkey_s_c_to_stripe(k).v; + s = bkey_s_c_to_stripe(k).v; - m = genradix_ptr(&c->stripes, k.k->p.offset); - m->sectors = le16_to_cpu(s->sectors); - m->algorithm = s->algorithm; - m->nr_blocks = s->nr_blocks; - m->nr_redundant = s->nr_redundant; - m->blocks_nonempty = 0; + m = genradix_ptr(&c->stripes, k.k->p.offset); + m->sectors = le16_to_cpu(s->sectors); + m->algorithm = s->algorithm; + m->nr_blocks = s->nr_blocks; + m->nr_redundant = s->nr_redundant; + m->blocks_nonempty = 0; - for (i = 0; i < s->nr_blocks; i++) - m->blocks_nonempty += !!stripe_blockcount_get(s, i); + for (i = 0; i < s->nr_blocks; i++) + m->blocks_nonempty += !!stripe_blockcount_get(s, i); - bch2_stripes_heap_insert(c, m, k.k->p.offset); - } - bch2_trans_iter_exit(trans, &iter); - - bch2_trans_put(trans); - - if (ret) - bch_err_fn(c, ret); + bch2_stripes_heap_insert(c, m, k.k->p.offset); + 0; + }))); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index bc6b56628fdf..39f529eb4a4d 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -589,14 +589,13 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; - u32 restart_count = trans->restart_count; int ret; w->recalculate_sums = false; w->inodes.nr = 0; - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { if (k.k->p.offset != inum) break; @@ -609,8 +608,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans, return ret; w->first_this_inode = true; - - return trans_was_restarted(trans, restart_count); + return 0; } static struct inode_walker_entry * @@ -2146,19 +2144,14 @@ int bch2_check_directory_structure(struct bch_fs *c) pathbuf path = { 0, }; int ret; - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + for_each_btree_key_old(trans, iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { if (!bkey_is_inode(k.k)) continue; - ret = bch2_inode_unpack(k, &u); - if (ret) { - /* Should have been caught earlier in fsck: */ - bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret); - break; - } + BUG_ON(bch2_inode_unpack(k, &u)); if (u.bi_flags & BCH_INODE_unlinked) continue; @@ -2170,6 +2163,7 @@ int bch2_check_directory_structure(struct bch_fs *c) bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); darray_exit(&path); + bch_err_fn(c, ret); return ret; } @@ -2255,47 +2249,42 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, struct nlink_table *t, u64 start, u64 *end) { - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bch_inode_unpacked u; - int ret = 0; - for_each_btree_key(trans, iter, BTREE_ID_inodes, - POS(0, start), - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - if (!bkey_is_inode(k.k)) - continue; + int ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, BTREE_ID_inodes, + POS(0, start), + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + if (!bkey_is_inode(k.k)) + continue; - /* Should never fail, checked by bch2_inode_invalid: */ - BUG_ON(bch2_inode_unpack(k, &u)); + /* Should never fail, checked by bch2_inode_invalid: */ + BUG_ON(bch2_inode_unpack(k, &u)); - /* - * Backpointer and directory structure checks are sufficient for - * directories, since they can't have hardlinks: - */ - if (S_ISDIR(u.bi_mode)) - continue; + /* + * Backpointer and directory structure checks are sufficient for + * directories, since they can't have hardlinks: + */ + if (S_ISDIR(u.bi_mode)) + continue; - if (!u.bi_nlink) - continue; + if (!u.bi_nlink) + continue; - ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); - if (ret) { - *end = k.k->p.offset; - ret = 0; - break; - } - - } - bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); - - if (ret) - bch_err(c, "error in fsck: btree error %i while walking inodes", ret); + ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); + if (ret) { + *end = k.k->p.offset; + ret = 0; + break; + } + 0; + }))); + bch_err_fn(c, ret); return ret; } @@ -2303,42 +2292,39 @@ noinline_for_stack static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { - struct btree_trans *trans = bch2_trans_get(c); struct snapshots_seen s; struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent d; - int ret; snapshots_seen_init(&s); - for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); - if (ret) - break; + int ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, BTREE_ID_dirents, POS_MIN, + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); + if (ret) + break; - switch (k.k->type) { - case KEY_TYPE_dirent: - d = bkey_s_c_to_dirent(k); + switch (k.k->type) { + case KEY_TYPE_dirent: + d = bkey_s_c_to_dirent(k); - if (d.v->d_type != DT_DIR && - d.v->d_type != DT_SUBVOL) - inc_link(c, &s, links, range_start, range_end, - le64_to_cpu(d.v->d_inum), - bch2_snapshot_equiv(c, d.k->p.snapshot)); - break; - } - } - bch2_trans_iter_exit(trans, &iter); + if (d.v->d_type != DT_DIR && + d.v->d_type != DT_SUBVOL) + inc_link(c, &s, links, range_start, range_end, + le64_to_cpu(d.v->d_inum), + bch2_snapshot_equiv(c, d.k->p.snapshot)); + break; + } + 0; + }))); - if (ret) - bch_err(c, "error in fsck: btree error %i while walking dirents", ret); - - bch2_trans_put(trans); snapshots_seen_exit(&s); + + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 0d2bdc7575a8..7ee9ac5e4479 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1168,29 +1168,29 @@ int bch2_delete_dead_inodes(struct bch_fs *c) * but we can't retry because the btree write buffer won't have been * flushed and we'd spin: */ - for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, - may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass)); - if (ret < 0) - break; - - if (ret) { - if (!test_bit(BCH_FS_rw, &c->flags)) { - bch2_trans_unlock(trans); - bch2_fs_lazy_rw(c); - } - + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); + if (ret > 0) { bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); - if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - break; + /* + * We don't want to loop here: a transaction restart + * error here means we handled a transaction restart and + * we're actually done, but if we loop we'll retry the + * same key because the write buffer hasn't been flushed + * yet + */ + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + ret = 0; + continue; + } } - } - bch2_trans_iter_exit(trans, &iter); + + ret; + })); if (!ret && need_another_pass) { ret = bch2_btree_write_buffer_flush_sync(trans); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 847ab0eba2da..b2985e730650 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -377,8 +377,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, io_opts->d.nr = 0; - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), + BTREE_ITER_ALL_SNAPSHOTS, k, ({ if (k.k->p.offset != extent_k.k->p.inode) break; @@ -391,11 +391,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; bch2_inode_opts_get(&e.io_opts, trans->c, &inode); - ret = darray_push(&io_opts->d, e); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); + darray_push(&io_opts->d, e); + })); io_opts->cur_inum = extent_k.k->p.inode; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 629ddbb5850f..ed366b35a1f2 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -534,7 +534,8 @@ static int bch2_set_may_go_rw(struct bch_fs *c) keys->gap = keys->nr; set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr || c->opts.fsck) + + if (keys->nr || c->opts.fsck || !c->sb.clean) return bch2_fs_read_write_early(c); return 0; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b2d216fa7182..48307f79f6ad 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1410,19 +1410,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err; } - for_each_btree_key(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, ({ if (k.k->type != KEY_TYPE_snapshot) continue; snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_DELETED(snap.v)) { - ret = snapshot_list_add(c, &deleted, k.k->p.offset); - if (ret) - break; - } - } - bch2_trans_iter_exit(trans, &iter); + BCH_SNAPSHOT_DELETED(snap.v) + ? snapshot_list_add(c, &deleted, k.k->p.offset) + : 0; + })); if (ret) { bch_err_msg(c, ret, "walking snapshots"); @@ -1469,18 +1466,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) bch2_trans_unlock(trans); down_write(&c->snapshot_create_lock); - for_each_btree_key(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, ({ u32 snapshot = k.k->p.offset; u32 equiv = bch2_snapshot_equiv(c, snapshot); - if (equiv != snapshot) - snapshot_list_add(c, &deleted_interior, snapshot); - } - bch2_trans_iter_exit(trans, &iter); + equiv != snapshot + ? snapshot_list_add(c, &deleted_interior, snapshot) + : 0; + })); - if (ret) + if (ret) { + bch_err_msg(c, ret, "walking snapshots"); goto err_create_lock; + } /* * Fixing children of deleted snapshots can't be done completely From 5028b9078ccb02ead012056dcbcc4f27f963b212 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Dec 2023 23:33:11 -0500 Subject: [PATCH 094/226] bcachefs: Rename for_each_btree_key2() -> for_each_btree_key() Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 18 +++++++++--------- fs/bcachefs/btree_gc.c | 8 ++++---- fs/bcachefs/btree_iter.h | 6 +++--- fs/bcachefs/debug.c | 12 ++++++------ fs/bcachefs/ec.c | 4 ++-- fs/bcachefs/fsck.c | 18 +++++++++--------- fs/bcachefs/logged_ops.c | 5 +++-- fs/bcachefs/move.c | 4 ++-- fs/bcachefs/quota.c | 8 ++++---- fs/bcachefs/snapshot.c | 20 ++++++++++---------- fs/bcachefs/sysfs.c | 4 ++-- 11 files changed, 54 insertions(+), 53 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 7ebf5516266e..5e7ec368b16f 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -544,8 +544,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) u8 gen; int ret; - ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ({ + ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ /* * Not a fsck error because this is checked/repaired by * bch2_check_alloc_key() which runs later: @@ -600,8 +600,8 @@ int bch2_alloc_read(struct bch_fs *c) const struct bch_bucket_gens *g; u64 b; - ret = for_each_btree_key2(trans, iter, BTREE_ID_bucket_gens, POS_MIN, - BTREE_ITER_PREFETCH, k, ({ + ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; @@ -628,8 +628,8 @@ int bch2_alloc_read(struct bch_fs *c) } else { struct bch_alloc_v4 a; - ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ({ + ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ /* * Not a fsck error because this is checked/repaired by * bch2_check_alloc_key() which runs later: @@ -1427,7 +1427,7 @@ int bch2_check_alloc_info(struct bch_fs *c) if (ret < 0) goto err; - ret = for_each_btree_key2(trans, iter, + ret = for_each_btree_key(trans, iter, BTREE_ID_need_discard, POS_MIN, BTREE_ITER_PREFETCH, k, bch2_check_discard_freespace_key(trans, &iter)); @@ -1693,8 +1693,8 @@ static void bch2_do_discards_work(struct work_struct *work) * successful commit: */ ret = bch2_trans_run(c, - for_each_btree_key2(trans, iter, - BTREE_ID_need_discard, POS_MIN, 0, k, + for_each_btree_key(trans, iter, + BTREE_ID_need_discard, POS_MIN, 0, k, bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &seen, &open, diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6a44427d2ab2..ae880661fddb 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1538,8 +1538,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) rcu_assign_pointer(ca->buckets_gc, buckets); } - ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ({ + ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ ca = bch_dev_bkey_exists(c, k.k->p.inode); g = gc_bucket(ca, k.k->p.offset); @@ -1676,8 +1676,8 @@ static int bch2_gc_reflink_start(struct bch_fs *c, c->reflink_gc_nr = 0; ret = bch2_trans_run(c, - for_each_btree_key2(trans, iter, BTREE_ID_reflink, POS_MIN, - BTREE_ITER_PREFETCH, k, ({ + for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ const __le64 *refcount = bkey_refcount_c(k); if (!refcount) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 10d059cfd36d..5f401056c3ed 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -725,8 +725,8 @@ transaction_restart: \ _ret3; \ }) -#define for_each_btree_key2(_trans, _iter, _btree_id, \ - _start, _flags, _k, _do) \ +#define for_each_btree_key(_trans, _iter, _btree_id, \ + _start, _flags, _k, _do) \ for_each_btree_key2_upto(_trans, _iter, _btree_id, _start, \ SPOS_MAX, _flags, _k, _do) @@ -757,7 +757,7 @@ transaction_restart: \ _start, _iter_flags, _k, \ _disk_res, _journal_seq, _commit_flags,\ _do) \ - for_each_btree_key2(_trans, _iter, _btree_id, _start, _iter_flags, _k,\ + for_each_btree_key(_trans, _iter, _btree_id, _start, _iter_flags, _k,\ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_commit_flags))) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index c45421a2f408..97a699fa68d5 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -380,9 +380,9 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, return ret; trans = bch2_trans_get(i->c); - ret = for_each_btree_key2(trans, iter, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ({ + ret = for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ bch2_bkey_val_to_text(&i->buf, i->c, k); prt_newline(&i->buf); bch2_trans_unlock(trans); @@ -478,9 +478,9 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, trans = bch2_trans_get(i->c); - ret = for_each_btree_key2(trans, iter, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ({ + ret = for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct btree_path_level *l = &iter.path->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 8e5237661cce..1a3303c65961 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1841,8 +1841,8 @@ int bch2_stripes_read(struct bch_fs *c) int ret; ret = bch2_trans_run(c, - for_each_btree_key2(trans, iter, BTREE_ID_stripes, POS_MIN, - BTREE_ITER_PREFETCH, k, ({ + for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ if (k.k->type != KEY_TYPE_stripe) continue; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 39f529eb4a4d..8b77098dde2d 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2254,11 +2254,11 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, struct bch_inode_unpacked u; int ret = bch2_trans_run(c, - for_each_btree_key2(trans, iter, BTREE_ID_inodes, - POS(0, start), - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ({ + for_each_btree_key(trans, iter, BTREE_ID_inodes, + POS(0, start), + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ if (!bkey_is_inode(k.k)) continue; @@ -2300,10 +2300,10 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links snapshots_seen_init(&s); int ret = bch2_trans_run(c, - for_each_btree_key2(trans, iter, BTREE_ID_dirents, POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ({ + for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); if (ret) break; diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 9a76a9aab5c3..4b603746063c 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -59,8 +59,9 @@ int bch2_resume_logged_ops(struct bch_fs *c) int ret; ret = bch2_trans_run(c, - for_each_btree_key2(trans, iter, - BTREE_ID_logged_ops, POS_MIN, BTREE_ITER_PREFETCH, k, + for_each_btree_key(trans, iter, + BTREE_ID_logged_ops, POS_MIN, + BTREE_ITER_PREFETCH, k, resume_logged_op(trans, &iter, k))); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index b2985e730650..9be421e2a577 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -377,8 +377,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, io_opts->d.nr = 0; - ret = for_each_btree_key2(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), - BTREE_ITER_ALL_SNAPSHOTS, k, ({ + ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), + BTREE_ITER_ALL_SNAPSHOTS, k, ({ if (k.k->p.offset != extent_k.k->p.inode) break; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a54647c36b85..79724a7aaab0 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -617,11 +617,11 @@ int bch2_fs_quota_read(struct bch_fs *c) trans = bch2_trans_get(c); - ret = for_each_btree_key2(trans, iter, BTREE_ID_quotas, - POS_MIN, BTREE_ITER_PREFETCH, k, + ret = for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, + BTREE_ITER_PREFETCH, k, __bch2_quota_set(c, k, NULL)) ?: - for_each_btree_key2(trans, iter, BTREE_ID_inodes, - POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, bch2_fs_quota_read_inode(trans, &iter, k)); bch2_trans_put(trans); diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 48307f79f6ad..1b1b7e76167e 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1402,16 +1402,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err; } - ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, + ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, bch2_snapshot_set_equiv(trans, k)); if (ret) { bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); goto err; } - ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ({ + ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, ({ if (k.k->type != KEY_TYPE_snapshot) continue; @@ -1466,8 +1466,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) bch2_trans_unlock(trans); down_write(&c->snapshot_create_lock); - ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ({ + ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, ({ u32 snapshot = k.k->p.offset; u32 equiv = bch2_snapshot_equiv(c, snapshot); @@ -1693,13 +1693,13 @@ int bch2_snapshots_read(struct bch_fs *c) int ret = 0; ret = bch2_trans_run(c, - for_each_btree_key2(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, + for_each_btree_key(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: bch2_snapshot_set_equiv(trans, k) ?: bch2_check_snapshot_needs_deletion(trans, k)) ?: - for_each_btree_key2(trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, + for_each_btree_key(trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1b82a3a93d14..e818ca7a1b70 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -278,8 +278,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c if (!btree_type_has_ptrs(id)) continue; - ret = for_each_btree_key2(trans, iter, id, POS_MIN, - BTREE_ITER_ALL_SNAPSHOTS, k, ({ + ret = for_each_btree_key(trans, iter, id, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct bch_extent_crc_unpacked crc; const union bch_extent_entry *entry; From f8fd5871becf3beb7dff84c4608140a8c571c9a9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Dec 2023 13:11:44 -0500 Subject: [PATCH 095/226] bcachefs: reserve path idx 0 for sentinal Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 4 +++- fs/bcachefs/btree_iter.h | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 59815c0d0db6..e2f010564ceb 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2515,7 +2515,7 @@ static void btree_trans_verify_sorted_refs(struct btree_trans *trans) struct btree_path *path; unsigned i; - BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX)); + BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX) - 1); trans_for_each_path(trans, path) { BUG_ON(path->sorted_idx >= trans->nr_sorted); @@ -2896,6 +2896,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) atomic_inc_not_zero(&c->journal_keys.ref); closure_init_stack(&trans->ref); + trans->paths_allocated[0] = 1; + s = btree_trans_stats(trans); if (s && s->max_mem) { unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 5f401056c3ed..5be3c1a2c820 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -80,7 +80,7 @@ __trans_next_path(struct btree_trans *trans, unsigned idx) _path = __trans_next_path((_trans), (_path)->idx + 1)) #define trans_for_each_path(_trans, _path) \ - trans_for_each_path_from(_trans, _path, 0) + trans_for_each_path_from(_trans, _path, 1) static inline struct btree_path * __trans_next_path_safe(struct btree_trans *trans, unsigned *idx) @@ -103,7 +103,7 @@ __trans_next_path_safe(struct btree_trans *trans, unsigned *idx) _idx++) #define trans_for_each_path_safe(_trans, _path, _idx) \ - trans_for_each_path_safe_from(_trans, _path, _idx, 0) + trans_for_each_path_safe_from(_trans, _path, _idx, 1) static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path) { @@ -155,7 +155,7 @@ __trans_next_path_with_node(struct btree_trans *trans, struct btree *b, } #define trans_for_each_path_with_node(_trans, _b, _path) \ - for (_path = __trans_next_path_with_node((_trans), (_b), 0); \ + for (_path = __trans_next_path_with_node((_trans), (_b), 1); \ (_path); \ _path = __trans_next_path_with_node((_trans), (_b), \ (_path)->idx + 1)) From b56cee70e75e2edcbb92eb3d9357fae5df857b01 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 9 Dec 2023 22:06:44 -0800 Subject: [PATCH 096/226] bcachefs: six lock: fix typos Fix a few typos in the six.h header file. Signed-off-by: Randy Dunlap Cc: Kent Overstreet Cc: Brian Foster Cc: linux-bcachefs@vger.kernel.org Signed-off-by: Kent Overstreet --- fs/bcachefs/six.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h index a7104ac1d35c..68d46fd7f391 100644 --- a/fs/bcachefs/six.h +++ b/fs/bcachefs/six.h @@ -15,7 +15,7 @@ * will have to take write locks for the full duration of the operation. * * But by adding an intent state, which is exclusive with other intent locks but - * not with readers, we can take intent locks at thte start of the operation, + * not with readers, we can take intent locks at the start of the operation, * and then take write locks only for the actual update to each individual * nodes, without deadlocking. * @@ -65,8 +65,8 @@ * * Reentrancy: * - * Six locks are not by themselves reentrent, but have counters for both the - * read and intent states that can be used to provide reentrency by an upper + * Six locks are not by themselves reentrant, but have counters for both the + * read and intent states that can be used to provide reentrancy by an upper * layer that tracks held locks. If a lock is known to already be held in the * read or intent state, six_lock_increment() can be used to bump the "lock * held in this state" counter, increasing the number of unlock calls that From 249bf593e84e0b4652d2cdb9884b97bc21b59d9f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 12:42:49 -0500 Subject: [PATCH 097/226] bcachefs: Fix snapshot.c assertion for online fsck c->curr_recovery_pass can go backwards; this adds a non rewinding version, c->recovery_pass_done. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 3 +++ fs/bcachefs/recovery.c | 3 +++ fs/bcachefs/snapshot.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index b42e3854c51f..f33f510463d1 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1057,7 +1057,10 @@ struct bch_fs { enum bch_recovery_pass curr_recovery_pass; /* bitmap of explicitly enabled recovery passes: */ u64 recovery_passes_explicit; + /* bitmask of recovery passes that we actually ran */ u64 recovery_passes_complete; + /* never rewinds version of curr_recovery_pass */ + enum bch_recovery_pass recovery_pass_done; struct semaphore online_fsck_mutex; /* DEBUG JUNK */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index ed366b35a1f2..1099b2f51252 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -713,6 +713,7 @@ static int bch2_run_recovery_passes(struct bch_fs *c) c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); } c->curr_recovery_pass++; + c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); } return ret; @@ -1197,6 +1198,8 @@ int bch2_fs_initialize(struct bch_fs *c) goto err; } + c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1; + if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); if (ret) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 1b1b7e76167e..8d1800ef22b5 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -123,7 +123,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) struct snapshot_table *t; bool ret; - EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); + EBUG_ON(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots); rcu_read_lock(); t = rcu_dereference(c->snapshots); From a83b6c895c4d91014c73569f13b071d44e16cdc3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 16:12:24 -0500 Subject: [PATCH 098/226] bcachefs: kill btree_path->(alloc_seq|downgrade_seq) These were for extra info in tracepoints for debugging a specialized issue - we do not want to bloat btree_path for this, at least in release builds. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 1 - fs/bcachefs/btree_locking.c | 1 - fs/bcachefs/btree_types.h | 2 -- fs/bcachefs/trace.h | 10 ++-------- 4 files changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index e2f010564ceb..48691b62d671 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1539,7 +1539,6 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, path->ref = 0; path->intent_ref = 0; path->nodes_locked = 0; - path->alloc_seq++; btree_path_list_add(trans, pos, path); trans->paths_sorted = false; diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 1eca320e7574..24a91cc38538 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -708,7 +708,6 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, bch2_btree_path_verify_locks(path); - path->downgrade_seq++; trace_path_downgrade(trans, _RET_IP_, path, old_locks_want); } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 78d9f585db45..78de9569ff14 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -227,8 +227,6 @@ struct btree_path { u8 sorted_idx; u8 ref; u8 intent_ref; - u32 alloc_seq; - u32 downgrade_seq; /* btree_iter_copy starts here: */ struct bpos pos; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index cfa7ee780fd4..427edb3e7cd6 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1145,8 +1145,6 @@ TRACE_EVENT(trans_restart_upgrade, __field(u8, level ) __field(u32, path_seq ) __field(u32, node_seq ) - __field(u32, path_alloc_seq ) - __field(u32, downgrade_seq) TRACE_BPOS_entries(pos) ), @@ -1159,12 +1157,10 @@ TRACE_EVENT(trans_restart_upgrade, __entry->level = f->l; __entry->path_seq = path->l[f->l].lock_seq; __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq; - __entry->path_alloc_seq = path->alloc_seq; - __entry->downgrade_seq = path->downgrade_seq; TRACE_BPOS_assign(pos, path->pos) ), - TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u", + TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u", __entry->trans_fn, (void *) __entry->caller_ip, bch2_btree_id_str(__entry->btree_id), @@ -1175,9 +1171,7 @@ TRACE_EVENT(trans_restart_upgrade, __entry->new_locks_want, __entry->level, __entry->path_seq, - __entry->node_seq, - __entry->path_alloc_seq, - __entry->downgrade_seq) + __entry->node_seq) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, From e4e49375a8e4d9c9b65e79070ef6cff2433a7d5f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 17:52:58 -0500 Subject: [PATCH 099/226] bcachefs; kill bch2_btree_key_cache_flush() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 16 ---------------- fs/bcachefs/btree_key_cache.h | 2 -- 2 files changed, 18 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index b39b28b4ae73..7d2db1e5df3b 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -747,22 +747,6 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, return ret; } -/* - * Flush and evict a key from the key cache: - */ -int bch2_btree_key_cache_flush(struct btree_trans *trans, - enum btree_id id, struct bpos pos) -{ - struct bch_fs *c = trans->c; - struct bkey_cached_key key = { id, pos }; - - /* Fastpath - assume it won't be found: */ - if (!bch2_btree_key_cache_find(c, id, pos)) - return 0; - - return btree_key_cache_flush_pos(trans, key, 0, 0, true); -} - bool bch2_btree_insert_key_cached(struct btree_trans *trans, unsigned flags, struct btree_insert_entry *insert_entry) diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index be3acde2caa0..e6b2cd0dd2c1 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -31,8 +31,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *, bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned, struct btree_insert_entry *); -int bch2_btree_key_cache_flush(struct btree_trans *, - enum btree_id, struct bpos); void bch2_btree_key_cache_drop(struct btree_trans *, struct btree_path *); From 24de63dacbffbfa069b44a1da1750eb5382275e7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 16:48:22 -0500 Subject: [PATCH 100/226] bcachefs: Improve trans->extra_journal_entries Instead of using a darray, we now allocate journal entries for the transaction commit path with our normal bump allocator - with an inlined fastpath, and using btree_transaction_stats to remember how much to initially allocate so as to avoid transaction restarts. This is prep work for converting write buffer updates to use this mechanism. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/btree_iter.c | 4 +- fs/bcachefs/btree_trans_commit.c | 20 +++---- fs/bcachefs/btree_types.h | 6 +- fs/bcachefs/btree_update.c | 92 ++++++++++++++++++----------- fs/bcachefs/btree_update.h | 23 +++++++- fs/bcachefs/btree_update_interior.c | 15 ++--- 7 files changed, 98 insertions(+), 63 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index f33f510463d1..ce66894e6537 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -638,6 +638,7 @@ struct btree_transaction_stats { struct mutex lock; unsigned nr_max_paths; unsigned wb_updates_size; + unsigned journal_entries_size; unsigned max_mem; char *max_paths_text; }; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 48691b62d671..2b0dc0ebbba3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2794,6 +2794,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->restart_count++; trans->mem_top = 0; + trans->journal_entries = NULL; trans_for_each_path(trans, path) { path->should_be_locked = false; @@ -2914,6 +2915,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) if (s) { trans->nr_max_paths = s->nr_max_paths; trans->wb_updates_size = s->wb_updates_size; + trans->journal_entries_size = s->journal_entries_size; } trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); @@ -3000,8 +3002,6 @@ void bch2_trans_put(struct btree_trans *trans) srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); } - kfree(trans->extra_journal_entries.data); - if (trans->fs_usage_deltas) { if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) == REPLICAS_DELTA_LIST_MAX) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 336350bd9048..abdf4fd10b6a 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -726,15 +726,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, goto fatal_err; } - if (unlikely(trans->extra_journal_entries.nr)) { - memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), - trans->extra_journal_entries.data, - trans->extra_journal_entries.nr); - - trans->journal_res.offset += trans->extra_journal_entries.nr; - trans->journal_res.u64s -= trans->extra_journal_entries.nr; - } - if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) { struct journal *j = &c->journal; struct jset_entry *entry; @@ -772,6 +763,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, bkey_copy((struct bkey_i *) entry->start, &wb->k); } + memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), + trans->journal_entries, + trans->journal_entries_u64s); + + trans->journal_res.offset += trans->journal_entries_u64s; + trans->journal_res.u64s -= trans->journal_entries_u64s; + if (trans->journal_seq) *trans->journal_seq = trans->journal_res.seq; } @@ -1036,7 +1034,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!trans->nr_updates && !trans->nr_wb_updates && - !trans->extra_journal_entries.nr) + !trans->journal_entries_u64s) goto out_reset; ret = bch2_trans_commit_run_triggers(trans); @@ -1088,7 +1086,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); - trans->journal_u64s = trans->extra_journal_entries.nr; + trans->journal_u64s = trans->journal_entries_u64s; trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); if (trans->journal_transaction_names) trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 78de9569ff14..40e5a004e5c1 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -410,7 +410,9 @@ struct btree_trans { * extent: */ unsigned extra_journal_res; - unsigned nr_max_paths; + u8 nr_max_paths; + u16 journal_entries_u64s; + u16 journal_entries_size; unsigned long paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; @@ -426,7 +428,7 @@ struct btree_trans { /* update path: */ struct btree_trans_commit_hook *hooks; - darray_u64 extra_journal_entries; + struct jset_entry *journal_entries; struct journal_entry_pin *journal_pin; struct journal_res journal_res; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 3642e2629716..33497e71b3d4 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -545,6 +545,34 @@ static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans, return bch2_btree_insert_trans(trans, btree, n, 0); } +struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) +{ + unsigned new_top = trans->journal_entries_u64s + u64s; + unsigned old_size = trans->journal_entries_size; + + if (new_top > trans->journal_entries_size) { + trans->journal_entries_size = roundup_pow_of_two(new_top); + + struct btree_transaction_stats *s = btree_trans_stats(trans); + if (s) + s->journal_entries_size = trans->journal_entries_size; + } + + struct jset_entry *n = + bch2_trans_kmalloc_nomemzero(trans, + trans->journal_entries_size * sizeof(u64)); + if (IS_ERR(n)) + return ERR_CAST(n); + + if (trans->journal_entries) + memcpy(n, trans->journal_entries, old_size * sizeof(u64)); + trans->journal_entries = n; + + struct jset_entry *e = btree_trans_journal_entries_top(trans); + trans->journal_entries_u64s = new_top; + return e; +} + int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) @@ -823,41 +851,17 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, return bch2_trans_update_buffered(trans, btree, &k); } -__printf(2, 0) -static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args) +static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s) { - struct printbuf buf = PRINTBUF; - struct jset_entry_log *l; - unsigned u64s; - int ret; - - prt_vprintf(&buf, fmt, args); - ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; + struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s)); + int ret = PTR_ERR_OR_ZERO(e); if (ret) - goto err; + return ret; - u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); - - ret = darray_make_room(entries, jset_u64s(u64s)); - if (ret) - goto err; - - l = (void *) &darray_top(*entries); - l->entry.u64s = cpu_to_le16(u64s); - l->entry.btree_id = 0; - l->entry.level = 1; - l->entry.type = BCH_JSET_ENTRY_log; - l->entry.pad[0] = 0; - l->entry.pad[1] = 0; - l->entry.pad[2] = 0; - memcpy(l->d, buf.buf, buf.pos); - while (buf.pos & 7) - l->d[buf.pos++] = '\0'; - - entries->nr += jset_u64s(u64s); -err: - printbuf_exit(&buf); - return ret; + struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry); + journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s); + memcpy(l->d, buf->buf, buf->pos); + return 0; } __printf(3, 0) @@ -865,16 +869,32 @@ static int __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, va_list args) { - int ret; + struct printbuf buf = PRINTBUF; + prt_vprintf(&buf, fmt, args); + + unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); + prt_chars(&buf, '\0', u64s * sizeof(u64) - buf.pos); + + int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; + if (ret) + goto err; if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) { - ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args); + ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s)); + if (ret) + goto err; + + struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries); + journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s); + memcpy(l->d, buf.buf, buf.pos); + c->journal.early_journal_entries.nr += jset_u64s(u64s); } else { ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|commit_flags, - __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args)); + __bch2_trans_log_msg(trans, &buf, u64s)); } - +err: + printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index fa19f3212b05..1f2a77bfe461 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -101,11 +101,28 @@ int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *, int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *, struct bkey_i *, enum btree_update_flags); -int __must_check bch2_trans_update_seq(struct btree_trans *, u64, struct btree_iter *, - struct bkey_i *, enum btree_update_flags); int __must_check bch2_trans_update_buffered(struct btree_trans *, enum btree_id, struct bkey_i *); +struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned); + +static inline struct jset_entry *btree_trans_journal_entries_top(struct btree_trans *trans) +{ + return (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); +} + +static inline struct jset_entry * +bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) +{ + if (!trans->journal_entries || + trans->journal_entries_u64s + u64s > trans->journal_entries_size) + return __bch2_trans_jset_entry_alloc(trans, u64s); + + struct jset_entry *e = btree_trans_journal_entries_top(trans); + trans->journal_entries_u64s += u64s; + return e; +} + void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); int __bch2_trans_commit(struct btree_trans *, unsigned); @@ -171,10 +188,10 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans) trans->extra_journal_res = 0; trans->nr_updates = 0; + trans->journal_entries_u64s = 0; trans->nr_wb_updates = 0; trans->wb_updates = NULL; trans->hooks = NULL; - trans->extra_journal_entries.nr = 0; if (trans->fs_usage_deltas) { trans->fs_usage_deltas->used = 0; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6858fca7abe1..b3f1f7c9a5f4 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -557,16 +557,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, struct btree_update *as) { struct bkey_i *k; - int ret; - ret = darray_make_room(&trans->extra_journal_entries, as->journal_u64s); + struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, as->journal_u64s); + int ret = PTR_ERR_OR_ZERO(e); if (ret) return ret; - memcpy(&darray_top(trans->extra_journal_entries), - as->journal_entries, - as->journal_u64s * sizeof(u64)); - trans->extra_journal_entries.nr += as->journal_u64s; + memcpy(e, as->journal_entries, as->journal_u64s * sizeof(u64)); trans->journal_pin = &as->journal; @@ -2188,16 +2185,16 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, } else { BUG_ON(btree_node_root(c, b) != b); - ret = darray_make_room(&trans->extra_journal_entries, + struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(new_key->k.u64s)); + ret = PTR_ERR_OR_ZERO(e); if (ret) return ret; - journal_entry_set((void *) &darray_top(trans->extra_journal_entries), + journal_entry_set(e, BCH_JSET_ENTRY_btree_root, b->c.btree_id, b->c.level, new_key, new_key->k.u64s); - trans->extra_journal_entries.nr += jset_u64s(new_key->k.u64s); } ret = bch2_trans_commit(trans, NULL, NULL, commit_flags); From f33600057f50d3dea5cb3bda05c21ecce7125282 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 17:44:04 -0500 Subject: [PATCH 101/226] bcachefs: bch2_trans_node_add no longer uses trans_for_each_path() In the future we'll be making trans->paths resizable and potentially having _many_ more paths (for fsck); we need to start fixing algorithms that walk each path in a transaction where possible. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 20 ++++++++++++++------ fs/bcachefs/btree_iter.h | 2 +- fs/bcachefs/btree_update_interior.c | 10 +++++----- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 2b0dc0ebbba3..6d6963858a83 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -674,14 +674,22 @@ static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str * A btree node is being replaced - update the iterator to point to the new * node: */ -void bch2_trans_node_add(struct btree_trans *trans, struct btree *b) +void bch2_trans_node_add(struct btree_trans *trans, + struct btree_path *path, + struct btree *b) { - struct btree_path *path; + struct btree_path *prev; - trans_for_each_path(trans, path) - if (path->uptodate == BTREE_ITER_UPTODATE && - !path->cached && - btree_path_pos_in_node(path, b)) { + BUG_ON(!btree_path_pos_in_node(path, b)); + + while ((prev = prev_btree_path(trans, path)) && + btree_path_pos_in_node(prev, b)) + path = prev; + + for (; + path && btree_path_pos_in_node(path, b); + path = next_btree_path(trans, path)) + if (path->uptodate == BTREE_ITER_UPTODATE && !path->cached) { enum btree_node_locked_type t = btree_lock_want(path, b->c.level); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 5be3c1a2c820..f7d5f8403940 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -322,7 +322,7 @@ static inline void bch2_btree_path_downgrade(struct btree_trans *trans, void bch2_trans_downgrade(struct btree_trans *); -void bch2_trans_node_add(struct btree_trans *trans, struct btree *); +void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct btree *); void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *); int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index b3f1f7c9a5f4..cffc66cd481f 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1601,10 +1601,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, bch2_btree_node_free_inmem(trans, path, b); if (n3) - bch2_trans_node_add(trans, n3); + bch2_trans_node_add(trans, path, n3); if (n2) - bch2_trans_node_add(trans, n2); - bch2_trans_node_add(trans, n1); + bch2_trans_node_add(trans, path2, n2); + bch2_trans_node_add(trans, path1, n1); if (n3) six_unlock_intent(&n3->c.lock); @@ -1913,7 +1913,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_btree_node_free_inmem(trans, path, b); bch2_btree_node_free_inmem(trans, sib_path, m); - bch2_trans_node_add(trans, n); + bch2_trans_node_add(trans, path, n); bch2_trans_verify_paths(trans); @@ -1985,7 +1985,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, bch2_btree_node_free_inmem(trans, iter->path, b); - bch2_trans_node_add(trans, n); + bch2_trans_node_add(trans, iter->path, n); six_unlock_intent(&n->c.lock); bch2_btree_update_done(as, trans); From 0ba9375a111a88e47733b679f6affb7f6492de4c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 7 Nov 2023 18:08:38 -0500 Subject: [PATCH 102/226] bcachefs: Unwritten journal buffers are always dirty Ensure that journal bufs that haven't been written can't be reclaimed from the journal pin fifo, and can thus have new pins taken. Prep work for changing the btree write buffer to pull keys from the journal directly. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index bd33a7c9634c..dc415e0ec493 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -303,6 +303,7 @@ void bch2_journal_reclaim_fast(struct journal *j) * all btree nodes got written out */ while (!fifo_empty(&j->pin) && + j->pin.front <= j->seq_ondisk && !atomic_read(&fifo_peek_front(&j->pin).count)) { j->pin.front++; popped = true; From b05c0e9370bec71c62df690250f451e58e8ed2a4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 21:06:52 -0400 Subject: [PATCH 103/226] bcachefs: journal->buf_lock Add a new lock for synchronizing between journal IO path and btree write buffer flush. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 1 + fs/bcachefs/journal_io.c | 2 ++ fs/bcachefs/journal_types.h | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index fb983061af4a..6d56a71243bd 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1219,6 +1219,7 @@ int bch2_fs_journal_init(struct journal *j) static struct lock_class_key res_key; unsigned i; + mutex_init(&j->buf_lock); spin_lock_init(&j->lock); spin_lock_init(&j->err_lock); init_waitqueue_head(&j->wait); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index f2e038116b0f..afcb2a435956 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1885,9 +1885,11 @@ CLOSURE_CALLBACK(bch2_journal_write) if (ret) goto err; + mutex_lock(&j->buf_lock); journal_buf_realloc(j, w); ret = bch2_journal_write_prep(j, w); + mutex_unlock(&j->buf_lock); if (ret) goto err; diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 4ffae252e01e..5c8d3a8ec4df 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -181,6 +181,12 @@ struct journal { */ darray_u64 early_journal_entries; + /* + * Protects journal_buf->data, when accessing without a jorunal + * reservation: for synchronization between the btree write buffer code + * and the journal write path: + */ + struct mutex buf_lock; /* * Two journal entries -- one is currently open for new entries, the * other is possibly being written out. From 09caeabe1a5daa3b667b797c401d43206d583f23 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 18:57:19 -0400 Subject: [PATCH 104/226] bcachefs: btree write buffer now slurps keys from journal Previosuly, the transaction commit path would have to add keys to the btree write buffer as a separate operation, requiring additional global synchronization. This patch introduces a new journal entry type, which indicates that the keys need to be copied into the btree write buffer prior to being written out. We switch the journal entry type back to JSET_ENTRY_btree_keys prior to write, so this is not an on disk format change. Flushing the btree write buffer may require pulling keys out of journal entries yet to be written, and quiescing outstanding journal reservations; we previously added journal->buf_lock for synchronization with the journal write path. We also can't put strict bounds on the number of keys in the journal destined for the write buffer, which means we might overflow the size of the preallocated buffer and have to reallocate - this introduces a potentially fatal memory allocation failure. This is something we'll have to watch for, if it becomes an issue in practice we can do additional mitigation. The transaction commit path no longer has to explicitly check if the write buffer is full and wait on flushing; this is another performance optimization. Instead, when the btree write buffer is close to full we change the journal watermark, so that only reservations for journal reclaim are allowed. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 11 + fs/bcachefs/bcachefs_format.h | 3 +- fs/bcachefs/btree_trans_commit.c | 52 +-- fs/bcachefs/btree_write_buffer.c | 442 +++++++++++++++++-------- fs/bcachefs/btree_write_buffer.h | 52 ++- fs/bcachefs/btree_write_buffer_types.h | 63 ++-- fs/bcachefs/ec.c | 2 +- fs/bcachefs/errcode.h | 1 - fs/bcachefs/journal.c | 44 +++ fs/bcachefs/journal.h | 1 + fs/bcachefs/journal_io.c | 58 +++- fs/bcachefs/journal_reclaim.c | 12 +- fs/bcachefs/journal_reclaim.h | 1 + fs/bcachefs/journal_types.h | 2 + fs/bcachefs/opts.h | 5 - fs/bcachefs/super.c | 3 +- 16 files changed, 520 insertions(+), 232 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index ce66894e6537..eeb2787e6f79 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -427,6 +427,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(blocked_journal_max_in_flight) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ + x(blocked_write_buffer_full) \ x(nocow_lock_contended) enum bch_time_stats { @@ -1122,6 +1123,16 @@ static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref) #endif } +static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref) +{ +#ifdef BCH_WRITE_REF_DEBUG + return !test_bit(BCH_FS_going_ro, &c->flags) && + atomic_long_inc_not_zero(&c->writes[ref]); +#else + return percpu_ref_tryget(&c->writes); +#endif +} + static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref) { #ifdef BCH_WRITE_REF_DEBUG diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 545df77bcd46..3d9393eecb93 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -2162,7 +2162,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb) x(clock, 7) \ x(dev_usage, 8) \ x(log, 9) \ - x(overwrite, 10) + x(overwrite, 10) \ + x(write_buffer_keys, 11) enum { #define x(f, nr) BCH_JSET_ENTRY_##f = nr, diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index abdf4fd10b6a..e52386fdc7ec 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -659,10 +659,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, i->k->k.needs_whiteout = false; } - if (trans->nr_wb_updates && - trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size) - return -BCH_ERR_btree_insert_need_flush_buffer; - /* * Don't get journal reservation until after we know insert will * succeed: @@ -697,14 +693,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas)) return -BCH_ERR_btree_insert_need_mark_replicas; - if (trans->nr_wb_updates) { - EBUG_ON(flags & BCH_TRANS_COMMIT_no_journal_res); - - ret = bch2_btree_insert_keys_write_buffer(trans); - if (ret) - goto revert_fs_usage; - } - h = trans->hooks; while (h) { ret = h->fn(trans, h); @@ -757,7 +745,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, trans_for_each_wb_update(trans, wb) { entry = bch2_journal_add_entry(j, &trans->journal_res, - BCH_JSET_ENTRY_btree_keys, + BCH_JSET_ENTRY_write_buffer_keys, wb->btree, 0, wb->k.k.u64s); bkey_copy((struct bkey_i *) entry->start, &wb->k); @@ -948,30 +936,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, ret = bch2_trans_relock(trans); break; - case -BCH_ERR_btree_insert_need_flush_buffer: { - struct btree_write_buffer *wb = &c->btree_write_buffer; - - ret = 0; - - if (wb->state.nr > wb->size * 3 / 4) { - bch2_trans_unlock(trans); - mutex_lock(&wb->flush_lock); - - if (wb->state.nr > wb->size * 3 / 4) { - bch2_trans_begin(trans); - ret = bch2_btree_write_buffer_flush_locked(trans); - mutex_unlock(&wb->flush_lock); - if (!ret) { - trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); - ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); - } - } else { - mutex_unlock(&wb->flush_lock); - ret = bch2_trans_relock(trans); - } - } - break; - } default: BUG_ON(ret >= 0); break; @@ -1070,20 +1034,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; } - if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 && - mutex_trylock(&c->btree_write_buffer.flush_lock)) { - bch2_trans_begin(trans); - bch2_trans_unlock(trans); - - ret = bch2_btree_write_buffer_flush_locked(trans); - mutex_unlock(&c->btree_write_buffer.flush_lock); - if (!ret) { - trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); - ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); - } - goto out; - } - EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); trans->journal_u64s = trans->journal_entries_u64s; diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 6ab26576252c..b19aeea5a80b 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -7,30 +7,25 @@ #include "btree_write_buffer.h" #include "error.h" #include "journal.h" +#include "journal_io.h" #include "journal_reclaim.h" +#include #include static int bch2_btree_write_buffer_journal_flush(struct journal *, struct journal_entry_pin *, u64); -static int btree_write_buffered_key_cmp(const void *_l, const void *_r) +static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *); + +static inline int wb_key_cmp(const void *_l, const void *_r) { - const struct btree_write_buffered_key *l = _l; - const struct btree_write_buffered_key *r = _r; + const struct wb_key_ref *l = _l; + const struct wb_key_ref *r = _r; - return cmp_int(l->btree, r->btree) ?: - bpos_cmp(l->k.k.p, r->k.k.p) ?: - cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->journal_offset, r->journal_offset); -} - -static int btree_write_buffered_journal_cmp(const void *_l, const void *_r) -{ - const struct btree_write_buffered_key *l = _l; - const struct btree_write_buffered_key *r = _r; - - return cmp_int(l->journal_seq, r->journal_seq); + return cmp_int(l->hi, r->hi) ?: + cmp_int(l->mi, r->mi) ?: + cmp_int(l->lo, r->lo); } static noinline int wb_flush_one_slowpath(struct btree_trans *trans, @@ -59,6 +54,9 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite int ret; EBUG_ON(!wb->journal_seq); + EBUG_ON(!c->btree_write_buffer.flushing.pin.seq); + EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); + ret = bch2_btree_iter_traverse(iter); if (ret) return ret; @@ -91,26 +89,6 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite return 0; } -static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb) -{ - union btree_write_buffer_state old, new; - u64 v = READ_ONCE(wb->state.v); - - do { - old.v = new.v = v; - - new.nr = 0; - new.idx++; - } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v); - - while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1) - cpu_relax(); - - smp_mb(); - - return old; -} - /* * Update a btree with a write buffered key using the journal seq of the * original write buffer insert. @@ -140,28 +118,79 @@ btree_write_buffered_insert(struct btree_trans *trans, return ret; } -int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) +static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb) +{ + struct bch_fs *c = container_of(wb, struct bch_fs, btree_write_buffer); + struct journal *j = &c->journal; + + if (!wb->inc.keys.nr) + return; + + bch2_journal_pin_add(j, wb->inc.keys.data[0].journal_seq, &wb->flushing.pin, + bch2_btree_write_buffer_journal_flush); + + darray_resize(&wb->flushing.keys, min_t(size_t, 1U << 20, wb->flushing.keys.nr + wb->inc.keys.nr)); + darray_resize(&wb->sorted, wb->flushing.keys.size); + + if (!wb->flushing.keys.nr && wb->sorted.size >= wb->inc.keys.nr) { + swap(wb->flushing.keys, wb->inc.keys); + goto out; + } + + size_t nr = min(darray_room(wb->flushing.keys), + wb->sorted.size - wb->flushing.keys.nr); + nr = min(nr, wb->inc.keys.nr); + + memcpy(&darray_top(wb->flushing.keys), + wb->inc.keys.data, + sizeof(wb->inc.keys.data[0]) * nr); + + memmove(wb->inc.keys.data, + wb->inc.keys.data + nr, + sizeof(wb->inc.keys.data[0]) * (wb->inc.keys.nr - nr)); + + wb->flushing.keys.nr += nr; + wb->inc.keys.nr -= nr; +out: + if (!wb->inc.keys.nr) + bch2_journal_pin_drop(j, &wb->inc.pin); + else + bch2_journal_pin_update(j, wb->inc.keys.data[0].journal_seq, &wb->inc.pin, + bch2_btree_write_buffer_journal_flush); + + if (j->watermark) { + spin_lock(&j->lock); + bch2_journal_set_watermark(j); + spin_unlock(&j->lock); + } + + BUG_ON(wb->sorted.size < wb->flushing.keys.nr); +} + +static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) { struct bch_fs *c = trans->c; struct journal *j = &c->journal; struct btree_write_buffer *wb = &c->btree_write_buffer; - struct journal_entry_pin pin; - struct btree_write_buffered_key *i, *keys; + struct wb_key_ref *i; struct btree_iter iter = { NULL }; - size_t nr = 0, skipped = 0, fast = 0, slowpath = 0; + size_t skipped = 0, fast = 0, slowpath = 0; bool write_locked = false; - union btree_write_buffer_state s; int ret = 0; - memset(&pin, 0, sizeof(pin)); + bch2_trans_unlock(trans); + bch2_trans_begin(trans); - bch2_journal_pin_copy(j, &pin, &wb->journal_pin, - bch2_btree_write_buffer_journal_flush); - bch2_journal_pin_drop(j, &wb->journal_pin); + mutex_lock(&wb->inc.lock); + move_keys_from_inc_to_flushing(wb); + mutex_unlock(&wb->inc.lock); - s = btree_write_buffer_switch(wb); - keys = wb->keys[s.idx]; - nr = s.nr; + for (size_t i = 0; i < wb->flushing.keys.nr; i++) { + wb->sorted.data[i].idx = i; + wb->sorted.data[i].btree = wb->flushing.keys.data[i].btree; + wb->sorted.data[i].pos = wb->flushing.keys.data[i].k.k.p; + } + wb->sorted.nr = wb->flushing.keys.nr; /* * We first sort so that we can detect and skip redundant updates, and @@ -177,32 +206,46 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) * If that happens, simply skip the key so we can optimistically insert * as many keys as possible in the fast path. */ - sort(keys, nr, sizeof(keys[0]), - btree_write_buffered_key_cmp, NULL); + sort(wb->sorted.data, wb->sorted.nr, + sizeof(wb->sorted.data[0]), + wb_key_cmp, NULL); - for (i = keys; i < keys + nr; i++) { - if (i + 1 < keys + nr && + darray_for_each(wb->sorted, i) { + struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; + + for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) + prefetch(&wb->flushing.keys.data[n->idx]); + + BUG_ON(!k->journal_seq); + + if (i + 1 < &darray_top(wb->sorted) && i[0].btree == i[1].btree && - bpos_eq(i[0].k.k.p, i[1].k.k.p)) { + bpos_eq(i[0].pos, i[1].pos)) { + struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx]; + skipped++; - i->journal_seq = 0; + n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq); + k->journal_seq = 0; continue; } - if (write_locked && - (iter.path->btree_id != i->btree || - bpos_gt(i->k.k.p, iter.path->l[0].b->key.k.p))) { - bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b); - write_locked = false; + if (write_locked) { + struct btree_path *path = iter.path; + + if (path->btree_id != i->btree || + bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) { + bch2_btree_node_unlock_write(trans, path, path->l[0].b); + write_locked = false; + } } - if (!iter.path || iter.path->btree_id != i->btree) { + if (!iter.path || iter.btree_id != k->btree) { bch2_trans_iter_exit(trans, &iter); - bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p, + bch2_trans_iter_init(trans, &iter, k->btree, k->k.k.p, BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS); } - bch2_btree_iter_set_pos(&iter, i->k.k.p); + bch2_btree_iter_set_pos(&iter, k->k.k.p); iter.path->preserve = false; do { @@ -211,13 +254,13 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) break; } - ret = wb_flush_one(trans, &iter, i, &write_locked, &fast); + ret = wb_flush_one(trans, &iter, k, &write_locked, &fast); if (!write_locked) bch2_trans_begin(trans); } while (bch2_err_matches(ret, BCH_ERR_transaction_restart)); if (!ret) { - i->journal_seq = 0; + k->journal_seq = 0; } else if (ret == -BCH_ERR_journal_reclaim_would_deadlock) { slowpath++; ret = 0; @@ -239,18 +282,17 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) * The fastpath zapped the seq of keys that were successfully flushed so * we can skip those here. */ - trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr); + trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr); - sort(keys, nr, sizeof(keys[0]), - btree_write_buffered_journal_cmp, - NULL); - - for (i = keys; i < keys + nr; i++) { + struct btree_write_buffered_key *i; + darray_for_each(wb->flushing.keys, i) { if (!i->journal_seq) continue; - bch2_journal_pin_update(j, i->journal_seq, &pin, - bch2_btree_write_buffer_journal_flush); + bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin, + bch2_btree_write_buffer_journal_flush); + + bch2_trans_begin(trans); ret = commit_do(trans, NULL, NULL, BCH_WATERMARK_reclaim| @@ -265,26 +307,67 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) } err: bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)); - trace_write_buffer_flush(trans, nr, skipped, fast, wb->size); - bch2_journal_pin_drop(j, &pin); + trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0); + bch2_journal_pin_drop(j, &wb->flushing.pin); + wb->flushing.keys.nr = 0; return ret; } +static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq) +{ + struct journal *j = &c->journal; + struct journal_buf *buf; + int ret = 0; + + while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, seq))) { + ret = bch2_journal_keys_to_write_buffer(c, buf); + mutex_unlock(&j->buf_lock); + } + + return ret; +} + +static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq) +{ + struct bch_fs *c = trans->c; + struct btree_write_buffer *wb = &c->btree_write_buffer; + int ret = 0, fetch_from_journal_err; + + do { + bch2_trans_unlock(trans); + + fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq); + + /* + * On memory allocation failure, bch2_btree_write_buffer_flush_locked() + * is not guaranteed to empty wb->inc: + */ + mutex_lock(&wb->flushing.lock); + ret = bch2_btree_write_buffer_flush_locked(trans); + mutex_unlock(&wb->flushing.lock); + } while (!ret && + (fetch_from_journal_err || + (wb->inc.pin.seq && wb->inc.pin.seq <= seq) || + (wb->flushing.pin.seq && wb->flushing.pin.seq <= seq))); + + return ret; +} + +static int bch2_btree_write_buffer_journal_flush(struct journal *j, + struct journal_entry_pin *_pin, u64 seq) +{ + struct bch_fs *c = container_of(j, struct bch_fs, journal); + + return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq)); +} + int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) { struct bch_fs *c = trans->c; - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer)) - return -BCH_ERR_erofs_no_writes; - trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_); - bch2_trans_unlock(trans); - mutex_lock(&c->btree_write_buffer.flush_lock); - int ret = bch2_btree_write_buffer_flush_locked(trans); - mutex_unlock(&c->btree_write_buffer.flush_lock); - bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); - return ret; + return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal)); } int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans) @@ -293,9 +376,9 @@ int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans) struct btree_write_buffer *wb = &c->btree_write_buffer; int ret = 0; - if (mutex_trylock(&wb->flush_lock)) { + if (mutex_trylock(&wb->flushing.lock)) { ret = bch2_btree_write_buffer_flush_locked(trans); - mutex_unlock(&wb->flush_lock); + mutex_unlock(&wb->flushing.lock); } return ret; @@ -313,90 +396,179 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans) return ret; } -static int bch2_btree_write_buffer_journal_flush(struct journal *j, - struct journal_entry_pin *_pin, u64 seq) +static void bch2_btree_write_buffer_flush_work(struct work_struct *work) { - struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work); struct btree_write_buffer *wb = &c->btree_write_buffer; + int ret; - mutex_lock(&wb->flush_lock); - int ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans)); - mutex_unlock(&wb->flush_lock); + mutex_lock(&wb->flushing.lock); + do { + ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans)); + } while (!ret && bch2_btree_write_buffer_should_flush(c)); + mutex_unlock(&wb->flushing.lock); - return ret; + bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); } -static inline u64 btree_write_buffer_ref(int idx) +int __bch2_journal_key_to_wb(struct bch_fs *c, + struct journal_keys_to_wb *dst, + enum btree_id btree, struct bkey_i *k) { - return ((union btree_write_buffer_state) { - .ref0 = idx == 0, - .ref1 = idx == 1, - }).v; -} - -int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans) -{ - struct bch_fs *c = trans->c; struct btree_write_buffer *wb = &c->btree_write_buffer; - struct btree_write_buffered_key *i; - union btree_write_buffer_state old, new; - int ret = 0; - u64 v; + int ret; +retry: + ret = darray_make_room_gfp(&dst->wb->keys, 1, GFP_KERNEL); + if (!ret && dst->wb == &wb->flushing) + ret = darray_resize(&wb->sorted, wb->flushing.keys.size); - trans_for_each_wb_update(trans, i) { - EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); + if (unlikely(ret)) { + if (dst->wb == &c->btree_write_buffer.flushing) { + mutex_unlock(&dst->wb->lock); + dst->wb = &c->btree_write_buffer.inc; + bch2_journal_pin_add(&c->journal, dst->seq, &dst->wb->pin, + bch2_btree_write_buffer_journal_flush); + goto retry; + } - i->journal_seq = trans->journal_res.seq; - i->journal_offset = trans->journal_res.offset; + return ret; } - preempt_disable(); - v = READ_ONCE(wb->state.v); - do { - old.v = new.v = v; + dst->room = darray_room(dst->wb->keys); + if (dst->wb == &wb->flushing) + dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr); + BUG_ON(!dst->room); + BUG_ON(!dst->seq); - new.v += btree_write_buffer_ref(new.idx); - new.nr += trans->nr_wb_updates; - if (new.nr > wb->size) { - ret = -BCH_ERR_btree_insert_need_flush_buffer; - goto out; + struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys); + wb_k->journal_seq = dst->seq; + wb_k->btree = btree; + bkey_copy(&wb_k->k, k); + dst->wb->keys.nr++; + dst->room--; + return 0; +} + +void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_keys_to_wb *dst, u64 seq) +{ + struct btree_write_buffer *wb = &c->btree_write_buffer; + + if (mutex_trylock(&wb->flushing.lock)) { + mutex_lock(&wb->inc.lock); + move_keys_from_inc_to_flushing(wb); + + /* + * Attempt to skip wb->inc, and add keys directly to + * wb->flushing, saving us a copy later: + */ + + if (!wb->inc.keys.nr) { + dst->wb = &wb->flushing; + } else { + mutex_unlock(&wb->flushing.lock); + dst->wb = &wb->inc; } - } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v); + } else { + mutex_lock(&wb->inc.lock); + dst->wb = &wb->inc; + } - memcpy(wb->keys[new.idx] + old.nr, - trans->wb_updates, - sizeof(trans->wb_updates[0]) * trans->nr_wb_updates); + dst->room = darray_room(dst->wb->keys); + if (dst->wb == &wb->flushing) + dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr); + dst->seq = seq; - bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin, + bch2_journal_pin_add(&c->journal, seq, &dst->wb->pin, bch2_btree_write_buffer_journal_flush); +} - atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter); +void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst) +{ + struct btree_write_buffer *wb = &c->btree_write_buffer; + + if (!dst->wb->keys.nr) + bch2_journal_pin_drop(&c->journal, &dst->wb->pin); + + if (bch2_btree_write_buffer_should_flush(c) && + __bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) && + !queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work)) + bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); + + if (dst->wb == &wb->flushing) + mutex_unlock(&wb->flushing.lock); + mutex_unlock(&wb->inc.lock); +} + +static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf) +{ + struct journal_keys_to_wb dst; + struct jset_entry *entry; + struct bkey_i *k; + int ret = 0; + + bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq)); + + for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) { + jset_entry_for_each_key(entry, k) { + ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k); + if (ret) + goto out; + } + + entry->type = BCH_JSET_ENTRY_btree_keys; + } + + buf->need_flush_to_write_buffer = false; out: - preempt_enable(); + bch2_journal_keys_to_write_buffer_end(c, &dst); return ret; } +static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size) +{ + if (wb->keys.size >= new_size) + return 0; + + if (!mutex_trylock(&wb->lock)) + return -EINTR; + + int ret = darray_resize(&wb->keys, new_size); + mutex_unlock(&wb->lock); + return ret; +} + +int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size) +{ + struct btree_write_buffer *wb = &c->btree_write_buffer; + + return wb_keys_resize(&wb->flushing, new_size) ?: + wb_keys_resize(&wb->inc, new_size); +} + void bch2_fs_btree_write_buffer_exit(struct bch_fs *c) { struct btree_write_buffer *wb = &c->btree_write_buffer; - BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal)); + BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) && + !bch2_journal_error(&c->journal)); - kvfree(wb->keys[1]); - kvfree(wb->keys[0]); + darray_exit(&wb->sorted); + darray_exit(&wb->flushing.keys); + darray_exit(&wb->inc.keys); } int bch2_fs_btree_write_buffer_init(struct bch_fs *c) { struct btree_write_buffer *wb = &c->btree_write_buffer; - mutex_init(&wb->flush_lock); - wb->size = c->opts.btree_write_buffer_size; + mutex_init(&wb->inc.lock); + mutex_init(&wb->flushing.lock); + INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work); - wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL); - wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL); - if (!wb->keys[0] || !wb->keys[1]) - return -BCH_ERR_ENOMEM_fs_btree_write_buffer_init; + /* Will be resized by journal as needed: */ + unsigned initial_size = 1 << 16; - return 0; + return darray_make_room(&wb->inc.keys, initial_size) ?: + darray_make_room(&wb->flushing.keys, initial_size) ?: + darray_make_room(&wb->sorted, initial_size); } diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index dec2c9a8bab2..1f645f529ed2 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -2,13 +2,59 @@ #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H #define _BCACHEFS_BTREE_WRITE_BUFFER_H -int bch2_btree_write_buffer_flush_locked(struct btree_trans *); -int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); +#include "bkey.h" + +static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c) +{ + struct btree_write_buffer *wb = &c->btree_write_buffer; + + return wb->inc.keys.nr + wb->flushing.keys.nr > wb->inc.keys.size / 4; +} + +static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c) +{ + struct btree_write_buffer *wb = &c->btree_write_buffer; + + return wb->inc.keys.nr > wb->inc.keys.size * 3 / 4; +} + +struct btree_trans; int bch2_btree_write_buffer_flush_sync(struct btree_trans *); +int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); int bch2_btree_write_buffer_tryflush(struct btree_trans *); -int bch2_btree_insert_keys_write_buffer(struct btree_trans *); +struct journal_keys_to_wb { + struct btree_write_buffer_keys *wb; + size_t room; + u64 seq; +}; +int __bch2_journal_key_to_wb(struct bch_fs *, + struct journal_keys_to_wb *, + enum btree_id, struct bkey_i *); + +static inline int bch2_journal_key_to_wb(struct bch_fs *c, + struct journal_keys_to_wb *dst, + enum btree_id btree, struct bkey_i *k) +{ + EBUG_ON(!dst->seq); + + if (unlikely(!dst->room)) + return __bch2_journal_key_to_wb(c, dst, btree, k); + + struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys); + wb_k->journal_seq = dst->seq; + wb_k->btree = btree; + bkey_copy(&wb_k->k, k); + dst->wb->keys.nr++; + dst->room--; + return 0; +} + +void bch2_journal_keys_to_write_buffer_start(struct bch_fs *, struct journal_keys_to_wb *, u64); +void bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_to_wb *); + +int bch2_btree_write_buffer_resize(struct bch_fs *, size_t); void bch2_fs_btree_write_buffer_exit(struct bch_fs *); int bch2_fs_btree_write_buffer_init(struct bch_fs *); diff --git a/fs/bcachefs/btree_write_buffer_types.h b/fs/bcachefs/btree_write_buffer_types.h index 99993ba77aea..8758d6adabf4 100644 --- a/fs/bcachefs/btree_write_buffer_types.h +++ b/fs/bcachefs/btree_write_buffer_types.h @@ -2,43 +2,56 @@ #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H #define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H +#include "darray.h" #include "journal_types.h" #define BTREE_WRITE_BUFERED_VAL_U64s_MAX 4 #define BTREE_WRITE_BUFERED_U64s_MAX (BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX) +struct wb_key_ref { +union { + struct { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + unsigned idx:24; + struct bpos pos; + enum btree_id btree:8; +#else + enum btree_id btree:8; + struct bpos pos; + unsigned idx:24; +#endif + } __packed; + struct { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + u64 lo; + u64 mi; + u64 hi; +#else + u64 hi; + u64 mi; + u64 lo; +#endif + }; +}; +}; + struct btree_write_buffered_key { - u64 journal_seq; - unsigned journal_offset; - enum btree_id btree; + enum btree_id btree:8; + u64 journal_seq:56; __BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX); }; -union btree_write_buffer_state { - struct { - atomic64_t counter; - }; - - struct { - u64 v; - }; - - struct { - u64 nr:23; - u64 idx:1; - u64 ref0:20; - u64 ref1:20; - }; +struct btree_write_buffer_keys { + DARRAY(struct btree_write_buffered_key) keys; + struct journal_entry_pin pin; + struct mutex lock; }; struct btree_write_buffer { - struct mutex flush_lock; - struct journal_entry_pin journal_pin; - - union btree_write_buffer_state state; - size_t size; - - struct btree_write_buffered_key *keys[2]; + DARRAY(struct wb_key_ref) sorted; + struct btree_write_buffer_keys inc; + struct btree_write_buffer_keys flushing; + struct work_struct flush_work; }; #endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 1a3303c65961..76163c2ea3b3 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1005,7 +1005,7 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s) unsigned i, nr_data = v->nr_blocks - v->nr_redundant; int ret = 0; - ret = bch2_btree_write_buffer_flush_nocheck_rw(trans); + ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) goto err; diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index ac6201403fbe..c39bea441983 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -151,7 +151,6 @@ x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ - x(BCH_ERR_btree_insert_fail, btree_insert_need_flush_buffer) \ x(0, backpointer_to_overwritten_btree_node) \ x(0, lock_fail_root_changed) \ x(0, journal_reclaim_would_deadlock) \ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 6d56a71243bd..8294d7fd6632 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -10,6 +10,7 @@ #include "bkey_methods.h" #include "btree_gc.h" #include "btree_update.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "error.h" #include "journal.h" @@ -332,6 +333,7 @@ static int journal_entry_open(struct journal *j) buf->must_flush = false; buf->separate_flush = false; buf->flush_time = 0; + buf->need_flush_to_write_buffer = true; memset(buf->data, 0, sizeof(*buf->data)); buf->data->seq = cpu_to_le64(journal_cur_seq(j)); @@ -768,6 +770,48 @@ void bch2_journal_block(struct journal *j) journal_quiesce(j); } +static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq) +{ + struct journal_buf *ret = NULL; + + mutex_lock(&j->buf_lock); + spin_lock(&j->lock); + max_seq = min(max_seq, journal_cur_seq(j)); + + for (u64 seq = journal_last_unwritten_seq(j); + seq <= max_seq; + seq++) { + unsigned idx = seq & JOURNAL_BUF_MASK; + struct journal_buf *buf = j->buf + idx; + + if (buf->need_flush_to_write_buffer) { + if (seq == journal_cur_seq(j)) + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); + + union journal_res_state s; + s.v = atomic64_read_acquire(&j->reservations.counter); + + ret = journal_state_count(s, idx) + ? ERR_PTR(-EAGAIN) + : buf; + break; + } + } + + spin_unlock(&j->lock); + if (IS_ERR_OR_NULL(ret)) + mutex_unlock(&j->buf_lock); + return ret; +} + +struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq) +{ + struct journal_buf *ret; + + wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN)); + return ret; +} + /* allocate journal on a device: */ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index a1384433d193..1e14e6b324f8 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -425,6 +425,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j) void bch2_journal_unblock(struct journal *); void bch2_journal_block(struct journal *); +struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq); void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); void bch2_journal_debug_to_text(struct printbuf *, struct journal *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index afcb2a435956..4d8e10c901a8 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -4,6 +4,7 @@ #include "alloc_foreground.h" #include "btree_io.h" #include "btree_update_interior.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "checksum.h" #include "disk_groups.h" @@ -723,6 +724,22 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs journal_entry_btree_keys_to_text(out, c, entry); } +static int journal_entry_write_buffer_keys_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, + enum bkey_invalid_flags flags) +{ + return journal_entry_btree_keys_validate(c, jset, entry, + version, big_endian, READ); +} + +static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c, + struct jset_entry *entry) +{ + journal_entry_btree_keys_to_text(out, c, entry); +} + struct jset_entry_ops { int (*validate)(struct bch_fs *, struct jset *, struct jset_entry *, unsigned, int, @@ -1503,6 +1520,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) { + struct bch_fs *c = container_of(j, struct bch_fs, journal); + /* we aren't holding j->lock: */ unsigned new_size = READ_ONCE(j->buf_size_want); void *new_buf; @@ -1510,6 +1529,11 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) if (buf->buf_size >= new_size) return; + size_t btree_write_buffer_size = new_size / 64; + + if (bch2_btree_write_buffer_resize(c, btree_write_buffer_size)) + return; + new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN); if (!new_buf) return; @@ -1703,9 +1727,11 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) struct bch_fs *c = container_of(j, struct bch_fs, journal); struct jset_entry *start, *end, *i, *next, *prev = NULL; struct jset *jset = w->data; + struct journal_keys_to_wb wb = { NULL }; unsigned sectors, bytes, u64s; - bool validate_before_checksum = false; unsigned long btree_roots_have = 0; + bool validate_before_checksum = false; + u64 seq = le64_to_cpu(jset->seq); int ret; /* @@ -1733,9 +1759,28 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) * to c->btree_roots we have to get any missing btree roots and * add them to this journal entry: */ - if (i->type == BCH_JSET_ENTRY_btree_root) { + switch (i->type) { + case BCH_JSET_ENTRY_btree_root: bch2_journal_entry_to_btree_root(c, i); __set_bit(i->btree_id, &btree_roots_have); + break; + case BCH_JSET_ENTRY_write_buffer_keys: + EBUG_ON(!w->need_flush_to_write_buffer); + + if (!wb.wb) + bch2_journal_keys_to_write_buffer_start(c, &wb, seq); + + struct bkey_i *k; + jset_entry_for_each_key(i, k) { + ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k); + if (ret) { + bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer"); + bch2_journal_keys_to_write_buffer_end(c, &wb); + return ret; + } + } + i->type = BCH_JSET_ENTRY_btree_keys; + break; } /* Can we merge with previous entry? */ @@ -1758,6 +1803,10 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) memmove_u64s_down(prev, i, jset_u64s(u64s)); } + if (wb.wb) + bch2_journal_keys_to_write_buffer_end(c, &wb); + w->need_flush_to_write_buffer = false; + prev = prev ? vstruct_next(prev) : jset->start; jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); @@ -1765,8 +1814,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have); - bch2_journal_super_entries_add_common(c, &end, - le64_to_cpu(jset->seq)); + bch2_journal_super_entries_add_common(c, &end, seq); u64s = (u64 *) end - (u64 *) start; BUG_ON(u64s > j->entry_u64s_reserved); @@ -1789,7 +1837,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); if (!JSET_NO_FLUSH(jset) && journal_entry_empty(jset)) - j->last_empty_seq = le64_to_cpu(jset->seq); + j->last_empty_seq = seq; if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) validate_before_checksum = true; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index dc415e0ec493..60b9d3572387 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "btree_key_cache.h" #include "btree_update.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "errcode.h" #include "error.h" @@ -50,20 +51,23 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j, return available; } -static inline void journal_set_watermark(struct journal *j) +void bch2_journal_set_watermark(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); bool low_on_space = j->space[journal_space_clean].total * 4 <= j->space[journal_space_total].total; bool low_on_pin = fifo_free(&j->pin) < j->pin.size / 4; - unsigned watermark = low_on_space || low_on_pin + bool low_on_wb = bch2_btree_write_buffer_must_wait(c); + unsigned watermark = low_on_space || low_on_pin || low_on_wb ? BCH_WATERMARK_reclaim : BCH_WATERMARK_stripe; if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space], &j->low_on_space_start, low_on_space) || track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin], - &j->low_on_pin_start, low_on_pin)) + &j->low_on_pin_start, low_on_pin) || + track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], + &j->write_buffer_full_start, low_on_wb)) trace_and_count(c, journal_full, c); swap(watermark, j->watermark); @@ -230,7 +234,7 @@ void bch2_journal_space_available(struct journal *j) else clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); - journal_set_watermark(j); + bch2_journal_set_watermark(j); out: j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0; j->cur_entry_error = ret; diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h index 7b15d682a0f5..ec84c3345281 100644 --- a/fs/bcachefs/journal_reclaim.h +++ b/fs/bcachefs/journal_reclaim.h @@ -16,6 +16,7 @@ static inline void journal_reclaim_kick(struct journal *j) unsigned bch2_journal_dev_buckets_available(struct journal *, struct journal_device *, enum journal_space_from); +void bch2_journal_set_watermark(struct journal *); void bch2_journal_space_available(struct journal *); static inline bool journal_pin_active(struct journal_entry_pin *pin) diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 5c8d3a8ec4df..38817c7a0851 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -36,6 +36,7 @@ struct journal_buf { bool noflush; /* write has already been kicked off, and was noflush */ bool must_flush; /* something wants a flush */ bool separate_flush; + bool need_flush_to_write_buffer; }; /* @@ -276,6 +277,7 @@ struct journal { u64 low_on_space_start; u64 low_on_pin_start; u64 max_in_flight_start; + u64 write_buffer_full_start; struct bch2_time_stats *flush_write_time; struct bch2_time_stats *noflush_write_time; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 7f9e3001bf55..cf69b92cbd03 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -233,11 +233,6 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, true, \ NULL, "Stash pointer to in memory btree node in btree ptr")\ - x(btree_write_buffer_size, u32, \ - OPT_FS|OPT_MOUNT, \ - OPT_UINT(16, (1U << 20) - 1), \ - BCH2_NO_SB_OPT, 1U << 13, \ - NULL, "Number of btree write buffer entries") \ x(gc_reserve_percent, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_UINT(5, 21), \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index c7c7d4a11eb9..88a762bce7da 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -363,7 +363,8 @@ void bch2_fs_read_only(struct bch_fs *c) BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal)); BUG_ON(atomic_read(&c->btree_cache.dirty)); BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty)); - BUG_ON(c->btree_write_buffer.state.nr); + BUG_ON(c->btree_write_buffer.inc.keys.nr); + BUG_ON(c->btree_write_buffer.flushing.keys.nr); bch_verbose(c, "marking filesystem clean"); bch2_fs_mark_clean(c); From 38ced43bb04ac59b1417f520f4d5a5c3bc3d1e57 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 Nov 2023 00:06:56 -0400 Subject: [PATCH 105/226] bcachefs: Inline btree write buffer sort The sort in the btree write buffer flush path is a very hot path, and it's particularly performance sensitive since it's single threaded and can block every other thread on a multithreaded write workload. It's well worth doing a sort with inlined cmp and swap functions. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_write_buffer.c | 93 +++++++++++++++++++++++--- fs/bcachefs/btree_write_buffer_types.h | 4 +- 2 files changed, 84 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index b19aeea5a80b..ed5640c4d1a3 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -11,21 +11,95 @@ #include "journal_reclaim.h" #include -#include static int bch2_btree_write_buffer_journal_flush(struct journal *, struct journal_entry_pin *, u64); static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *); -static inline int wb_key_cmp(const void *_l, const void *_r) +static inline bool __wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) +{ + return (cmp_int(l->hi, r->hi) ?: + cmp_int(l->mi, r->mi) ?: + cmp_int(l->lo, r->lo)) >= 0; +} + +static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) +{ +#ifdef CONFIG_X86_64 + int cmp; + + asm("mov (%[l]), %%rax;" + "sub (%[r]), %%rax;" + "mov 8(%[l]), %%rax;" + "sbb 8(%[r]), %%rax;" + "mov 16(%[l]), %%rax;" + "sbb 16(%[r]), %%rax;" + : "=@ccae" (cmp) + : [l] "r" (l), [r] "r" (r) + : "rax", "cc"); + + EBUG_ON(cmp != __wb_key_cmp(l, r)); + return cmp; +#else + return __wb_key_cmp(l, r); +#endif +} + +/* Compare excluding idx, the low 24 bits: */ +static inline bool wb_key_eq(const void *_l, const void *_r) { const struct wb_key_ref *l = _l; const struct wb_key_ref *r = _r; - return cmp_int(l->hi, r->hi) ?: - cmp_int(l->mi, r->mi) ?: - cmp_int(l->lo, r->lo); + return !((l->hi ^ r->hi)| + (l->mi ^ r->mi)| + ((l->lo >> 24) ^ (r->lo >> 24))); +} + +static noinline void wb_sort(struct wb_key_ref *base, size_t num) +{ + size_t n = num, a = num / 2; + + if (!a) /* num < 2 || size == 0 */ + return; + + for (;;) { + size_t b, c, d; + + if (a) /* Building heap: sift down --a */ + --a; + else if (--n) /* Sorting: Extract root to --n */ + swap(base[0], base[n]); + else /* Sort complete */ + break; + + /* + * Sift element at "a" down into heap. This is the + * "bottom-up" variant, which significantly reduces + * calls to cmp_func(): we find the sift-down path all + * the way to the leaves (one compare per level), then + * backtrack to find where to insert the target element. + * + * Because elements tend to sift down close to the leaves, + * this uses fewer compares than doing two per level + * on the way down. (A bit more than half as many on + * average, 3/4 worst-case.) + */ + for (b = a; c = 2*b + 1, (d = c + 1) < n;) + b = wb_key_cmp(base + c, base + d) ? c : d; + if (d == n) /* Special case last leaf with no sibling */ + b = c; + + /* Now backtrack from "b" to the correct location for "a" */ + while (b != a && wb_key_cmp(base + a, base + b)) + b = (b - 1) / 2; + c = b; /* Where "a" belongs */ + while (b != a) { /* Shift it into place */ + b = (b - 1) / 2; + swap(base[b], base[c]); + } + } } static noinline int wb_flush_one_slowpath(struct btree_trans *trans, @@ -188,7 +262,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) for (size_t i = 0; i < wb->flushing.keys.nr; i++) { wb->sorted.data[i].idx = i; wb->sorted.data[i].btree = wb->flushing.keys.data[i].btree; - wb->sorted.data[i].pos = wb->flushing.keys.data[i].k.k.p; + memcpy(&wb->sorted.data[i].pos, &wb->flushing.keys.data[i].k.k.p, sizeof(struct bpos)); } wb->sorted.nr = wb->flushing.keys.nr; @@ -206,9 +280,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) * If that happens, simply skip the key so we can optimistically insert * as many keys as possible in the fast path. */ - sort(wb->sorted.data, wb->sorted.nr, - sizeof(wb->sorted.data[0]), - wb_key_cmp, NULL); + wb_sort(wb->sorted.data, wb->sorted.nr); darray_for_each(wb->sorted, i) { struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; @@ -219,8 +291,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) BUG_ON(!k->journal_seq); if (i + 1 < &darray_top(wb->sorted) && - i[0].btree == i[1].btree && - bpos_eq(i[0].pos, i[1].pos)) { + wb_key_eq(i, i + 1)) { struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx]; skipped++; diff --git a/fs/bcachefs/btree_write_buffer_types.h b/fs/bcachefs/btree_write_buffer_types.h index 8758d6adabf4..9b9433de9c36 100644 --- a/fs/bcachefs/btree_write_buffer_types.h +++ b/fs/bcachefs/btree_write_buffer_types.h @@ -13,11 +13,11 @@ union { struct { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ unsigned idx:24; - struct bpos pos; + u8 pos[sizeof(struct bpos)]; enum btree_id btree:8; #else enum btree_id btree:8; - struct bpos pos; + u8 pos[sizeof(struct bpos)]; unsigned idx:24; #endif } __packed; From 002c76dcf6a49a82498e8cddcde75e0dd83f745a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 22:51:16 -0500 Subject: [PATCH 106/226] bcachefs: check_root() can now be run online check_root() is simple enough to run as one single transaction, so is trivial to run online. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 10 ++-------- fs/bcachefs/recovery_types.h | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 8b77098dde2d..d0cd6020bd9a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1950,14 +1950,10 @@ static int check_root_trans(struct btree_trans *trans) root_subvol.v.flags = 0; root_subvol.v.snapshot = cpu_to_le32(snapshot); root_subvol.v.inode = cpu_to_le64(inum); - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, - bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, - &root_subvol.k_i, 0)); + ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0); bch_err_msg(c, ret, "writing root subvol"); if (ret) goto err; - } ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); @@ -1984,9 +1980,7 @@ static int check_root_trans(struct btree_trans *trans) /* Get root directory, create if it doesn't exist: */ int bch2_check_root(struct bch_fs *c) { - int ret; - - ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_root_trans(trans)); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index 6a7debe663b9..6dfc4f10948d 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -42,7 +42,7 @@ x(check_indirect_extents, 26, PASS_FSCK) \ x(check_dirents, 27, PASS_FSCK) \ x(check_xattrs, 28, PASS_FSCK) \ - x(check_root, 29, PASS_FSCK) \ + x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ x(check_directory_structure, 30, PASS_FSCK) \ x(check_nlinks, 31, PASS_FSCK) \ x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \ From 679972348d03fb3644f4e1f7f7304911accd950d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 Nov 2023 21:43:47 -0500 Subject: [PATCH 107/226] bcachefs: kill btree_trans->wb_updates the btree write buffer path now creates a journal entry directly Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 - fs/bcachefs/btree_iter.c | 17 +++------ fs/bcachefs/btree_iter.h | 2 +- fs/bcachefs/btree_trans_commit.c | 18 ---------- fs/bcachefs/btree_types.h | 3 -- fs/bcachefs/btree_update.c | 62 ++------------------------------ fs/bcachefs/btree_update.h | 28 ++++++++++----- 7 files changed, 28 insertions(+), 103 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index eeb2787e6f79..db246d00c607 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -638,7 +638,6 @@ struct btree_transaction_stats { struct bch2_time_stats lock_hold_times; struct mutex lock; unsigned nr_max_paths; - unsigned wb_updates_size; unsigned journal_entries_size; unsigned max_mem; char *max_paths_text; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 6d6963858a83..9946b9b6b5a5 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -13,6 +13,7 @@ #include "error.h" #include "extents.h" #include "journal.h" +#include "journal_io.h" #include "replicas.h" #include "snapshot.h" #include "trace.h" @@ -1369,7 +1370,6 @@ noinline __cold void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) { struct btree_insert_entry *i; - struct btree_write_buffered_key *wb; prt_printf(buf, "transaction updates for %s journal seq %llu", trans->fn, trans->journal_res.seq); @@ -1394,16 +1394,10 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) prt_newline(buf); } - trans_for_each_wb_update(trans, wb) { - prt_printf(buf, "update: btree=%s wb=1 %pS", - bch2_btree_id_str(wb->btree), - (void *) i->ip_allocated); - prt_newline(buf); - - prt_printf(buf, " new "); - bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(&wb->k)); - prt_newline(buf); - } + for (struct jset_entry *e = trans->journal_entries; + e != btree_trans_journal_entries_top(trans); + e = vstruct_next(e)) + bch2_journal_entry_to_text(buf, trans->c, e); printbuf_indent_sub(buf, 2); } @@ -2922,7 +2916,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) if (s) { trans->nr_max_paths = s->nr_max_paths; - trans->wb_updates_size = s->wb_updates_size; trans->journal_entries_size = s->journal_entries_size; } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index f7d5f8403940..6bc75bee55c8 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -496,7 +496,7 @@ static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size) { - size = roundup(size, 8); + size = round_up(size, 8); if (likely(trans->mem_top + size <= trans->mem_bytes)) { void *p = trans->mem + trans->mem_top; diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index e52386fdc7ec..2078ab507012 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -624,7 +624,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, { struct bch_fs *c = trans->c; struct btree_insert_entry *i; - struct btree_write_buffered_key *wb; struct btree_trans_commit_hook *h; unsigned u64s = 0; int ret; @@ -743,14 +742,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, bkey_copy((struct bkey_i *) entry->start, i->k); } - trans_for_each_wb_update(trans, wb) { - entry = bch2_journal_add_entry(j, &trans->journal_res, - BCH_JSET_ENTRY_write_buffer_keys, - wb->btree, 0, - wb->k.k.u64s); - bkey_copy((struct bkey_i *) entry->start, &wb->k); - } - memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), trans->journal_entries, trans->journal_entries_u64s); @@ -785,13 +776,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans) { struct btree_insert_entry *i; - struct btree_write_buffered_key *wb; trans_for_each_update(trans, i) bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); - - trans_for_each_wb_update(trans, wb) - bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p); } static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, @@ -993,11 +980,9 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; struct btree_insert_entry *i = NULL; - struct btree_write_buffered_key *wb; int ret = 0; if (!trans->nr_updates && - !trans->nr_wb_updates && !trans->journal_entries_u64s) goto out_reset; @@ -1064,9 +1049,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) trans->journal_u64s += jset_u64s(i->old_k.u64s); } - trans_for_each_wb_update(trans, wb) - trans->journal_u64s += jset_u64s(wb->k.k.u64s); - if (trans->extra_journal_res) { ret = bch2_disk_reservation_add(c, trans->disk_res, trans->extra_journal_res, diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 40e5a004e5c1..d129c0e468cc 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -388,8 +388,6 @@ struct btree_trans { u8 fn_idx; u8 nr_sorted; u8 nr_updates; - u8 nr_wb_updates; - u8 wb_updates_size; bool srcu_held:1; bool used_mempool:1; bool in_traverse_all:1; @@ -424,7 +422,6 @@ struct btree_trans { u8 sorted[BTREE_ITER_MAX + 8]; struct btree_path paths[BTREE_ITER_MAX]; struct btree_insert_entry updates[BTREE_ITER_MAX]; - struct btree_write_buffered_key *wb_updates; /* update path: */ struct btree_trans_commit_hook *hooks; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 33497e71b3d4..ecaf8d6332fc 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -532,9 +532,9 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_); } -static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans, - enum btree_id btree, - struct bkey_i *k) +int bch2_btree_insert_clone_trans(struct btree_trans *trans, + enum btree_id btree, + struct bkey_i *k) { struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k)); int ret = PTR_ERR_OR_ZERO(n); @@ -573,62 +573,6 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi return e; } -int __must_check bch2_trans_update_buffered(struct btree_trans *trans, - enum btree_id btree, - struct bkey_i *k) -{ - struct btree_write_buffered_key *i; - int ret; - - EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size); - EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); - - if (unlikely(trans->journal_replay_not_finished)) - return bch2_btree_insert_clone_trans(trans, btree, k); - - trans_for_each_wb_update(trans, i) { - if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) { - bkey_copy(&i->k, k); - return 0; - } - } - - if (!trans->wb_updates || - trans->nr_wb_updates == trans->wb_updates_size) { - struct btree_write_buffered_key *u; - - if (trans->nr_wb_updates == trans->wb_updates_size) { - struct btree_transaction_stats *s = btree_trans_stats(trans); - - BUG_ON(trans->wb_updates_size > U8_MAX / 2); - trans->wb_updates_size = max(1, trans->wb_updates_size * 2); - if (s) - s->wb_updates_size = trans->wb_updates_size; - } - - u = bch2_trans_kmalloc_nomemzero(trans, - trans->wb_updates_size * - sizeof(struct btree_write_buffered_key)); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - return ret; - - if (trans->nr_wb_updates) - memcpy(u, trans->wb_updates, trans->nr_wb_updates * - sizeof(struct btree_write_buffered_key)); - trans->wb_updates = u; - } - - trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) { - .btree = btree, - }; - - bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k); - trans->nr_wb_updates++; - - return 0; -} - int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, enum btree_id btree, struct bpos end) { diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 1f2a77bfe461..8aec86ca2d20 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -101,8 +101,6 @@ int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *, int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *, struct bkey_i *, enum btree_update_flags); -int __must_check bch2_trans_update_buffered(struct btree_trans *, - enum btree_id, struct bkey_i *); struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned); @@ -123,6 +121,25 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) return e; } +int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *); + +static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, + enum btree_id btree, + struct bkey_i *k) +{ + if (unlikely(trans->journal_replay_not_finished)) + return bch2_btree_insert_clone_trans(trans, btree, k); + + struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s)); + int ret = PTR_ERR_OR_ZERO(e); + if (ret) + return ret; + + journal_entry_init(e, BCH_JSET_ENTRY_write_buffer_keys, btree, 0, k->k.u64s); + bkey_copy(e->start, k); + return 0; +} + void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); int __bch2_trans_commit(struct btree_trans *, unsigned); @@ -174,11 +191,6 @@ static inline int bch2_trans_commit(struct btree_trans *trans, (_i) < (_trans)->updates + (_trans)->nr_updates; \ (_i)++) -#define trans_for_each_wb_update(_trans, _i) \ - for ((_i) = (_trans)->wb_updates; \ - (_i) < (_trans)->wb_updates + (_trans)->nr_wb_updates; \ - (_i)++) - static inline void bch2_trans_reset_updates(struct btree_trans *trans) { struct btree_insert_entry *i; @@ -189,8 +201,6 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans) trans->extra_journal_res = 0; trans->nr_updates = 0; trans->journal_entries_u64s = 0; - trans->nr_wb_updates = 0; - trans->wb_updates = NULL; trans->hooks = NULL; if (trans->fs_usage_deltas) { From 73ffa5305694bfda7e39e079dcbd2199fb410781 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 02:13:33 -0500 Subject: [PATCH 108/226] bcachefs: Drop journal entry compaction Previously, we dropped empty journal entries and coalesced entries that could be - but it's not worth the overhead; we very rarely leave unused journal entries after getting a journal reservation. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 4d8e10c901a8..6db821c86551 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1725,7 +1725,7 @@ static CLOSURE_CALLBACK(do_journal_write) static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct jset_entry *start, *end, *i, *next, *prev = NULL; + struct jset_entry *start, *end, *i; struct jset *jset = w->data; struct journal_keys_to_wb wb = { NULL }; unsigned sectors, bytes, u64s; @@ -1742,7 +1742,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) * If we wanted to be really fancy here, we could sort all the keys in * the jset and drop keys that were overwritten - probably not worth it: */ - vstruct_for_each_safe(jset, i, next) { + vstruct_for_each(jset, i) { unsigned u64s = le16_to_cpu(i->u64s); /* Empty entry: */ @@ -1782,34 +1782,12 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) i->type = BCH_JSET_ENTRY_btree_keys; break; } - - /* Can we merge with previous entry? */ - if (prev && - i->btree_id == prev->btree_id && - i->level == prev->level && - i->type == prev->type && - i->type == BCH_JSET_ENTRY_btree_keys && - le16_to_cpu(prev->u64s) + u64s <= U16_MAX) { - memmove_u64s_down(vstruct_next(prev), - i->_data, - u64s); - le16_add_cpu(&prev->u64s, u64s); - continue; - } - - /* Couldn't merge, move i into new position (after prev): */ - prev = prev ? vstruct_next(prev) : jset->start; - if (i != prev) - memmove_u64s_down(prev, i, jset_u64s(u64s)); } if (wb.wb) bch2_journal_keys_to_write_buffer_end(c, &wb); w->need_flush_to_write_buffer = false; - prev = prev ? vstruct_next(prev) : jset->start; - jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); - start = end = vstruct_last(jset); end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have); From e06af20719a439730a588155e3b28d327a57d951 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 15 Dec 2023 22:16:51 -0500 Subject: [PATCH 109/226] bcachefs: fix userspace build errors Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/bcachefs_format.h | 2 +- fs/bcachefs/btree_iter.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index db246d00c607..f6b621412ee4 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -193,6 +193,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 3d9393eecb93..c134da7d8194 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -2249,7 +2249,7 @@ static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage struct jset_entry_log { struct jset_entry entry; u8 d[]; -} __packed; +} __packed __aligned(8); /* * On disk format for a journal entry: diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 9946b9b6b5a5..ed4b8c302a9a 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2699,8 +2699,8 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) { unsigned new_top = trans->mem_top + size; - size_t old_bytes = trans->mem_bytes; - size_t new_bytes = roundup_pow_of_two(new_top); + unsigned old_bytes = trans->mem_bytes; + unsigned new_bytes = roundup_pow_of_two(new_top); int ret; void *new_mem; void *p; From cf904c8d964fa477cdb83445a03d05e9eda5d65c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 22:43:41 -0500 Subject: [PATCH 110/226] bcachefs: bch_err_(fn|msg) check if should print Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 3 +- fs/bcachefs/backpointers.c | 12 ++--- fs/bcachefs/bcachefs.h | 9 +++- fs/bcachefs/btree_gc.c | 43 ++++++---------- fs/bcachefs/btree_update_interior.c | 5 +- fs/bcachefs/buckets.c | 3 +- fs/bcachefs/ec.c | 24 +++------ fs/bcachefs/fs.c | 11 ++-- fs/bcachefs/fsck.c | 6 +-- fs/bcachefs/inode.c | 2 +- fs/bcachefs/journal.c | 6 +-- fs/bcachefs/journal_reclaim.c | 5 +- fs/bcachefs/logged_ops.c | 3 +- fs/bcachefs/lru.c | 3 +- fs/bcachefs/migrate.c | 5 +- fs/bcachefs/move.c | 5 +- fs/bcachefs/movinggc.c | 7 ++- fs/bcachefs/quota.c | 3 +- fs/bcachefs/rebalance.c | 5 +- fs/bcachefs/recovery.c | 24 ++++----- fs/bcachefs/snapshot.c | 65 +++++++++--------------- fs/bcachefs/subvolume.c | 17 +++---- fs/bcachefs/super.c | 79 +++++++++++------------------ 23 files changed, 133 insertions(+), 212 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 5e7ec368b16f..006ac48ec102 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1470,8 +1470,7 @@ int bch2_check_alloc_info(struct bch_fs *c) bch2_check_bucket_gens_key(trans, &iter, k)); err: bch2_trans_put(trans); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index d0299361e8d2..b0a933d04337 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -393,15 +393,13 @@ int bch2_check_btree_backpointers(struct bch_fs *c) { struct btree_iter iter; struct bkey_s_c k; - int ret; - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, 0, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_check_btree_backpointer(trans, &iter, k))); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -769,8 +767,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) } bch2_trans_put(trans); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -877,7 +874,6 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) } bch2_trans_put(trans); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index f6b621412ee4..fe65dfea0780 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -315,15 +315,20 @@ do { \ #define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \ bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) +static inline bool should_print_err(int err) +{ + return err && !bch2_err_matches(err, BCH_ERR_transaction_restart); +} + #define bch_err_fn(_c, _ret) \ do { \ - if (_ret && !bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ + if (should_print_err(_ret)) \ bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\ } while (0) #define bch_err_msg(_c, _ret, _msg, ...) \ do { \ - if (_ret && !bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ + if (should_print_err(_ret)) \ bch_err(_c, "%s(): error " _msg " %s", __func__, \ ##__VA_ARGS__, bch2_err_str(_ret)); \ } while (0) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index ae880661fddb..f0951c17a27a 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -414,10 +414,9 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct continue; } - if (ret) { - bch_err_msg(c, ret, "getting btree node"); + bch_err_msg(c, ret, "getting btree node"); + if (ret) break; - } ret = btree_repair_node_boundaries(c, b, prev, cur); @@ -482,10 +481,9 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct false); ret = PTR_ERR_OR_ZERO(cur); - if (ret) { - bch_err_msg(c, ret, "getting btree node"); + bch_err_msg(c, ret, "getting btree node"); + if (ret) goto err; - } ret = bch2_btree_repair_topology_recurse(trans, cur); six_unlock_read(&cur->c.lock); @@ -707,8 +705,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); if (!new) { - bch_err_msg(c, ret, "allocating new key"); ret = -BCH_ERR_ENOMEM_gc_repair_key; + bch_err_msg(c, ret, "allocating new key"); goto err; } @@ -834,8 +832,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, bch2_mark_key(trans, btree_id, level, old, *k, flags)); fsck_err: err: - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -1068,8 +1065,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, fsck_err: six_unlock_read(&b->c.lock); - if (ret < 0) - bch_err_fn(c, ret); + bch_err_fn(c, ret); printbuf_exit(&buf); return ret; } @@ -1105,10 +1101,8 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only) : bch2_gc_btree(trans, i, initial, metadata_only); } - if (ret < 0) - bch_err_fn(c, ret); - bch2_trans_put(trans); + bch_err_fn(c, ret); return ret; } @@ -1304,8 +1298,7 @@ static int bch2_gc_done(struct bch_fs *c, fsck_err: if (ca) percpu_ref_put(&ca->ref); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); percpu_up_write(&c->mark_lock); printbuf_exit(&buf); @@ -1563,8 +1556,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) })); err: bch2_trans_put(trans); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -1659,6 +1651,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) c->reflink_gc_nr = 0; bch2_trans_put(trans); + bch_err_fn(c, ret); return ret; } @@ -1896,9 +1889,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) * allocator thread - issue wakeup in case they blocked on gc_lock: */ closure_wake_up(&c->freelist_wait); - - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -2015,7 +2006,7 @@ int bch2_gc_gens(struct bch_fs *c) NULL, NULL, BCH_TRANS_COMMIT_no_enospc, gc_btree_gens_key(trans, &iter, k)); - if (ret && !bch2_err_matches(ret, EROFS)) + if (!bch2_err_matches(ret, EROFS)) bch_err_fn(c, ret); if (ret) goto err; @@ -2028,7 +2019,7 @@ int bch2_gc_gens(struct bch_fs *c) NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_alloc_write_oldest_gen(trans, &iter, k)); - if (ret && !bch2_err_matches(ret, EROFS)) + if (!bch2_err_matches(ret, EROFS)) bch_err_fn(c, ret); if (ret) goto err; @@ -2058,7 +2049,6 @@ static int bch2_gc_thread(void *arg) struct io_clock *clock = &c->io_clock[WRITE]; unsigned long last = atomic64_read(&clock->now); unsigned last_kick = atomic_read(&c->kick_gc); - int ret; set_freezable(); @@ -2098,11 +2088,8 @@ static int bch2_gc_thread(void *arg) #if 0 ret = bch2_gc(c, false, false); #else - ret = bch2_gc_gens(c); + bch2_gc_gens(c); #endif - if (ret < 0) - bch_err_fn(c, ret); - debug_check_no_locks_held(); } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index cffc66cd481f..6350c661ce09 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2054,8 +2054,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work) ret = bch2_trans_do(c, NULL, NULL, 0, async_btree_node_rewrite_trans(trans, a)); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); kfree(a); } @@ -2093,8 +2092,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) } ret = bch2_fs_read_write_early(c); + bch_err_msg(c, ret, "going read-write"); if (ret) { - bch_err_msg(c, ret, "going read-write"); kfree(a); return; } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 27c743882b63..31b7f0ba7d9e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1760,8 +1760,7 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca) { int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(trans, ca)); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 76163c2ea3b3..6cbc1d8a3ab5 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -791,28 +791,22 @@ static void ec_stripe_delete_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, ec_stripe_delete_work); - struct btree_trans *trans = bch2_trans_get(c); - int ret; - u64 idx; while (1) { mutex_lock(&c->ec_stripes_heap_lock); - idx = stripe_idx_to_delete(c); + u64 idx = stripe_idx_to_delete(c); mutex_unlock(&c->ec_stripes_heap_lock); if (!idx) break; - ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - ec_stripe_delete(trans, idx)); - if (ret) { - bch_err_fn(c, ret); + int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + ec_stripe_delete(trans, idx)); + bch_err_fn(c, ret); + if (ret) break; - } } - bch2_trans_put(trans); - bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete); } @@ -1126,16 +1120,15 @@ static void ec_stripe_create(struct ec_stripe_new *s) ec_stripe_key_update(trans, bkey_i_to_stripe(&s->new_stripe.key), !s->have_existing_stripe)); + bch_err_msg(c, ret, "creating stripe key"); if (ret) { - bch_err(c, "error creating stripe: error creating stripe key"); goto err; } ret = ec_stripe_update_extents(c, &s->new_stripe); - if (ret) { - bch_err_msg(c, ret, "creating stripe: error updating pointers"); + bch_err_msg(c, ret, "error updating extents"); + if (ret) goto err; - } err: bch2_disk_reservation_put(c, &s->res); @@ -1865,7 +1858,6 @@ int bch2_stripes_read(struct bch_fs *c) bch2_stripes_heap_insert(c, m, k.k->p.offset); 0; }))); - bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 886c0f30e681..4c35df1230b5 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1012,15 +1012,13 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret; if (!dir_emit_dots(file, ctx)) return 0; - ret = bch2_readdir(c, inode_inum(inode), ctx); - if (ret) - bch_err_fn(c, ret); + int ret = bch2_readdir(c, inode_inum(inode), ctx); + bch_err_fn(c, ret); return bch2_err_class(ret); } @@ -1944,10 +1942,9 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); - if (ret) { - bch_err_msg(c, ret, "mounting: error getting root inode"); + bch_err_msg(c, ret, "mounting: error getting root inode"); + if (ret) goto err_put_super; - } sb->s_root = d_make_root(vinode); if (!sb->s_root) { diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index d0cd6020bd9a..7023c2800d59 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -209,8 +209,7 @@ static int fsck_write_inode(struct btree_trans *trans, { int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, __write_inode(trans, inode, snapshot)); - if (ret) - bch_err_fn(trans->c, ret); + bch_err_fn(trans->c, ret); return ret; } @@ -2427,7 +2426,6 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, { struct bkey_s_c_reflink_p p; struct bkey_i_reflink_p *u; - int ret; if (k.k->type != KEY_TYPE_reflink_p) return 0; @@ -2438,7 +2436,7 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, return 0; u = bch2_trans_kmalloc(trans, sizeof(*u)); - ret = PTR_ERR_OR_ZERO(u); + int ret = PTR_ERR_OR_ZERO(u); if (ret) return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 7ee9ac5e4479..e75c6bc2bc28 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1200,6 +1200,6 @@ int bch2_delete_dead_inodes(struct bch_fs *c) } err: bch2_trans_put(trans); - + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 8294d7fd6632..5883fc38b4f5 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -993,8 +993,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, break; } - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); unlock: up_write(&c->state_lock); return ret; @@ -1024,8 +1023,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca) ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL); err: - if (ret) - bch_err_fn(ca, ret); + bch_err_fn(ca, ret); return ret; } diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 60b9d3572387..794c10ec01b6 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -793,10 +793,9 @@ int bch2_journal_reclaim_start(struct journal *j) p = kthread_create(bch2_journal_reclaim_thread, j, "bch-reclaim/%s", c->name); ret = PTR_ERR_OR_ZERO(p); - if (ret) { - bch_err_msg(c, ret, "creating journal reclaim thread"); + bch_err_msg(c, ret, "creating journal reclaim thread"); + if (ret) return ret; - } get_task_struct(p); j->reclaim_thread = p; diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 4b603746063c..80fcea641e77 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -63,8 +63,7 @@ int bch2_resume_logged_ops(struct bch_fs *c) BTREE_ID_logged_ops, POS_MIN, BTREE_ITER_PREFETCH, k, resume_logged_op(trans, &iter, k))); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index e6d081c0592c..4de1bc242660 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -157,8 +157,7 @@ int bch2_check_lrus(struct bch_fs *c) BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 8e5688d0a8ca..a2d998dbcc88 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -145,10 +145,9 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) continue; } - if (ret) { - bch_err_msg(c, ret, "updating btree node key"); + bch_err_msg(c, ret, "updating btree node key"); + if (ret) break; - } next: bch2_btree_iter_next_node(&iter); } diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 9be421e2a577..b1b9566c7a61 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -669,10 +669,9 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); bch2_trans_iter_exit(trans, &iter); - if (ret) { - bch_err_msg(c, ret, "looking up alloc key"); + bch_err_msg(c, ret, "looking up alloc key"); + if (ret) goto err; - } a = bch2_alloc_to_v4(k, &a_convert); dirty_sectors = bch2_bucket_sectors_dirty(*a); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index fd239a261aca..963191a2f4e0 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -324,9 +324,9 @@ static int bch2_copygc_thread(void *arg) if (!buckets) return -ENOMEM; ret = rhashtable_init(&buckets->table, &bch_move_bucket_params); + bch_err_msg(c, ret, "allocating copygc buckets in flight"); if (ret) { kfree(buckets); - bch_err_msg(c, ret, "allocating copygc buckets in flight"); return ret; } @@ -423,10 +423,9 @@ int bch2_copygc_start(struct bch_fs *c) t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name); ret = PTR_ERR_OR_ZERO(t); - if (ret) { - bch_err_msg(c, ret, "creating copygc thread"); + bch_err_msg(c, ret, "creating copygc thread"); + if (ret) return ret; - } get_task_struct(t); diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 79724a7aaab0..a33f370e1fc7 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -626,8 +626,7 @@ int bch2_fs_quota_read(struct bch_fs *c) bch2_trans_put(trans); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 9c415e14ff9c..95f46cb3b5bd 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -470,10 +470,9 @@ int bch2_rebalance_start(struct bch_fs *c) p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); ret = PTR_ERR_OR_ZERO(p); - if (ret) { - bch_err_msg(c, ret, "creating rebalance thread"); + bch_err_msg(c, ret, "creating rebalance thread"); + if (ret) return ret; - } get_task_struct(p); rcu_assign_pointer(c->rebalance.thread, p); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1099b2f51252..3b84c4ab2c4f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -463,8 +463,7 @@ static int bch2_initialize_subvolumes(struct bch_fs *c) ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -504,8 +503,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) { int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, __bch2_fs_upgrade_for_subvolumes(trans)); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -1087,8 +1085,7 @@ int bch2_fs_recovery(struct bch_fs *c) bch2_delete_dead_snapshots_async(c); } - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; err: fsck_err: @@ -1179,10 +1176,9 @@ int bch2_fs_initialize(struct bch_fs *c) packed_inode.inode.k.p.snapshot = U32_MAX; ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0); - if (ret) { - bch_err_msg(c, ret, "creating root directory"); + bch_err_msg(c, ret, "creating root directory"); + if (ret) goto err; - } bch2_inode_init_early(c, &lostfound_inode); @@ -1193,10 +1189,9 @@ int bch2_fs_initialize(struct bch_fs *c) &lostfound, 0, 0, S_IFDIR|0700, 0, NULL, NULL, (subvol_inum) { 0 }, 0)); - if (ret) { - bch_err_msg(c, ret, "creating lost+found"); + bch_err_msg(c, ret, "creating lost+found"); + if (ret) goto err; - } c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1; @@ -1207,10 +1202,9 @@ int bch2_fs_initialize(struct bch_fs *c) } ret = bch2_journal_flush(&c->journal); - if (ret) { - bch_err_msg(c, ret, "writing first journal entry"); + bch_err_msg(c, ret, "writing first journal entry"); + if (ret) goto err; - } mutex_lock(&c->sb_lock); SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 8d1800ef22b5..b0ef415914dc 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -459,7 +459,6 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; - struct bkey_s_c_subvolume s; bool found = false; int ret; @@ -468,7 +467,7 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, if (k.k->type != KEY_TYPE_subvolume) continue; - s = bkey_s_c_to_subvolume(k); + struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root)) continue; if (!BCH_SUBVOLUME_SNAP(s.v)) { @@ -592,9 +591,7 @@ int bch2_check_snapshot_trees(struct bch_fs *c) BTREE_ITER_PREFETCH, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_snapshot_tree(trans, &iter, k))); - - if (ret) - bch_err(c, "error %i checking snapshot trees", ret); + bch_err_fn(c, ret); return ret; } @@ -866,12 +863,11 @@ int bch2_check_snapshots(struct bch_fs *c) */ ret = bch2_trans_run(c, for_each_btree_key_reverse_commit(trans, iter, - BTREE_ID_snapshots, POS_MAX, - BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_snapshot(trans, &iter, k))); - if (ret) - bch_err_fn(c, ret); + BTREE_ID_snapshots, POS_MAX, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_snapshot(trans, &iter, k))); + bch_err_fn(c, ret); return ret; } @@ -1381,10 +1377,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!test_bit(BCH_FS_started, &c->flags)) { ret = bch2_fs_read_write_early(c); - if (ret) { - bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); + bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); + if (ret) return ret; - } } trans = bch2_trans_get(c); @@ -1397,18 +1392,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) POS_MIN, 0, k, NULL, NULL, 0, bch2_delete_redundant_snapshot(trans, k)); - if (ret) { - bch_err_msg(c, ret, "deleting redundant snapshots"); + bch_err_msg(c, ret, "deleting redundant snapshots"); + if (ret) goto err; - } ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, bch2_snapshot_set_equiv(trans, k)); - if (ret) { - bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); + bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); + if (ret) goto err; - } ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ({ @@ -1420,11 +1413,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ? snapshot_list_add(c, &deleted, k.k->p.offset) : 0; })); - - if (ret) { - bch_err_msg(c, ret, "walking snapshots"); + bch_err_msg(c, ret, "walking snapshots"); + if (ret) goto err; - } for (id = 0; id < BTREE_ID_NR; id++) { struct bpos last_pos = POS_MIN; @@ -1457,10 +1448,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) bch2_disk_reservation_put(c, &res); darray_exit(&equiv_seen); - if (ret) { - bch_err_msg(c, ret, "deleting keys from dying snapshots"); + bch_err_msg(c, ret, "deleting keys from dying snapshots"); + if (ret) goto err; - } } bch2_trans_unlock(trans); @@ -1476,10 +1466,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) : 0; })); - if (ret) { - bch_err_msg(c, ret, "walking snapshots"); + bch_err_msg(c, ret, "walking snapshots"); + if (ret) goto err_create_lock; - } /* * Fixing children of deleted snapshots can't be done completely @@ -1496,19 +1485,17 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) darray_for_each(deleted, i) { ret = commit_do(trans, NULL, NULL, 0, bch2_snapshot_node_delete(trans, *i)); - if (ret) { - bch_err_msg(c, ret, "deleting snapshot %u", *i); + bch_err_msg(c, ret, "deleting snapshot %u", *i); + if (ret) goto err_create_lock; - } } darray_for_each(deleted_interior, i) { ret = commit_do(trans, NULL, NULL, 0, bch2_snapshot_node_delete(trans, *i)); - if (ret) { - bch_err_msg(c, ret, "deleting snapshot %u", *i); + bch_err_msg(c, ret, "deleting snapshot %u", *i); + if (ret) goto err_create_lock; - } } err_create_lock: up_write(&c->snapshot_create_lock); @@ -1516,8 +1503,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) darray_exit(&deleted_interior); darray_exit(&deleted); bch2_trans_put(trans); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -1701,8 +1687,7 @@ int bch2_snapshots_read(struct bch_fs *c) for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index f0f91211f757..be0de4f030a4 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -38,8 +38,7 @@ static int check_subvol(struct btree_trans *trans, if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { ret = bch2_subvolume_delete(trans, iter->pos.offset); - if (ret) - bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); + bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); return ret ?: -BCH_ERR_transaction_restart_nested; } @@ -86,11 +85,10 @@ int bch2_check_subvols(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, - BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_subvol(trans, &iter, k))); - if (ret) - bch_err_fn(c, ret); + BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_subvol(trans, &iter, k))); + bch_err_fn(c, ret); return ret; } @@ -297,10 +295,9 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor for (id = s.data; id < s.data + s.nr; id++) { ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); - if (ret) { - bch_err_msg(c, ret, "deleting subvolume %u", *id); + bch_err_msg(c, ret, "deleting subvolume %u", *id); + if (ret) break; - } } darray_exit(&s); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 88a762bce7da..c04d425c872f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1522,9 +1522,7 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) BTREE_TRIGGER_NORUN, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end, BTREE_TRIGGER_NORUN, NULL); - if (ret) - bch_err_msg(c, ret, "removing dev alloc info"); - + bch_err_msg(c, ret, "removing dev alloc info"); return ret; } @@ -1551,34 +1549,29 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) __bch2_dev_read_only(c, ca); ret = bch2_dev_data_drop(c, ca->dev_idx, flags); - if (ret) { - bch_err_msg(ca, ret, "dropping data"); + bch_err_msg(ca, ret, "dropping data"); + if (ret) goto err; - } ret = bch2_dev_remove_alloc(c, ca); - if (ret) { - bch_err_msg(ca, ret, "deleting alloc info"); + bch_err_msg(ca, ret, "deleting alloc info"); + if (ret) goto err; - } ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); - if (ret) { - bch_err_msg(ca, ret, "flushing journal"); + bch_err_msg(ca, ret, "flushing journal"); + if (ret) goto err; - } ret = bch2_journal_flush(&c->journal); - if (ret) { - bch_err(ca, "journal error"); + bch_err(ca, "journal error"); + if (ret) goto err; - } ret = bch2_replicas_gc2(c); - if (ret) { - bch_err_msg(ca, ret, "in replicas_gc2()"); + bch_err_msg(ca, ret, "in replicas_gc2()"); + if (ret) goto err; - } data = bch2_dev_has_data(c, ca); if (data) { @@ -1650,10 +1643,9 @@ int bch2_dev_add(struct bch_fs *c, const char *path) int ret; ret = bch2_read_super(path, &opts, &sb); - if (ret) { - bch_err_msg(c, ret, "reading super"); + bch_err_msg(c, ret, "reading super"); + if (ret) goto err; - } dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx); @@ -1666,10 +1658,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path) } ret = bch2_dev_may_add(sb.sb, c); - if (ret) { - bch_err_fn(c, ret); + if (ret) goto err; - } ca = __bch2_dev_alloc(c, &dev_mi); if (!ca) { @@ -1684,19 +1674,17 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err; ret = bch2_dev_journal_alloc(ca); - if (ret) { - bch_err_msg(c, ret, "allocating journal"); + bch_err_msg(c, ret, "allocating journal"); + if (ret) goto err; - } down_write(&c->state_lock); mutex_lock(&c->sb_lock); ret = bch2_sb_from_fs(c, ca); - if (ret) { - bch_err_msg(c, ret, "setting up new superblock"); + bch_err_msg(c, ret, "setting up new superblock"); + if (ret) goto err_unlock; - } if (dynamic_fault("bcachefs:add:no_slot")) goto no_slot; @@ -1735,10 +1723,9 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (BCH_MEMBER_GROUP(&dev_mi)) { ret = __bch2_dev_group_set(c, ca, label.buf); - if (ret) { - bch_err_msg(c, ret, "creating new label"); + bch_err_msg(c, ret, "creating new label"); + if (ret) goto err_unlock; - } } bch2_write_super(c); @@ -1747,16 +1734,14 @@ int bch2_dev_add(struct bch_fs *c, const char *path) bch2_dev_usage_journal_reserve(c); ret = bch2_trans_mark_dev_sb(c, ca); - if (ret) { - bch_err_msg(ca, ret, "marking new superblock"); + bch_err_msg(ca, ret, "marking new superblock"); + if (ret) goto err_late; - } ret = bch2_fs_freespace_init(c); - if (ret) { - bch_err_msg(ca, ret, "initializing free space"); + bch_err_msg(ca, ret, "initializing free space"); + if (ret) goto err_late; - } ca->new_fs_bucket_idx = 0; @@ -1775,6 +1760,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) bch2_free_super(&sb); printbuf_exit(&label); printbuf_exit(&errbuf); + bch_err_fn(c, ret); return ret; err_late: up_write(&c->state_lock); @@ -1802,10 +1788,9 @@ int bch2_dev_online(struct bch_fs *c, const char *path) dev_idx = sb.sb->dev_idx; ret = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); - if (ret) { - bch_err_msg(c, ret, "bringing %s online", path); + bch_err_msg(c, ret, "bringing %s online", path); + if (ret) goto err; - } ret = bch2_dev_attach_bdev(c, &sb); if (ret) @@ -1814,10 +1799,9 @@ int bch2_dev_online(struct bch_fs *c, const char *path) ca = bch_dev_locked(c, dev_idx); ret = bch2_trans_mark_dev_sb(c, ca); - if (ret) { - bch_err_msg(c, ret, "bringing %s online: error from bch2_trans_mark_dev_sb", path); + bch_err_msg(c, ret, "bringing %s online: error from bch2_trans_mark_dev_sb", path); + if (ret) goto err; - } if (ca->mi.state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); @@ -1896,10 +1880,9 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) } ret = bch2_dev_buckets_resize(c, ca, nbuckets); - if (ret) { - bch_err_msg(ca, ret, "resizing buckets"); + bch_err_msg(ca, ret, "resizing buckets"); + if (ret) goto err; - } ret = bch2_trans_mark_dev_sb(c, ca); if (ret) From 038fecc045932171e882a4e3668208c28f66f795 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 21:16:34 -0500 Subject: [PATCH 111/226] bcachefs: qstr_eq() Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 4 ++-- fs/bcachefs/fsck.c | 2 -- fs/bcachefs/util.h | 7 +++++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 580c1c95361e..78ce9634d05c 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -65,7 +65,7 @@ static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) const struct qstr l_name = bch2_dirent_get_name(l); const struct qstr *r_name = _r; - return l_name.len - r_name->len ?: memcmp(l_name.name, r_name->name, l_name.len); + return !qstr_eq(l_name, *r_name); } static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) @@ -75,7 +75,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) const struct qstr l_name = bch2_dirent_get_name(l); const struct qstr r_name = bch2_dirent_get_name(r); - return l_name.len - r_name.len ?: memcmp(l_name.name, r_name.name, l_name.len); + return !qstr_eq(l_name, r_name); } static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 7023c2800d59..43801e29bc5a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -20,8 +20,6 @@ #include #include /* struct qstr */ -#define QSTR(n) { { { .len = strlen(n) } }, .name = n } - /* * XXX: this is handling transaction restarts without returning * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 7eb567ab4457..1ff063bb8741 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -856,4 +856,11 @@ static inline int cmp_le32(__le32 l, __le32 r) #include +#define QSTR(n) { { { .len = strlen(n) } }, .name = n } + +static inline bool qstr_eq(const struct qstr l, const struct qstr r) +{ + return l.len == r.len && !memcmp(l.name, r.name, l.len); +} + #endif /* _BCACHEFS_UTIL_H */ From ac19c4c3d02e19b365209d2cee409671e1fb66bb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 18 Dec 2023 18:26:26 -0600 Subject: [PATCH 112/226] bcachefs: Use array_size() in call to copy_from_user() Use array_size() helper, instead of the open-coded version in call to copy_from_user(). Signed-off-by: Gustavo A. R. Silva Signed-off-by: Kent Overstreet --- fs/bcachefs/chardev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 08922f7e380a..295b1f4e9ece 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -360,7 +360,8 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a init_waitqueue_head(&thr->output.wait); darray_init(&thr->output2); - if (copy_from_user(devs, &user_arg->devs[0], sizeof(user_arg->devs[0]) * arg.nr_devs)) { + if (copy_from_user(devs, &user_arg->devs[0], + array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) { ret = -EINVAL; goto err; } From 4c26dea1c096138b6e7c49c4886e68fbf6013217 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 18 Dec 2023 18:24:53 -0600 Subject: [PATCH 113/226] bcachefs: Replace zero-length array with flex-array member and use __counted_by Fake flexible arrays (zero-length and one-element arrays) are deprecated, and should be replaced by flexible-array members. So, replace zero-length array with a flexible-array member in `struct bch_ioctl_fsck_offline`. Also annotate array `devs` with `__counted_by()` to prepare for the coming implementation by GCC and Clang of the `__counted_by` attribute. Flexible array members annotated with `__counted_by` can have their accesses bounds-checked at run-time via `CONFIG_UBSAN_BOUNDS` (for array indexing) and `CONFIG_FORTIFY_SOURCE` (for strcpy/memcpy-family functions). This fixes the following -Warray-bounds warnings: fs/bcachefs/chardev.c: In function 'bch2_ioctl_fsck_offline': fs/bcachefs/chardev.c:363:34: warning: array subscript 0 is outside array bounds of '__u64[0]' {aka 'long long unsigned int[]'} [-Warray-bounds=] 363 | if (copy_from_user(devs, &user_arg->devs[0], sizeof(user_arg->devs[0]) * arg.nr_devs)) { | ^~~~~~~~~~~~~~~~~~ In file included from fs/bcachefs/chardev.c:5: fs/bcachefs/bcachefs_ioctl.h:400:33: note: while referencing 'devs' 400 | __u64 devs[0]; This results in no differences in binary output. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 21f81b16f24e..4b8fba754b1c 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -397,7 +397,7 @@ struct bch_ioctl_fsck_offline { __u64 flags; __u64 opts; /* string */ __u64 nr_devs; - __u64 devs[0]; + __u64 devs[] __counted_by(nr_devs); }; /* From 548673f8d39249d717d02038bd192a076e79edf0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 19 Dec 2023 16:27:38 -0500 Subject: [PATCH 114/226] bcachefs: drop extra semicolon Signed-off-by: Kent Overstreet --- fs/bcachefs/reflink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 1d56470e1849..9f9c8a244c80 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -310,7 +310,7 @@ static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned * if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) { new->k.type = KEY_TYPE_deleted; new->k.size = 0; - set_bkey_val_u64s(&new->k, 0);; + set_bkey_val_u64s(&new->k, 0); *flags &= ~BTREE_TRIGGER_INSERT; } } From a7dc10ce689a6224c1601d5d2e4ca57dd8cce9f6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 19 Dec 2023 18:08:19 -0500 Subject: [PATCH 115/226] bcachefs: Make sure allocation failure errors are logged The previous patch fixed a bug in allocation path error handling, and it would've been noticed sooner had it been logged properly. Generally speaking, errors that shouldn't happen in normal operation and are being returned up the stack should be logged: the write path was already logging IO errors, but non IO errors were missed. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 6 ++++++ fs/bcachefs/btree_update_interior.c | 3 +++ fs/bcachefs/data_update.c | 2 ++ fs/bcachefs/io_misc.c | 9 +++++++-- fs/bcachefs/io_write.c | 4 ++++ 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index fe65dfea0780..53a2624176ab 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -326,6 +326,12 @@ do { \ bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\ } while (0) +#define bch_err_fn_ratelimited(_c, _ret) \ +do { \ + if (should_print_err(_ret)) \ + bch_err_ratelimited(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\ +} while (0) + #define bch_err_msg(_c, _ret, _msg, ...) \ do { \ if (should_print_err(_ret)) \ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6350c661ce09..f0e3f51e62ca 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1190,6 +1190,9 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, return as; err: bch2_btree_update_free(as, trans); + if (!bch2_err_matches(ret, ENOSPC) && + !bch2_err_matches(ret, EROFS)) + bch_err_fn_ratelimited(c, ret); return ERR_PTR(ret); } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 632e8ed5bbda..1e6d162753fb 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -418,6 +418,8 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans, continue; } + bch_err_fn_ratelimited(c, ret); + if (ret) return; diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index eab0c8c57785..26fdc6f35f04 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -61,7 +61,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, */ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); if (unlikely(ret)) - goto err; + goto err_noprint; bch2_bkey_buf_reassemble(&old, c, k); } @@ -125,7 +125,12 @@ int bch2_extent_fallocate(struct btree_trans *trans, err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); - + if (should_print_err(ret)) + bch_err_inum_offset_ratelimited(c, + inum.inum, + iter->pos.offset << 9, + "%s(): error: %s", __func__, bch2_err_str(ret)); +err_noprint: bch2_open_buckets_put(c, &open_buckets); bch2_disk_reservation_put(c, &disk_res); bch2_bkey_buf_exit(&new, c); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 264d8fcef176..2817f27909b3 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1463,6 +1463,10 @@ static void __bch2_write(struct bch_write_op *op) op->flags |= BCH_WRITE_DONE; if (ret < 0) { + bch_err_inum_offset_ratelimited(c, + op->pos.inode, + op->pos.offset << 9, + "%s(): error: %s", __func__, bch2_err_str(ret)); op->error = ret; break; } From d8d819580ae02ba37388769b72f86cb4432a1cb9 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 19 Dec 2023 09:02:15 -0500 Subject: [PATCH 116/226] bcachefs: clean up some dead fallocate code The have_reservation local variable in bch2_extent_fallocate() is initialized to false and set to true further down in the function. Between this two points, one branch of code checks for negative value and one for positive, and nothing ever checks the variable after it is set to true. Clean up some of the unnecessary logic and code. Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/io_misc.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 26fdc6f35f04..92e7f765f9cf 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -34,8 +34,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, struct open_buckets open_buckets = { 0 }; struct bkey_s_c k; struct bkey_buf old, new; - unsigned sectors_allocated = 0; - bool have_reservation = false; + unsigned sectors_allocated = 0, new_replicas; bool unwritten = opts.nocow && c->sb.version >= bcachefs_metadata_version_unwritten_extents; int ret; @@ -50,28 +49,20 @@ int bch2_extent_fallocate(struct btree_trans *trans, return ret; sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset); + new_replicas = max(0, (int) opts.data_replicas - + (int) bch2_bkey_nr_ptrs_fully_allocated(k)); - if (!have_reservation) { - unsigned new_replicas = - max(0, (int) opts.data_replicas - - (int) bch2_bkey_nr_ptrs_fully_allocated(k)); - /* - * Get a disk reservation before (in the nocow case) calling - * into the allocator: - */ - ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); - if (unlikely(ret)) - goto err_noprint; + /* + * Get a disk reservation before (in the nocow case) calling + * into the allocator: + */ + ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); + if (unlikely(ret)) + goto err_noprint; - bch2_bkey_buf_reassemble(&old, c, k); - } + bch2_bkey_buf_reassemble(&old, c, k); - if (have_reservation) { - if (!bch2_extents_match(k, bkey_i_to_s_c(old.k))) - goto err; - - bch2_key_resize(&new.k->k, sectors); - } else if (!unwritten) { + if (!unwritten) { struct bkey_i_reservation *reservation; bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); @@ -118,8 +109,6 @@ int bch2_extent_fallocate(struct btree_trans *trans, ptr->unwritten = true; } - have_reservation = true; - ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, 0, i_sectors_delta, true); err: From 033c9d7a2a348457239e69a2a9036d2e4029ee4d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Dec 2023 23:57:22 -0500 Subject: [PATCH 117/226] MAINTAINERS: Update my email address Signed-off-by: Kent Overstreet --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f5c2450fa4ec..ae86d5436e6d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3488,7 +3488,7 @@ F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) M: Coly Li -M: Kent Overstreet +M: Kent Overstreet L: linux-bcache@vger.kernel.org S: Maintained W: http://bcache.evilpiepirate.org From 023f9ac9f70fbb1d94d27583fc06225355c73a67 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 19 Dec 2023 21:58:20 -0500 Subject: [PATCH 118/226] bcachefs: Delete dio read alignment check We'll typically fomat devices with the physical blocksize supported, but the logical blocksize will be smaller. There's no real need to be checking the blocksize at the filesystem level, anyways - the block layer has to check this anyways. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-direct.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 84e20c3ada6c..fdd57c5785c9 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -77,9 +77,6 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) bch2_inode_opts_get(&opts, c, &inode->ei_inode); - if ((offset|iter->count) & (block_bytes(c) - 1)) - return -EINVAL; - ret = min_t(loff_t, iter->count, max_t(loff_t, 0, i_size_read(&inode->v) - offset)); From 44fd13a4c68e87953ccd827e764fa566ddcbbcf5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Dec 2023 19:47:55 -0500 Subject: [PATCH 119/226] bcachefs: Fixes for rust bindgen bindgen doesn't seem to like u128 or DECLARE_FLEX_ARRAY(), but we can hack around them. Signed-off-by: Kent Overstreet --- fs/bcachefs/mean_and_variance.h | 5 ++++- fs/bcachefs/subvolume_types.h | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h index 647505010b39..b2be565bb8f2 100644 --- a/fs/bcachefs/mean_and_variance.h +++ b/fs/bcachefs/mean_and_variance.h @@ -12,9 +12,12 @@ /* * u128_u: u128 user mode, because not all architectures support a real int128 * type + * + * We don't use this version in userspace, because in userspace we link with + * Rust and rustc has issues with u128. */ -#ifdef __SIZEOF_INT128__ +#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) typedef struct { unsigned __int128 v; diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h index 2d2e66a4e468..ae644adfc391 100644 --- a/fs/bcachefs/subvolume_types.h +++ b/fs/bcachefs/subvolume_types.h @@ -20,7 +20,11 @@ struct snapshot_t { }; struct snapshot_table { +#ifndef RUST_BINDGEN DECLARE_FLEX_ARRAY(struct snapshot_t, s); +#else + struct snapshot_t s[0]; +#endif }; typedef struct { From 447c1c01051251853e851bd77f26546488cbc7b7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 22 Dec 2023 21:58:43 -0500 Subject: [PATCH 120/226] bcachefs: check for failure to downgrade With the upcoming member seq patch, it's now critical that we don't ever write to a superblock that hasn't been version downgraded - failure to update member seq fields will cause split brain detection to fire erroniously. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/super-io.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index c39bea441983..d7dc8f644a05 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -223,6 +223,7 @@ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(EIO, btree_node_read_err) \ + x(EIO, sb_not_downgraded) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 55e560c198de..b610dfb0287a 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -987,6 +987,18 @@ int bch2_write_super(struct bch_fs *c) if (!BCH_SB_INITIALIZED(c->disk_sb.sb)) goto out; + if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) { + struct printbuf buf = PRINTBUF; + prt_printf(&buf, "attempting to write superblock that wasn't version downgraded ("); + bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version)); + prt_str(&buf, " > "); + bch2_version_to_text(&buf, bcachefs_metadata_version_current); + prt_str(&buf, ")"); + bch2_fs_fatal_error(c, "%s", buf.buf); + printbuf_exit(&buf); + return -BCH_ERR_sb_not_downgraded; + } + for_each_online_member(ca, c, i) { __set_bit(ca->dev_idx, sb_written.d); ca->sb_write_error = 0; From 920388254f613410324a10695b43485a8d9f838a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Dec 2023 22:44:09 -0800 Subject: [PATCH 121/226] bcachefs: mean and variance: fix kernel-doc for function params Add missing function parameter descriptions in mean_and_variance.c. The also eliminates the "Excess function parameter" warnings. Prevents these kernel-doc warnings: mean_and_variance.c:67: warning: Function parameter or member 's' not described in 'mean_and_variance_get_mean' mean_and_variance.c:78: warning: Function parameter or member 's1' not described in 'mean_and_variance_get_variance' mean_and_variance.c:94: warning: Function parameter or member 's' not described in 'mean_and_variance_get_stddev' mean_and_variance.c:108: warning: Function parameter or member 's' not described in 'mean_and_variance_weighted_update' mean_and_variance.c:108: warning: Function parameter or member 'x' not described in 'mean_and_variance_weighted_update' mean_and_variance.c:108: warning: Excess function parameter 's1' description in 'mean_and_variance_weighted_update' mean_and_variance.c:108: warning: Excess function parameter 's2' description in 'mean_and_variance_weighted_update' mean_and_variance.c:134: warning: Function parameter or member 's' not described in 'mean_and_variance_weighted_get_mean' mean_and_variance.c:143: warning: Function parameter or member 's' not described in 'mean_and_variance_weighted_get_variance' mean_and_variance.c:153: warning: Function parameter or member 's' not described in 'mean_and_variance_weighted_get_stddev' Signed-off-by: Randy Dunlap Cc: Kent Overstreet Cc: Brian Foster Cc: linux-bcachefs@vger.kernel.org Signed-off-by: Kent Overstreet --- fs/bcachefs/mean_and_variance.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/mean_and_variance.c b/fs/bcachefs/mean_and_variance.c index 1f0801e2e565..bf0ef668fd38 100644 --- a/fs/bcachefs/mean_and_variance.c +++ b/fs/bcachefs/mean_and_variance.c @@ -62,6 +62,7 @@ EXPORT_SYMBOL_GPL(u128_div); /** * mean_and_variance_get_mean() - get mean from @s + * @s: mean and variance number of samples and their sums */ s64 mean_and_variance_get_mean(struct mean_and_variance s) { @@ -71,6 +72,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_mean); /** * mean_and_variance_get_variance() - get variance from @s1 + * @s1: mean and variance number of samples and sums * * see linked pdf equation 12. */ @@ -89,6 +91,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_variance); /** * mean_and_variance_get_stddev() - get standard deviation from @s + * @s: mean and variance number of samples and their sums */ u32 mean_and_variance_get_stddev(struct mean_and_variance s) { @@ -98,8 +101,8 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev); /** * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update() - * @s1: .. - * @s2: .. + * @s: mean and variance number of samples and their sums + * @x: new value to include in the &mean_and_variance_weighted * * see linked pdf: function derived from equations 140-143 where alpha = 2^w. * values are stored bitshifted for performance and added precision. @@ -129,6 +132,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update); /** * mean_and_variance_weighted_get_mean() - get mean from @s + * @s: mean and variance number of samples and their sums */ s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s) { @@ -138,6 +142,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean); /** * mean_and_variance_weighted_get_variance() -- get variance from @s + * @s: mean and variance number of samples and their sums */ u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s) { @@ -148,6 +153,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance); /** * mean_and_variance_weighted_get_stddev() - get standard deviation from @s + * @s: mean and variance number of samples and their sums */ u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s) { From 1ad36a010c698fde9148b8e2abf2303e3d6fd001 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 20 Dec 2023 02:38:10 -0500 Subject: [PATCH 122/226] bcachefs: Use GFP_KERNEL for promote allocations We already have btree locks dropped here - no need for GFP_NOFS. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 4c9eaf7cea8d..88aa004eade8 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -174,7 +174,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) return NULL; - op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS); + op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_KERNEL); if (!op) goto err; @@ -187,7 +187,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, */ *rbio = kzalloc(sizeof(struct bch_read_bio) + sizeof(struct bio_vec) * pages, - GFP_NOFS); + GFP_KERNEL); if (!*rbio) goto err; @@ -195,7 +195,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0); if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, - GFP_NOFS)) + GFP_KERNEL)) goto err; (*rbio)->bounce = true; From cee0a8ea6d4f0c1960d47af2dd4b6f550af39e31 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 20 Dec 2023 16:49:43 -0500 Subject: [PATCH 123/226] bcachefs: Improve the nopromote tracepoint Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/io_read.c | 31 ++++++++++++++++++------------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index d7dc8f644a05..716ff643dc85 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -235,6 +235,7 @@ x(BCH_ERR_nopromote, nopromote_unwritten) \ x(BCH_ERR_nopromote, nopromote_congested) \ x(BCH_ERR_nopromote, nopromote_in_flight) \ + x(BCH_ERR_nopromote, nopromote_no_writes) \ x(BCH_ERR_nopromote, nopromote_enomem) enum bch_errcode { diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 88aa004eade8..5c2d118eaf6f 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -172,11 +172,13 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, int ret; if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) - return NULL; + return ERR_PTR(-BCH_ERR_nopromote_no_writes); op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_KERNEL); - if (!op) + if (!op) { + ret = -BCH_ERR_nopromote_enomem; goto err; + } op->start_time = local_clock(); op->pos = pos; @@ -188,23 +190,28 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, *rbio = kzalloc(sizeof(struct bch_read_bio) + sizeof(struct bio_vec) * pages, GFP_KERNEL); - if (!*rbio) + if (!*rbio) { + ret = -BCH_ERR_nopromote_enomem; goto err; + } rbio_init(&(*rbio)->bio, opts); bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0); - if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, - GFP_KERNEL)) + if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) { + ret = -BCH_ERR_nopromote_enomem; goto err; + } (*rbio)->bounce = true; (*rbio)->split = true; (*rbio)->kmalloc = true; if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, - bch_promote_params)) + bch_promote_params)) { + ret = -BCH_ERR_nopromote_in_flight; goto err; + } bio = &op->write.op.wbio.bio; bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0); @@ -223,9 +230,8 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, * -BCH_ERR_ENOSPC_disk_reservation: */ if (ret) { - ret = rhashtable_remove_fast(&c->promote_table, &op->hash, - bch_promote_params); - BUG_ON(ret); + BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash, + bch_promote_params)); goto err; } @@ -239,7 +245,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, *rbio = NULL; kfree(op); bch2_write_ref_put(c, BCH_WRITE_REF_promote); - return NULL; + return ERR_PTR(ret); } noinline @@ -274,10 +280,9 @@ static struct promote_op *promote_alloc(struct btree_trans *trans, ? BTREE_ID_reflink : BTREE_ID_extents, k, pos, pick, opts, sectors, rbio); - if (!promote) { - ret = -BCH_ERR_nopromote_enomem; + ret = PTR_ERR_OR_ZERO(promote); + if (ret) goto nopromote; - } *bounce = true; *read_full = promote_full; From 559e6c23367f4c122834e7bf4eeac0a3ac382149 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 21:31:26 -0500 Subject: [PATCH 124/226] bcachefs: trans_for_each_update() now declares loop iter Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 5 ----- fs/bcachefs/btree_trans_commit.c | 21 +++++---------------- fs/bcachefs/btree_update.c | 2 +- fs/bcachefs/btree_update.h | 4 +--- 4 files changed, 7 insertions(+), 25 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index ed4b8c302a9a..0ffa3cdf8134 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -648,7 +648,6 @@ void bch2_btree_path_level_init(struct btree_trans *trans, static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, struct btree *b) { struct bch_fs *c = trans->c; - struct btree_insert_entry *i; trans_for_each_update(trans, i) if (!i->cached && @@ -1369,8 +1368,6 @@ void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) noinline __cold void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) { - struct btree_insert_entry *i; - prt_printf(buf, "transaction updates for %s journal seq %llu", trans->fn, trans->journal_res.seq); prt_newline(buf); @@ -1840,7 +1837,6 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter) static noinline struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter) { - struct btree_insert_entry *i; struct bkey_i *ret = NULL; trans_for_each_update(iter->trans, i) { @@ -2975,7 +2971,6 @@ static void check_btree_paths_leaked(struct btree_trans *trans) void bch2_trans_put(struct btree_trans *trans) __releases(&c->btree_trans_barrier) { - struct btree_insert_entry *i; struct bch_fs *c = trans->c; struct btree_transaction_stats *s = btree_trans_stats(trans); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 2078ab507012..9fe22628a946 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -93,8 +93,6 @@ static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btre static inline int bch2_trans_lock_write(struct btree_trans *trans) { - struct btree_insert_entry *i; - EBUG_ON(trans->write_locked); trans_for_each_update(trans, i) { @@ -115,8 +113,6 @@ static inline int bch2_trans_lock_write(struct btree_trans *trans) static inline void bch2_trans_unlock_write(struct btree_trans *trans) { if (likely(trans->write_locked)) { - struct btree_insert_entry *i; - trans_for_each_update(trans, i) if (!same_leaf_as_prev(trans, i)) bch2_btree_node_unlock_write_inlined(trans, i->path, @@ -361,7 +357,6 @@ noinline static int btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, struct btree_path *path, unsigned new_u64s) { - struct btree_insert_entry *i; struct bkey_cached *ck = (void *) path->l[0].b; struct bkey_i *new_k; int ret; @@ -400,7 +395,6 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags { struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) path->l[0].b; - struct btree_insert_entry *i; unsigned new_u64s; struct bkey_i *new_k; @@ -550,7 +544,7 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, static int bch2_trans_commit_run_triggers(struct btree_trans *trans) { - struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates; + struct btree_insert_entry *btree_id_start = trans->updates; unsigned btree_id = 0; int ret = 0; @@ -597,7 +591,6 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) { struct bch_fs *c = trans->c; - struct btree_insert_entry *i; int ret = 0; trans_for_each_update(trans, i) { @@ -623,7 +616,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree_insert_entry *i; struct btree_trans_commit_hook *h; unsigned u64s = 0; int ret; @@ -775,8 +767,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans) { - struct btree_insert_entry *i; - trans_for_each_update(trans, i) bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } @@ -820,7 +810,6 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree_insert_entry *i; int ret = 0, u64s_delta = 0; trans_for_each_update(trans, i) { @@ -964,7 +953,6 @@ static noinline int do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) { struct bch_fs *c = trans->c; - struct btree_insert_entry *i; int ret = 0; trans_for_each_update(trans, i) { @@ -978,8 +966,8 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) { + struct btree_insert_entry *errored_at = NULL; struct bch_fs *c = trans->c; - struct btree_insert_entry *i = NULL; int ret = 0; if (!trans->nr_updates && @@ -1058,11 +1046,12 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto err; } retry: + errored_at = NULL; bch2_trans_verify_not_in_restart(trans); if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) memset(&trans->journal_res, 0, sizeof(trans->journal_res)); - ret = do_bch2_trans_commit(trans, flags, &i, _RET_IP_); + ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_); /* make sure we didn't drop or screw up locks: */ bch2_trans_verify_locks(trans); @@ -1081,7 +1070,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) return ret; err: - ret = bch2_trans_commit_error(trans, flags, i, ret, _RET_IP_); + ret = bch2_trans_commit_error(trans, flags, errored_at, ret, _RET_IP_); if (ret) goto out; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index ecaf8d6332fc..ce3f0af5b1a0 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -408,7 +408,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, * Pending updates are kept sorted: first, find position of new update, * then delete/trim any updates the new update overwrites: */ - trans_for_each_update(trans, i) { + for (i = trans->updates; i < trans->updates + trans->nr_updates; i++) { cmp = btree_insert_entry_cmp(&n, i); if (cmp <= 0) break; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 8aec86ca2d20..fbb83e1aa2b2 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -187,14 +187,12 @@ static inline int bch2_trans_commit(struct btree_trans *trans, bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) #define trans_for_each_update(_trans, _i) \ - for ((_i) = (_trans)->updates; \ + for (struct btree_insert_entry *_i = (_trans)->updates; \ (_i) < (_trans)->updates + (_trans)->nr_updates; \ (_i)++) static inline void bch2_trans_reset_updates(struct btree_trans *trans) { - struct btree_insert_entry *i; - trans_for_each_update(trans, i) bch2_path_put(trans, i->path, true); From defd9e39b59961ea0fd774020fb5c05dba199412 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 21:40:26 -0500 Subject: [PATCH 125/226] bcachefs: darray_for_each() now declares loop iter Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_write_buffer.c | 2 -- fs/bcachefs/darray.h | 7 +++++-- fs/bcachefs/fs-io-buffered.c | 7 ++----- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 22 ++++------------------ fs/bcachefs/io_write.c | 1 - fs/bcachefs/move.c | 4 +--- fs/bcachefs/movinggc.c | 1 - fs/bcachefs/recovery.c | 1 - fs/bcachefs/snapshot.c | 3 +-- fs/bcachefs/snapshot.h | 4 ---- fs/bcachefs/super.c | 2 +- fs/bcachefs/util.c | 2 -- 13 files changed, 15 insertions(+), 43 deletions(-) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index ed5640c4d1a3..6bb5756b5db7 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -246,7 +246,6 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) struct bch_fs *c = trans->c; struct journal *j = &c->journal; struct btree_write_buffer *wb = &c->btree_write_buffer; - struct wb_key_ref *i; struct btree_iter iter = { NULL }; size_t skipped = 0, fast = 0, slowpath = 0; bool write_locked = false; @@ -355,7 +354,6 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) */ trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr); - struct btree_write_buffered_key *i; darray_for_each(wb->flushing.keys, i) { if (!i->journal_seq) continue; diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index e367c625f057..c7f153cecde3 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -81,11 +81,14 @@ static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, #define darray_remove_item(_d, _pos) \ array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data) +#define __darray_for_each(_d, _i) \ + for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++) + #define darray_for_each(_d, _i) \ - for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++) + for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++) #define darray_for_each_reverse(_d, _i) \ - for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) + for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) #define darray_init(_d) \ do { \ diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 637a83e4d961..e48b364db5ae 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -52,14 +52,11 @@ struct readpages_iter { static int readpages_iter_init(struct readpages_iter *iter, struct readahead_control *ractl) { - struct folio **fi; - int ret; - memset(iter, 0, sizeof(*iter)); iter->mapping = ractl->mapping; - ret = bch2_filemap_get_contig_folios_d(iter->mapping, + int ret = bch2_filemap_get_contig_folios_d(iter->mapping, ractl->_index << PAGE_SHIFT, (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT, 0, mapping_gfp_mask(iter->mapping), @@ -826,7 +823,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch2_folio_reservation res; folios fs; - struct folio **fi, *f; + struct folio *f; unsigned copied = 0, f_offset, f_copied; u64 end = pos + len, f_pos, f_len; loff_t last_folio_pos = inode->v.i_size; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4c35df1230b5..943b4e5e4725 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1498,7 +1498,7 @@ static void bch2_evict_inode(struct inode *vinode) void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) { - struct bch_inode_info *inode, **i; + struct bch_inode_info *inode; DARRAY(struct bch_inode_info *) grabbed; bool clean_pass = false, this_pass_clean; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 43801e29bc5a..52438f86530f 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -398,7 +398,7 @@ static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s }; int ret = 0; - darray_for_each(s->ids, i) { + __darray_for_each(s->ids, i) { if (i->id == id) return 0; if (i->id > id) @@ -415,7 +415,7 @@ static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, enum btree_id btree_id, struct bpos pos) { - struct snapshots_seen_entry *i, n = { + struct snapshots_seen_entry n = { .id = pos.snapshot, .equiv = bch2_snapshot_equiv(c, pos.snapshot), }; @@ -616,7 +616,7 @@ lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, snapshot = bch2_snapshot_equiv(c, snapshot); - darray_for_each(w->inodes, i) + __darray_for_each(w->inodes, i) if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot)) goto found; @@ -658,11 +658,8 @@ static struct inode_walker_entry *walk_inode(struct btree_trans *trans, if (ret) return ERR_PTR(ret); } else if (bkey_cmp(w->last_pos, pos)) { - struct inode_walker_entry *i; - darray_for_each(w->inodes, i) i->seen_this_pos = false; - } w->last_pos = pos; @@ -1032,7 +1029,6 @@ static bool dirent_points_to_inode(struct bkey_s_c_dirent d, static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; - struct inode_walker_entry *i; u32 restart_count = trans->restart_count; int ret = 0; s64 count2; @@ -1081,11 +1077,8 @@ struct extent_ends { static void extent_ends_reset(struct extent_ends *extent_ends) { - struct extent_end *i; - darray_for_each(extent_ends->e, i) snapshots_seen_exit(&i->seen); - extent_ends->e.nr = 0; } @@ -1117,7 +1110,7 @@ static int extent_ends_at(struct bch_fs *c, if (!n.seen.ids.data) return -BCH_ERR_ENOMEM_fsck_extent_ends_at; - darray_for_each(extent_ends->e, i) { + __darray_for_each(extent_ends->e, i) { if (i->snapshot == k.k->p.snapshot) { snapshots_seen_exit(&i->seen); *i = n; @@ -1256,7 +1249,6 @@ static int check_overlapping_extents(struct btree_trans *trans, bool *fixed) { struct bch_fs *c = trans->c; - struct extent_end *i; int ret = 0; /* transaction restart, running again */ @@ -1495,7 +1487,6 @@ int bch2_check_indirect_extents(struct bch_fs *c) static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; - struct inode_walker_entry *i; u32 restart_count = trans->restart_count; int ret = 0; s64 count2; @@ -1992,13 +1983,10 @@ typedef DARRAY(struct pathbuf_entry) pathbuf; static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) { - struct pathbuf_entry *i; - darray_for_each(*p, i) if (i->inum == inum && i->snapshot == snapshot) return true; - return false; } @@ -2092,8 +2080,6 @@ static int check_path(struct btree_trans *trans, } if (path_is_dup(p, inode->bi_inum, snapshot)) { - struct pathbuf_entry *i; - /* XXX print path */ bch_err(c, "directory structure loop"); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 2817f27909b3..d8306c190150 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1230,7 +1230,6 @@ static void bch2_nocow_write(struct bch_write_op *op) struct bkey_ptrs_c ptrs; const struct bch_extent_ptr *ptr; DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets; - struct bucket_to_lock *i; u32 snapshot; struct bucket_to_lock *stale_at; int ret; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index b1b9566c7a61..106d95623cc8 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -400,12 +400,10 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, if (ret) return ERR_PTR(ret); - if (extent_k.k->p.snapshot) { - struct snapshot_io_opts_entry *i; + if (extent_k.k->p.snapshot) darray_for_each(io_opts->d, i) if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) return &i->io_opts; - } return &io_opts->fs_io_opts; } diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 963191a2f4e0..abe925e705a4 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -209,7 +209,6 @@ static int bch2_copygc(struct moving_context *ctxt, }; move_buckets buckets = { 0 }; struct move_bucket_in_flight *f; - struct move_bucket *i; u64 moved = atomic64_read(&ctxt->stats->sectors_moved); int ret = 0; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 3b84c4ab2c4f..d54e8dff99d4 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -146,7 +146,6 @@ static int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; DARRAY(struct journal_key *) keys_sorted = { 0 }; - struct journal_key **kp; struct journal *j = &c->journal; u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b0ef415914dc..f5f1d7797533 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1311,7 +1311,6 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, struct bch_fs *c = trans->c; u32 nr_deleted_ancestors = 0; struct bkey_i_snapshot *s; - u32 *i; int ret; if (k.k->type != KEY_TYPE_snapshot) @@ -1369,7 +1368,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) struct bkey_s_c_snapshot snap; snapshot_id_list deleted = { 0 }; snapshot_id_list deleted_interior = { 0 }; - u32 *i, id; + u32 id; int ret = 0; if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index f09a22f44239..94f35b2cfbb3 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -202,8 +202,6 @@ static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) { - u32 *i; - darray_for_each(*s, i) if (*i == id) return true; @@ -212,8 +210,6 @@ static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id) { - u32 *i; - darray_for_each(*s, i) if (bch2_snapshot_is_ancestor(c, id, *i)) return true; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index c04d425c872f..5cc09b4b50af 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1937,7 +1937,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, { DARRAY(struct bch_sb_handle) sbs = { 0 }; struct bch_fs *c = NULL; - struct bch_sb_handle *sb, *best = NULL; + struct bch_sb_handle *best = NULL; struct printbuf errbuf = PRINTBUF; int ret = 0; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index e85b1f467295..00ddd91ddfcc 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -297,8 +297,6 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task) void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) { - unsigned long *i; - darray_for_each(*stack, i) { prt_printf(out, "[<0>] %pB", (void *) *i); prt_newline(out); From 037a2d9f48285bf3bfed0f55f1aee04d8b329d07 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Dec 2023 21:02:45 -0500 Subject: [PATCH 126/226] bcachefs: simplify bch_devs_list Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 4 ++-- fs/bcachefs/extents.h | 6 +++--- fs/bcachefs/replicas.c | 6 ++---- fs/bcachefs/sb-members.h | 23 ++++++++++------------- fs/bcachefs/super_types.h | 2 +- 5 files changed, 18 insertions(+), 23 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index d99f38c1e7f9..f41cfd966447 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -970,8 +970,8 @@ static int __open_bucket_add_buckets(struct btree_trans *trans, devs = target_rw_devs(c, wp->data_type, target); /* Don't allocate from devices we already have pointers to: */ - for (i = 0; i < devs_have->nr; i++) - __clear_bit(devs_have->devs[i], devs.d); + darray_for_each(*devs_have, i) + __clear_bit(*i, devs.d); open_bucket_for_each(c, ptrs, ob, i) __clear_bit(ob->dev, devs.d); diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index a2ce8a3be13c..4295e93984c4 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -568,7 +568,7 @@ static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) const struct bch_extent_ptr *ptr; bkey_for_each_ptr(p, ptr) - ret.devs[ret.nr++] = ptr->dev; + ret.data[ret.nr++] = ptr->dev; return ret; } @@ -581,7 +581,7 @@ static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) bkey_for_each_ptr(p, ptr) if (!ptr->cached) - ret.devs[ret.nr++] = ptr->dev; + ret.data[ret.nr++] = ptr->dev; return ret; } @@ -594,7 +594,7 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) bkey_for_each_ptr(p, ptr) if (ptr->cached) - ret.devs[ret.nr++] = ptr->dev; + ret.data[ret.nr++] = ptr->dev; return ret; } diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index ccb776e045dc..92ba56ef1fc8 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -173,8 +173,6 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e, enum bch_data_type data_type, struct bch_devs_list devs) { - unsigned i; - BUG_ON(!data_type || data_type == BCH_DATA_sb || data_type >= BCH_DATA_NR); @@ -183,8 +181,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e, e->nr_devs = 0; e->nr_required = 1; - for (i = 0; i < devs.nr; i++) - e->devs[e->nr_devs++] = devs.devs[i]; + darray_for_each(devs, i) + e->devs[e->nr_devs++] = *i; bch2_replicas_entry_sort(e); } diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 03613e3eb8e3..a7e863bc77a9 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_SB_MEMBERS_H #define _BCACHEFS_SB_MEMBERS_H +#include "darray.h" + extern char * const bch2_member_error_strs[]; static inline struct bch_member * @@ -47,23 +49,18 @@ static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs) static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs, unsigned dev) { - unsigned i; - - for (i = 0; i < devs.nr; i++) - if (devs.devs[i] == dev) + darray_for_each(devs, i) + if (*i == dev) return true; - return false; } static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs, unsigned dev) { - unsigned i; - - for (i = 0; i < devs->nr; i++) - if (devs->devs[i] == dev) { - array_remove_item(devs->devs, devs->nr, i); + darray_for_each(*devs, i) + if (*i == dev) { + darray_remove_item(devs, i); return; } } @@ -72,14 +69,14 @@ static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs, unsigned dev) { if (!bch2_dev_list_has_dev(*devs, dev)) { - BUG_ON(devs->nr >= ARRAY_SIZE(devs->devs)); - devs->devs[devs->nr++] = dev; + BUG_ON(devs->nr >= ARRAY_SIZE(devs->data)); + devs->data[devs->nr++] = dev; } } static inline struct bch_devs_list bch2_dev_list_single(unsigned dev) { - return (struct bch_devs_list) { .nr = 1, .devs[0] = dev }; + return (struct bch_devs_list) { .nr = 1, .data[0] = dev }; } static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter, diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index 9c1fd4ca2b10..87d159b9b8cd 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -22,7 +22,7 @@ struct bch_devs_mask { struct bch_devs_list { u8 nr; - u8 devs[BCH_BKEY_PTRS_MAX]; + u8 data[BCH_BKEY_PTRS_MAX]; }; struct bch_member_cpu { From 53b67d8dcf47a9d9fef856484a57c7f0b30873fb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Dec 2023 21:09:34 -0500 Subject: [PATCH 127/226] bcachefs: better error message in btree_node_write_work() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 4 +++- fs/bcachefs/errcode.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d99c237c8873..d7cac6a80507 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1798,8 +1798,10 @@ static void btree_node_write_work(struct work_struct *work) bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr, bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); - if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) + if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) { + ret = -BCH_ERR_btree_write_all_failed; goto err; + } if (wbio->wbio.first_btree_write) { if (wbio->wbio.failed.nr) { diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 716ff643dc85..9d53b7fd6e84 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -224,6 +224,7 @@ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(EIO, btree_node_read_err) \ x(EIO, sb_not_downgraded) \ + x(EIO, btree_write_all_failed) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ From e34ec13a56a02695458f6296daa9239c70b4cd29 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Dec 2023 21:39:45 -0500 Subject: [PATCH 128/226] bcachefs: add more verbose logging Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 5cc09b4b50af..45355b33933e 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1057,12 +1057,13 @@ int bch2_fs_start(struct bch_fs *c) } ret = 0; -out: +err: + if (ret) + bch_err_msg(c, ret, "starting filesystem"); + else + bch_verbose(c, "done starting filesystem"); up_write(&c->state_lock); return ret; -err: - bch_err_msg(c, ret, "starting filesystem"); - goto out; } static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) From ef23397c301900e9e7050f8245e2a25b5eea1589 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Dec 2023 22:55:05 -0500 Subject: [PATCH 129/226] bcachefs: fix warning about uninitialized time_stats Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 00ddd91ddfcc..d530277f9017 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -447,9 +447,9 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) { unsigned long flags; - WARN_RATELIMIT(!stats->min_duration || !stats->min_freq, - "time_stats: min_duration = %llu, min_freq = %llu", - stats->min_duration, stats->min_freq); + WARN_ONCE(!stats->duration_stats_weighted.weight || + !stats->freq_stats_weighted.weight, + "uninitialized time_stats"); if (!stats->buffer) { spin_lock_irqsave(&stats->lock, flags); From 6d5c606c1cdc4e7c3d2ca83660e9f68827fc04ed Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 10:15:18 -0500 Subject: [PATCH 130/226] bcachefs: use track_event_change() for allocator blocked stats Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index f41cfd966447..990ea7f9bc1b 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -239,9 +239,8 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * if (cl) closure_wait(&c->open_buckets_wait, cl); - if (!c->blocked_allocate_open_bucket) - c->blocked_allocate_open_bucket = local_clock(); - + track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], + &c->blocked_allocate_open_bucket, true); spin_unlock(&c->freelist_lock); return ERR_PTR(-BCH_ERR_open_buckets_empty); } @@ -267,19 +266,11 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * ca->nr_open_buckets++; bch2_open_bucket_hash_add(c, ob); - if (c->blocked_allocate_open_bucket) { - bch2_time_stats_update( - &c->times[BCH_TIME_blocked_allocate_open_bucket], - c->blocked_allocate_open_bucket); - c->blocked_allocate_open_bucket = 0; - } + track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], + &c->blocked_allocate_open_bucket, false); - if (c->blocked_allocate) { - bch2_time_stats_update( - &c->times[BCH_TIME_blocked_allocate], - c->blocked_allocate); - c->blocked_allocate = 0; - } + track_event_change(&c->times[BCH_TIME_blocked_allocate], + &c->blocked_allocate, false); spin_unlock(&c->freelist_lock); return ob; @@ -567,8 +558,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, goto again; } - if (!c->blocked_allocate) - c->blocked_allocate = local_clock(); + track_event_change(&c->times[BCH_TIME_blocked_allocate], + &c->blocked_allocate, true); ob = ERR_PTR(-BCH_ERR_freelist_empty); goto err; From 79904fa2bb540e2ecfaa181d7fef8cb21653b0c0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 18:04:29 -0500 Subject: [PATCH 131/226] bcachefs: bch2_trans_srcu_lock() should be static Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 3 ++- fs/bcachefs/btree_iter.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 0ffa3cdf8134..72da7bedbe50 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -35,6 +35,7 @@ static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter) } static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *); +static void bch2_trans_srcu_lock(struct btree_trans *); static inline int __btree_path_cmp(const struct btree_path *l, enum btree_id r_btree_id, @@ -2764,7 +2765,7 @@ void bch2_trans_srcu_unlock(struct btree_trans *trans) } } -void bch2_trans_srcu_lock(struct btree_trans *trans) +static void bch2_trans_srcu_lock(struct btree_trans *trans) { if (!trans->srcu_held) { trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 6bc75bee55c8..c8745d5bdc27 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -565,7 +565,6 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, KEY_TYPE_##_type, sizeof(*_val), _val) void bch2_trans_srcu_unlock(struct btree_trans *); -void bch2_trans_srcu_lock(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *); From c8ef2dc2fc87369a949c3e379ebfe50fe540fa36 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 03:05:30 -0500 Subject: [PATCH 132/226] bcachefs: bch2_dirent_lookup() -> lockrestart_do() Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 78ce9634d05c..51fe04f45690 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -471,17 +471,11 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, const struct qstr *name, subvol_inum *inum) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - int ret; -retry: - bch2_trans_begin(trans); + struct btree_iter iter = { NULL }; - ret = __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, - name, inum, 0); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - if (!ret) - bch2_trans_iter_exit(trans, &iter); + int ret = lockrestart_do(trans, + __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0)); + bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); return ret; } From 3a860b5ad5f7fb851d1a9de5cd09b3e6e95362b2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 21:46:23 -0500 Subject: [PATCH 133/226] bcachefs: for_each_btree_key_upto() -> for_each_btree_key_old_upto() And for_each_btree_key2_upto -> for_each_btree_key_upto Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- fs/bcachefs/btree_iter.h | 12 ++++++------ fs/bcachefs/fsck.c | 16 ++++++++-------- fs/bcachefs/movinggc.c | 2 +- fs/bcachefs/tests.c | 14 +++++++------- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 006ac48ec102..0aa1a8c6301c 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1818,7 +1818,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) s64 nr_to_invalidate = should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); - ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru, + ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru, lru_pos(ca->dev_idx, 0, 0), lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX), BTREE_ITER_INTENT, k, diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c8745d5bdc27..8e0e579400b5 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -701,8 +701,8 @@ transaction_restart: \ _ret2 ?: trans_was_restarted(_trans, _restart_count); \ }) -#define for_each_btree_key2_upto(_trans, _iter, _btree_id, \ - _start, _end, _flags, _k, _do) \ +#define for_each_btree_key_upto(_trans, _iter, _btree_id, \ + _start, _end, _flags, _k, _do) \ ({ \ int _ret3 = 0; \ \ @@ -726,7 +726,7 @@ transaction_restart: \ #define for_each_btree_key(_trans, _iter, _btree_id, \ _start, _flags, _k, _do) \ - for_each_btree_key2_upto(_trans, _iter, _btree_id, _start, \ + for_each_btree_key_upto(_trans, _iter, _btree_id, _start, \ SPOS_MAX, _flags, _k, _do) #define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ @@ -772,11 +772,11 @@ transaction_restart: \ _start, _end, _iter_flags, _k, \ _disk_res, _journal_seq, _commit_flags,\ _do) \ - for_each_btree_key2_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ + for_each_btree_key_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_commit_flags))) -#define for_each_btree_key_old(_trans, _iter, _btree_id, \ +#define for_each_btree_key_old(_trans, _iter, _btree_id, \ _start, _flags, _k, _ret) \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ @@ -784,7 +784,7 @@ transaction_restart: \ !((_ret) = bkey_err(_k)) && (_k).k; \ bch2_btree_iter_advance(&(_iter))) -#define for_each_btree_key_upto(_trans, _iter, _btree_id, \ +#define for_each_btree_key_old_upto(_trans, _iter, _btree_id, \ _start, _end, _flags, _k, _ret) \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 52438f86530f..c3c0b9c1dba2 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -32,10 +32,10 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, u64 sectors = 0; int ret; - for_each_btree_key_upto(trans, iter, BTREE_ID_extents, - SPOS(inum, 0, snapshot), - POS(inum, U64_MAX), - 0, k, ret) + for_each_btree_key_old_upto(trans, iter, BTREE_ID_extents, + SPOS(inum, 0, snapshot), + POS(inum, U64_MAX), + 0, k, ret) if (bkey_extent_is_allocation(k.k)) sectors += k.k->size; @@ -53,10 +53,10 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, u64 subdirs = 0; int ret; - for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, - SPOS(inum, 0, snapshot), - POS(inum, U64_MAX), - 0, k, ret) { + for_each_btree_key_old_upto(trans, iter, BTREE_ID_dirents, + SPOS(inum, 0, snapshot), + POS(inum, U64_MAX), + 0, k, ret) { if (k.k->type != KEY_TYPE_dirent) continue; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index abe925e705a4..4aa8ef583037 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -161,7 +161,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, __func__, bch2_err_str(ret))) return ret; - ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru, + ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru, lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0), lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), 0, k, ({ diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 2fc9e60c754b..7ffb58b80d3d 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -134,7 +134,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i++); @@ -195,7 +195,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i); @@ -257,7 +257,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i); @@ -274,7 +274,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), BTREE_ITER_SLOTS, k, ({ if (i >= nr * 2) @@ -326,7 +326,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i + 8); @@ -344,7 +344,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), BTREE_ITER_SLOTS, k, ({ if (i == nr) @@ -760,7 +760,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr) struct bkey_s_c k; return bch2_trans_run(c, - for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, + for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, 0)); From 44ddd8ad1e0be697cf090af669765b3a8b8965dc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 21:51:34 -0500 Subject: [PATCH 134/226] bcachefs: kill for_each_btree_key_old_upto() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 9 --------- fs/bcachefs/fsck.c | 30 ++++++++++++------------------ 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 8e0e579400b5..f9f6ef9e178f 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -784,15 +784,6 @@ transaction_restart: \ !((_ret) = bkey_err(_k)) && (_k).k; \ bch2_btree_iter_advance(&(_iter))) -#define for_each_btree_key_old_upto(_trans, _iter, _btree_id, \ - _start, _end, _flags, _k, _ret) \ - for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ - (_k) = __bch2_btree_iter_peek_upto_and_restart((_trans), \ - &(_iter), _end, _flags),\ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) - #define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ _start, _flags, _k, _ret) \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index c3c0b9c1dba2..3e66f244a67e 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -30,16 +30,15 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, struct btree_iter iter; struct bkey_s_c k; u64 sectors = 0; - int ret; - for_each_btree_key_old_upto(trans, iter, BTREE_ID_extents, - SPOS(inum, 0, snapshot), - POS(inum, U64_MAX), - 0, k, ret) + int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, + SPOS(inum, 0, snapshot), + POS(inum, U64_MAX), + 0, k, ({ if (bkey_extent_is_allocation(k.k)) sectors += k.k->size; - - bch2_trans_iter_exit(trans, &iter); + 0; + })); return ret ?: sectors; } @@ -49,22 +48,17 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, { struct btree_iter iter; struct bkey_s_c k; - struct bkey_s_c_dirent d; u64 subdirs = 0; - int ret; - for_each_btree_key_old_upto(trans, iter, BTREE_ID_dirents, + int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, SPOS(inum, 0, snapshot), POS(inum, U64_MAX), - 0, k, ret) { - if (k.k->type != KEY_TYPE_dirent) - continue; - - d = bkey_s_c_to_dirent(k); - if (d.v->d_type == DT_DIR) + 0, k, ({ + if (k.k->type == KEY_TYPE_dirent && + bkey_s_c_to_dirent(k).v->d_type == DT_DIR) subdirs++; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); return ret ?: subdirs; } From c47e8bfbb76991c7ea5feae4b21539c36d1200be Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 21:55:12 -0500 Subject: [PATCH 135/226] bcachefs: kill for_each_btree_key_norestart() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index f9f6ef9e178f..ac96f8570169 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -784,14 +784,6 @@ transaction_restart: \ !((_ret) = bkey_err(_k)) && (_k).k; \ bch2_btree_iter_advance(&(_iter))) -#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ - _start, _flags, _k, _ret) \ - for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ - (_k) = bch2_btree_iter_peek_type(&(_iter), _flags), \ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) - #define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \ _start, _end, _flags, _k, _ret) \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ @@ -800,24 +792,20 @@ transaction_restart: \ !((_ret) = bkey_err(_k)) && (_k).k; \ bch2_btree_iter_advance(&(_iter))) -#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret) \ - for (; \ - (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) - -#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ - for (; \ - (_k) = bch2_btree_iter_peek_type(&(_iter), _flags), \ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) - #define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\ for (; \ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \ !((_ret) = bkey_err(_k)) && (_k).k; \ bch2_btree_iter_advance(&(_iter))) +#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ + _start, _flags, _k, _ret) \ + for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\ + SPOS_MAX, _flags, _k, _ret) + +#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ + for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) + #define drop_locks_do(_trans, _do) \ ({ \ bch2_trans_unlock(_trans); \ From 80eab7a7c2808f84e56ba1f2f1408a16c46d3bdc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 22:30:09 -0500 Subject: [PATCH 136/226] bcachefs: for_each_btree_key() now declares loop iter Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 36 ++----- fs/bcachefs/backpointers.c | 5 - fs/bcachefs/btree_gc.c | 57 +++------- fs/bcachefs/btree_iter.h | 4 + fs/bcachefs/debug.c | 87 ++++++--------- fs/bcachefs/ec.c | 15 +-- fs/bcachefs/fsck.c | 39 +------ fs/bcachefs/inode.c | 3 - fs/bcachefs/io_write.c | 2 - fs/bcachefs/logged_ops.c | 6 +- fs/bcachefs/lru.c | 6 +- fs/bcachefs/migrate.c | 2 - fs/bcachefs/move.c | 3 - fs/bcachefs/movinggc.c | 2 - fs/bcachefs/quota.c | 25 ++--- fs/bcachefs/snapshot.c | 24 +---- fs/bcachefs/subvolume.c | 8 +- fs/bcachefs/sysfs.c | 2 - fs/bcachefs/tests.c | 191 +++++++++++++-------------------- 19 files changed, 159 insertions(+), 358 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 0aa1a8c6301c..2141bea4d941 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -534,14 +534,8 @@ void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bke int bch2_bucket_gens_init(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; - struct bch_alloc_v4 a; struct bkey_i_bucket_gens g; bool have_bucket_gens_key = false; - unsigned offset; - struct bpos pos; - u8 gen; int ret; ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, @@ -553,8 +547,10 @@ int bch2_bucket_gens_init(struct bch_fs *c) if (!bch2_dev_bucket_exists(c, k.k->p)) continue; - gen = bch2_alloc_to_v4(k, &a)->gen; - pos = alloc_gens_pos(iter.pos, &offset); + struct bch_alloc_v4 a; + u8 gen = bch2_alloc_to_v4(k, &a)->gen; + unsigned offset; + struct bpos pos = alloc_gens_pos(iter.pos, &offset); if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { ret = commit_do(trans, NULL, NULL, @@ -589,17 +585,11 @@ int bch2_bucket_gens_init(struct bch_fs *c) int bch2_alloc_read(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; - struct bch_dev *ca; int ret; down_read(&c->gc_lock); if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) { - const struct bch_bucket_gens *g; - u64 b; - ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_PREFETCH, k, ({ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; @@ -608,7 +598,7 @@ int bch2_alloc_read(struct bch_fs *c) if (k.k->type != KEY_TYPE_bucket_gens) continue; - g = bkey_s_c_to_bucket_gens(k).v; + const struct bch_bucket_gens *g = bkey_s_c_to_bucket_gens(k).v; /* * Not a fsck error because this is checked/repaired by @@ -617,17 +607,15 @@ int bch2_alloc_read(struct bch_fs *c) if (!bch2_dev_exists2(c, k.k->p.inode)) continue; - ca = bch_dev_bkey_exists(c, k.k->p.inode); + struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode); - for (b = max_t(u64, ca->mi.first_bucket, start); + for (u64 b = max_t(u64, ca->mi.first_bucket, start); b < min_t(u64, ca->mi.nbuckets, end); b++) *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; 0; })); } else { - struct bch_alloc_v4 a; - ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ({ /* @@ -637,8 +625,9 @@ int bch2_alloc_read(struct bch_fs *c) if (!bch2_dev_bucket_exists(c, k.k->p)) continue; - ca = bch_dev_bkey_exists(c, k.k->p.inode); + struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode); + struct bch_alloc_v4 a; *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; 0; })); @@ -1549,9 +1538,6 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, int bch2_check_alloc_to_lru_refs(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, @@ -1680,8 +1666,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, static void bch2_do_discards_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, discard_work); - struct btree_iter iter; - struct bkey_s_c k; u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0; struct bpos discard_pos_done = POS_MAX; int ret; @@ -1805,8 +1789,6 @@ static void bch2_do_invalidates_work(struct work_struct *work) struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); struct bch_dev *ca; struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; unsigned i; int ret = 0; diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index b0a933d04337..a97fc2b61ee2 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -391,9 +391,6 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ /* verify that every backpointer has a corresponding alloc key */ int bch2_check_btree_backpointers(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, 0, k, @@ -821,8 +818,6 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, struct bbpos start, struct bbpos end) { - struct btree_iter iter; - struct bkey_s_c k; struct bpos last_flushed_pos = SPOS_MAX; return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index f0951c17a27a..d5c5cc4282db 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1479,8 +1479,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; struct bch_dev *ca; unsigned i; int ret = 0; @@ -1507,8 +1505,6 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) { struct bch_dev *ca; struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; struct bucket *g; struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a; @@ -1632,43 +1628,31 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) { - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; size_t idx = 0; - int ret = 0; if (metadata_only) return 0; - trans = bch2_trans_get(c); - - ret = for_each_btree_key_commit(trans, iter, - BTREE_ID_reflink, POS_MIN, - BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_gc_write_reflink_key(trans, &iter, k, &idx)); - + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, + BTREE_ID_reflink, POS_MIN, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + bch2_gc_write_reflink_key(trans, &iter, k, &idx))); c->reflink_gc_nr = 0; - bch2_trans_put(trans); - bch_err_fn(c, ret); return ret; } static int bch2_gc_reflink_start(struct bch_fs *c, bool metadata_only) { - struct btree_iter iter; - struct bkey_s_c k; - struct reflink_gc *r; - int ret = 0; if (metadata_only) return 0; c->reflink_gc_nr = 0; - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_PREFETCH, k, ({ const __le64 *refcount = bkey_refcount_c(k); @@ -1676,8 +1660,8 @@ static int bch2_gc_reflink_start(struct bch_fs *c, if (!refcount) continue; - r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, - GFP_KERNEL); + struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table, + c->reflink_gc_nr++, GFP_KERNEL); if (!r) { ret = -BCH_ERR_ENOMEM_gc_reflink_start; break; @@ -1757,24 +1741,15 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) { - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; - if (metadata_only) return 0; - trans = bch2_trans_get(c); - - ret = for_each_btree_key_commit(trans, iter, - BTREE_ID_stripes, POS_MIN, - BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_gc_write_stripes_key(trans, &iter, k)); - - bch2_trans_put(trans); - return ret; + return bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, + BTREE_ID_stripes, POS_MIN, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + bch2_gc_write_stripes_key(trans, &iter, k))); } static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only) @@ -1958,8 +1933,6 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i int bch2_gc_gens(struct bch_fs *c) { struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; struct bch_dev *ca; u64 b, start_time = local_clock(); unsigned i; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index ac96f8570169..4a16214b1e6b 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -704,6 +704,8 @@ transaction_restart: \ #define for_each_btree_key_upto(_trans, _iter, _btree_id, \ _start, _end, _flags, _k, _do) \ ({ \ + struct btree_iter _iter; \ + struct bkey_s_c _k; \ int _ret3 = 0; \ \ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ @@ -732,6 +734,8 @@ transaction_restart: \ #define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ _start, _flags, _k, _do) \ ({ \ + struct btree_iter _iter; \ + struct bkey_s_c _k; \ int _ret3 = 0; \ \ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 97a699fa68d5..76e43318fc5f 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -366,36 +366,23 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); - if (ret) - return ret; - - trans = bch2_trans_get(i->c); - ret = for_each_btree_key(trans, iter, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ({ - bch2_bkey_val_to_text(&i->buf, i->c, k); - prt_newline(&i->buf); - bch2_trans_unlock(trans); - flush_buf(i); - })); - i->from = iter.pos; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); - - return ret ?: i->ret; + return flush_buf(i) ?: + bch2_trans_run(i->c, + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + bch2_bkey_val_to_text(&i->buf, i->c, k); + prt_newline(&i->buf); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + flush_buf(i); + }))) ?: + i->ret; } static const struct file_operations btree_debug_ops = { @@ -463,45 +450,31 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); - if (ret) - return ret; + return flush_buf(i) ?: + bch2_trans_run(i->c, + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + struct btree_path_level *l = &iter.path->l[0]; + struct bkey_packed *_k = + bch2_btree_node_iter_peek(&l->iter, l->b); - trans = bch2_trans_get(i->c); + if (bpos_gt(l->b->key.k.p, i->prev_node)) { + bch2_btree_node_to_text(&i->buf, i->c, l->b); + i->prev_node = l->b->key.k.p; + } - ret = for_each_btree_key(trans, iter, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ({ - struct btree_path_level *l = &iter.path->l[0]; - struct bkey_packed *_k = - bch2_btree_node_iter_peek(&l->iter, l->b); - - if (bpos_gt(l->b->key.k.p, i->prev_node)) { - bch2_btree_node_to_text(&i->buf, i->c, l->b); - i->prev_node = l->b->key.k.p; - } - - bch2_bfloat_to_text(&i->buf, l->b, _k); - bch2_trans_unlock(trans); - flush_buf(i); - })); - i->from = iter.pos; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); - - return ret ?: i->ret; + bch2_bfloat_to_text(&i->buf, l->b, _k); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + flush_buf(i); + }))) ?: + i->ret; } static const struct file_operations bfloat_failed_debug_ops = { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 6cbc1d8a3ab5..fc1563f6273f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1826,14 +1826,7 @@ void bch2_fs_ec_flush(struct bch_fs *c) int bch2_stripes_read(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - const struct bch_stripe *s; - struct stripe *m; - unsigned i; - int ret; - - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN, BTREE_ITER_PREFETCH, k, ({ if (k.k->type != KEY_TYPE_stripe) @@ -1843,16 +1836,16 @@ int bch2_stripes_read(struct bch_fs *c) if (ret) break; - s = bkey_s_c_to_stripe(k).v; + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - m = genradix_ptr(&c->stripes, k.k->p.offset); + struct stripe *m = genradix_ptr(&c->stripes, k.k->p.offset); m->sectors = le16_to_cpu(s->sectors); m->algorithm = s->algorithm; m->nr_blocks = s->nr_blocks; m->nr_redundant = s->nr_redundant; m->blocks_nonempty = 0; - for (i = 0; i < s->nr_blocks; i++) + for (unsigned i = 0; i < s->nr_blocks; i++) m->blocks_nonempty += !!stripe_blockcount_get(s, i); bch2_stripes_heap_insert(c, m, k.k->p.offset); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 3e66f244a67e..a66b0a8203ac 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -27,8 +27,6 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, u32 snapshot) { - struct btree_iter iter; - struct bkey_s_c k; u64 sectors = 0; int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, @@ -46,8 +44,6 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, u32 snapshot) { - struct btree_iter iter; - struct bkey_s_c k; u64 subdirs = 0; int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, @@ -978,10 +974,8 @@ int bch2_check_inodes(struct bch_fs *c) { bool full = c->opts.fsck; struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; struct bch_inode_unpacked prev = { 0 }; struct snapshots_seen s; - struct bkey_s_c k; int ret; snapshots_seen_init(&s); @@ -1424,8 +1418,6 @@ int bch2_check_extents(struct bch_fs *c) struct inode_walker w = inode_walker_init(); struct snapshots_seen s; struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; struct extent_ends extent_ends; struct disk_reservation res = { 0 }; int ret = 0; @@ -1457,8 +1449,6 @@ int bch2_check_extents(struct bch_fs *c) int bch2_check_indirect_extents(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; struct disk_reservation res = { 0 }; int ret = 0; @@ -1827,8 +1817,6 @@ int bch2_check_dirents(struct bch_fs *c) struct snapshots_seen s; struct bch_hash_info hash_info; struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; int ret = 0; snapshots_seen_init(&s); @@ -1892,8 +1880,6 @@ int bch2_check_xattrs(struct bch_fs *c) { struct inode_walker inode = inode_walker_init(); struct bch_hash_info hash_info; - struct btree_iter iter; - struct bkey_s_c k; int ret = 0; ret = bch2_trans_run(c, @@ -2220,10 +2206,6 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, struct nlink_table *t, u64 start, u64 *end) { - struct btree_iter iter; - struct bkey_s_c k; - struct bch_inode_unpacked u; - int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start), @@ -2234,6 +2216,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, continue; /* Should never fail, checked by bch2_inode_invalid: */ + struct bch_inode_unpacked u; BUG_ON(bch2_inode_unpack(k, &u)); /* @@ -2264,9 +2247,6 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links u64 range_start, u64 range_end) { struct snapshots_seen s; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_s_c_dirent d; snapshots_seen_init(&s); @@ -2279,16 +2259,14 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links if (ret) break; - switch (k.k->type) { - case KEY_TYPE_dirent: - d = bkey_s_c_to_dirent(k); + if (k.k->type == KEY_TYPE_dirent) { + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); if (d.v->d_type != DT_DIR && d.v->d_type != DT_SUBVOL) inc_link(c, &s, links, range_start, range_end, le64_to_cpu(d.v->d_inum), bch2_snapshot_equiv(c, d.k->p.snapshot)); - break; } 0; }))); @@ -2346,12 +2324,9 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { - struct btree_iter iter; - struct bkey_s_c k; size_t idx = 0; - int ret = 0; - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS(0, range_start), BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, @@ -2427,14 +2402,10 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, int bch2_fix_reflink_p(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - int ret; - if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) return 0; - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_extents, POS_MIN, BTREE_ITER_INTENT|BTREE_ITER_PREFETCH| diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index e75c6bc2bc28..c39844b8e596 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1155,8 +1155,6 @@ static int may_delete_deleted_inode(struct btree_trans *trans, int bch2_delete_dead_inodes(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; bool need_another_pass; int ret; again: @@ -1200,6 +1198,5 @@ int bch2_delete_dead_inodes(struct bch_fs *c) } err: bch2_trans_put(trans); - bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d8306c190150..0af39b562666 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1166,9 +1166,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) { struct bch_fs *c = op->c; struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; struct bkey_i *orig; - struct bkey_s_c k; int ret; for_each_keylist_key(&op->insert_keys, orig) { diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 80fcea641e77..ad598105c587 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -54,11 +54,7 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, int bch2_resume_logged_ops(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, BTREE_ID_logged_ops, POS_MIN, BTREE_ITER_PREFETCH, k, diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 4de1bc242660..7a4ca5a28b3e 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -147,12 +147,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, int bch2_check_lrus(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; struct bpos last_flushed_pos = POS_MIN; - int ret = 0; - - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index a2d998dbcc88..5623cee3ef86 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -79,8 +79,6 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; enum btree_id id; int ret = 0; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 106d95623cc8..6cb38484fa77 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -372,9 +372,6 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, int ret = 0; if (io_opts->cur_inum != extent_k.k->p.inode) { - struct btree_iter iter; - struct bkey_s_c k; - io_opts->d.nr = 0; ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 4aa8ef583037..8aa212685b3f 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -145,8 +145,6 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4); size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0; int ret; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a33f370e1fc7..e68b34eab90a 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -599,14 +599,9 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans, int bch2_fs_quota_read(struct bch_fs *c) { - struct bch_sb_field_quota *sb_quota; - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - int ret; mutex_lock(&c->sb_lock); - sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); + struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); if (!sb_quota) { mutex_unlock(&c->sb_lock); return -BCH_ERR_ENOSPC_sb_quota; @@ -615,17 +610,13 @@ int bch2_fs_quota_read(struct bch_fs *c) bch2_sb_quota_read(c); mutex_unlock(&c->sb_lock); - trans = bch2_trans_get(c); - - ret = for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, - BTREE_ITER_PREFETCH, k, - __bch2_quota_set(c, k, NULL)) ?: - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - bch2_fs_quota_read_inode(trans, &iter, k)); - - bch2_trans_put(trans); - + int ret = bch2_trans_run(c, + for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, + BTREE_ITER_PREFETCH, k, + __bch2_quota_set(c, k, NULL)) ?: + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + bch2_fs_quota_read_inode(trans, &iter, k))); bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index f5f1d7797533..96df4052ff7b 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -581,11 +581,7 @@ static int check_snapshot_tree(struct btree_trans *trans, */ int bch2_check_snapshot_trees(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, BTREE_ITER_PREFETCH, k, @@ -853,15 +849,11 @@ static int check_snapshot(struct btree_trans *trans, int bch2_check_snapshots(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - int ret; - /* * We iterate backwards as checking/fixing the depth field requires that * the parent's depth already be correct: */ - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_snapshots, POS_MAX, BTREE_ITER_PREFETCH, k, @@ -1363,9 +1355,6 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, int bch2_delete_dead_snapshots(struct bch_fs *c) { struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_s_c_snapshot snap; snapshot_id_list deleted = { 0 }; snapshot_id_list deleted_interior = { 0 }; u32 id; @@ -1407,8 +1396,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (k.k->type != KEY_TYPE_snapshot) continue; - snap = bkey_s_c_to_snapshot(k); - BCH_SNAPSHOT_DELETED(snap.v) + BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v) ? snapshot_list_add(c, &deleted, k.k->p.offset) : 0; })); @@ -1673,11 +1661,7 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct int bch2_snapshots_read(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; - - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index be0de4f030a4..7c67c28d3ef8 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -79,11 +79,7 @@ static int check_subvol(struct btree_trans *trans, int bch2_check_subvols(struct bch_fs *c) { - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, @@ -224,8 +220,6 @@ static int bch2_subvolume_reparent(struct btree_trans *trans, */ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete) { - struct btree_iter iter; - struct bkey_s_c k; struct bch_subvolume s; return lockrestart_do(trans, diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index e818ca7a1b70..8ed52319ff68 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -256,8 +256,6 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c) { struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; enum btree_id id; struct compression_type_stats { u64 nr_extents; diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 7ffb58b80d3d..b3fe9fc57747 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -107,9 +107,6 @@ static int test_delete_written(struct bch_fs *c, u64 nr) static int test_iterate(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; - struct bkey_s_c k; u64 i; int ret = 0; @@ -127,49 +124,43 @@ static int test_iterate(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0); bch_err_msg(c, ret, "insert error"); if (ret) - goto err; + return ret; } pr_info("iterating forwards"); - i = 0; - ret = for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(k.k->p.offset != i++); - 0; - })); + ret = bch2_trans_run(c, + for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ + BUG_ON(k.k->p.offset != i++); + 0; + }))); bch_err_msg(c, ret, "error iterating forwards"); if (ret) - goto err; + return ret; BUG_ON(i != nr); pr_info("iterating backwards"); - ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, - SPOS(0, U64_MAX, U32_MAX), 0, k, - ({ + ret = bch2_trans_run(c, + for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, + SPOS(0, U64_MAX, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != --i); 0; - })); + }))); bch_err_msg(c, ret, "error iterating backwards"); if (ret) - goto err; + return ret; BUG_ON(i); -err: - bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); - return ret; + return 0; } static int test_iterate_extents(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; - struct bkey_s_c k; u64 i; int ret = 0; @@ -188,51 +179,45 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0); bch_err_msg(c, ret, "insert error"); if (ret) - goto err; + return ret; } pr_info("iterating forwards"); - i = 0; - ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(bkey_start_offset(k.k) != i); - i = k.k->p.offset; - 0; - })); + ret = bch2_trans_run(c, + for_each_btree_key_upto(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ + BUG_ON(bkey_start_offset(k.k) != i); + i = k.k->p.offset; + 0; + }))); bch_err_msg(c, ret, "error iterating forwards"); if (ret) - goto err; + return ret; BUG_ON(i != nr); pr_info("iterating backwards"); - ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, - SPOS(0, U64_MAX, U32_MAX), 0, k, - ({ + ret = bch2_trans_run(c, + for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, + SPOS(0, U64_MAX, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i); i = bkey_start_offset(k.k); 0; - })); + }))); bch_err_msg(c, ret, "error iterating backwards"); if (ret) - goto err; + return ret; BUG_ON(i); -err: - bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); - return ret; + return 0; } static int test_iterate_slots(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; - struct bkey_s_c k; u64 i; int ret = 0; @@ -250,57 +235,48 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0); bch_err_msg(c, ret, "insert error"); if (ret) - goto err; + return ret; } pr_info("iterating forwards"); - i = 0; - ret = for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(k.k->p.offset != i); - i += 2; - 0; - })); + ret = bch2_trans_run(c, + for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ + BUG_ON(k.k->p.offset != i); + i += 2; + 0; + }))); bch_err_msg(c, ret, "error iterating forwards"); if (ret) - goto err; + return ret; BUG_ON(i != nr * 2); pr_info("iterating forwards by slots"); - i = 0; - ret = for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_SLOTS, k, ({ - if (i >= nr * 2) - break; + ret = bch2_trans_run(c, + for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + BTREE_ITER_SLOTS, k, ({ + if (i >= nr * 2) + break; - BUG_ON(k.k->p.offset != i); - BUG_ON(bkey_deleted(k.k) != (i & 1)); + BUG_ON(k.k->p.offset != i); + BUG_ON(bkey_deleted(k.k) != (i & 1)); - i++; - 0; - })); - if (ret < 0) { - bch_err_msg(c, ret, "error iterating forwards by slots"); - goto err; - } - ret = 0; -err: - bch2_trans_put(trans); + i++; + 0; + }))); + bch_err_msg(c, ret, "error iterating forwards by slots"); return ret; } static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; - struct bkey_s_c k; u64 i; int ret = 0; @@ -319,50 +295,45 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0); bch_err_msg(c, ret, "insert error"); if (ret) - goto err; + return ret; } pr_info("iterating forwards"); - i = 0; - ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(bkey_start_offset(k.k) != i + 8); - BUG_ON(k.k->size != 8); - i += 16; - 0; - })); + ret = bch2_trans_run(c, + for_each_btree_key_upto(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ + BUG_ON(bkey_start_offset(k.k) != i + 8); + BUG_ON(k.k->size != 8); + i += 16; + 0; + }))); bch_err_msg(c, ret, "error iterating forwards"); if (ret) - goto err; + return ret; BUG_ON(i != nr); pr_info("iterating forwards by slots"); - i = 0; - ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_SLOTS, k, ({ - if (i == nr) - break; - BUG_ON(bkey_deleted(k.k) != !(i % 16)); + ret = bch2_trans_run(c, + for_each_btree_key_upto(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + BTREE_ITER_SLOTS, k, ({ + if (i == nr) + break; + BUG_ON(bkey_deleted(k.k) != !(i % 16)); - BUG_ON(bkey_start_offset(k.k) != i); - BUG_ON(k.k->size != 8); - i = k.k->p.offset; - 0; - })); + BUG_ON(bkey_start_offset(k.k) != i); + BUG_ON(k.k->size != 8); + i = k.k->p.offset; + 0; + }))); bch_err_msg(c, ret, "error iterating forwards by slots"); - if (ret) - goto err; - ret = 0; -err: - bch2_trans_put(trans); - return 0; + return ret; } /* @@ -736,8 +707,6 @@ static int rand_delete(struct bch_fs *c, u64 nr) static int seq_insert(struct bch_fs *c, u64 nr) { - struct btree_iter iter; - struct bkey_s_c k; struct bkey_i_cookie insert; bkey_cookie_init(&insert.k_i); @@ -756,9 +725,6 @@ static int seq_insert(struct bch_fs *c, u64 nr) static int seq_lookup(struct bch_fs *c, u64 nr) { - struct btree_iter iter; - struct bkey_s_c k; - return bch2_trans_run(c, for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), @@ -768,9 +734,6 @@ static int seq_lookup(struct bch_fs *c, u64 nr) static int seq_overwrite(struct bch_fs *c, u64 nr) { - struct btree_iter iter; - struct bkey_s_c k; - return bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), From 9fea2274f783a39ba54727571e5e669c947ddd39 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 16 Dec 2023 23:47:29 -0500 Subject: [PATCH 137/226] bcachefs: for_each_member_device() now declares loop iter Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 18 +--- fs/bcachefs/btree_gc.c | 161 +++++++++++++-------------------- fs/bcachefs/buckets.c | 19 ++-- fs/bcachefs/chardev.c | 6 +- fs/bcachefs/fs.c | 11 +-- fs/bcachefs/journal.c | 5 +- fs/bcachefs/journal_io.c | 13 +-- fs/bcachefs/journal_reclaim.c | 8 +- fs/bcachefs/movinggc.c | 7 +- fs/bcachefs/recovery.c | 10 +- fs/bcachefs/sb-clean.c | 17 ++-- fs/bcachefs/sb-members.h | 56 ++++++------ fs/bcachefs/super-io.c | 27 +++--- fs/bcachefs/super.c | 35 +++---- 14 files changed, 154 insertions(+), 239 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 2141bea4d941..24a46e02f344 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1787,16 +1787,14 @@ static int invalidate_one_bucket(struct btree_trans *trans, static void bch2_do_invalidates_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); - struct bch_dev *ca; struct btree_trans *trans = bch2_trans_get(c); - unsigned i; int ret = 0; ret = bch2_btree_write_buffer_tryflush(trans); if (ret) goto err; - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { s64 nr_to_invalidate = should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); @@ -1925,8 +1923,6 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, int bch2_fs_freespace_init(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; int ret = 0; bool doing_init = false; @@ -1935,7 +1931,7 @@ int bch2_fs_freespace_init(struct bch_fs *c) * every mount: */ - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { if (ca->mi.freespace_initialized) continue; @@ -1995,15 +1991,13 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, void bch2_recalc_capacity(struct bch_fs *c) { - struct bch_dev *ca; u64 capacity = 0, reserved_sectors = 0, gc_reserve; unsigned bucket_size_max = 0; unsigned long ra_pages = 0; - unsigned i; lockdep_assert_held(&c->state_lock); - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi; ra_pages += bdi->ra_pages; @@ -2011,7 +2005,7 @@ void bch2_recalc_capacity(struct bch_fs *c) bch2_set_ra_pages(c, ra_pages); - for_each_rw_member(ca, c, i) { + for_each_rw_member(c, ca) { u64 dev_reserve = 0; /* @@ -2067,11 +2061,9 @@ void bch2_recalc_capacity(struct bch_fs *c) u64 bch2_min_rw_member_capacity(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; u64 ret = U64_MAX; - for_each_rw_member(ca, c, i) + for_each_rw_member(c, ca) ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size); return ret; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index d5c5cc4282db..ea1d0ed70f50 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1153,13 +1153,10 @@ static void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca, static void bch2_mark_superblocks(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; - mutex_lock(&c->sb_lock); gc_pos_set(c, gc_phase(GC_PHASE_SB)); - for_each_online_member(ca, c, i) + for_each_online_member(c, ca) bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC); mutex_unlock(&c->sb_lock); } @@ -1184,13 +1181,10 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) static void bch2_gc_free(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; - genradix_free(&c->reflink_gc_table); genradix_free(&c->gc_stripes); - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { kvpfree(rcu_dereference_protected(ca->buckets_gc, 1), sizeof(struct bucket_array) + ca->mi.nbuckets * sizeof(struct bucket)); @@ -1212,7 +1206,7 @@ static int bch2_gc_done(struct bch_fs *c, bool verify = !metadata_only && !c->opts.reconstruct_alloc && (!initial || (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info))); - unsigned i, dev; + unsigned i; int ret = 0; percpu_down_write(&c->mark_lock); @@ -1224,14 +1218,14 @@ static int bch2_gc_done(struct bch_fs *c, , ##__VA_ARGS__, dst->_f, src->_f))) \ dst->_f = src->_f #define copy_dev_field(_err, _f, _msg, ...) \ - copy_field(_err, _f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) + copy_field(_err, _f, "dev %u has wrong " _msg, ca->dev_idx, ##__VA_ARGS__) #define copy_fs_field(_err, _f, _msg, ...) \ copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__) for (i = 0; i < ARRAY_SIZE(c->usage); i++) bch2_fs_usage_acc_to_base(c, i); - for_each_member_device(ca, c, dev) { + __for_each_member_device(c, ca) { struct bch_dev_usage *dst = ca->usage_base; struct bch_dev_usage *src = (void *) bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc, @@ -1307,9 +1301,6 @@ static int bch2_gc_done(struct bch_fs *c, static int bch2_gc_start(struct bch_fs *c) { - struct bch_dev *ca = NULL; - unsigned i; - BUG_ON(c->usage_gc); c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), @@ -1319,7 +1310,7 @@ static int bch2_gc_start(struct bch_fs *c) return -BCH_ERR_ENOMEM_gc_start; } - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { BUG_ON(ca->usage_gc); ca->usage_gc = alloc_percpu(struct bch_dev_usage); @@ -1338,10 +1329,7 @@ static int bch2_gc_start(struct bch_fs *c) static int bch2_gc_reset(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; - - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { free_percpu(ca->usage_gc); ca->usage_gc = NULL; } @@ -1379,9 +1367,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, enum bch_data_type type; int ret; - if (bkey_ge(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets))) - return 1; - old = bch2_alloc_to_v4(k, &old_convert); new = *old; @@ -1478,48 +1463,36 @@ static int bch2_alloc_write_key(struct btree_trans *trans, static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) { - struct btree_trans *trans = bch2_trans_get(c); - struct bch_dev *ca; - unsigned i; int ret = 0; - for_each_member_device(ca, c, i) { - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, - POS(ca->dev_idx, ca->mi.first_bucket), - BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, - bch2_alloc_write_key(trans, &iter, k, metadata_only)); - - if (ret < 0) { - bch_err_fn(c, ret); + for_each_member_device(c, ca) { + ret = bch2_trans_run(c, + for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc, + POS(ca->dev_idx, ca->mi.first_bucket), + POS(ca->dev_idx, ca->mi.nbuckets - 1), + BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k, + NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, + bch2_alloc_write_key(trans, &iter, k, metadata_only))); + if (ret) { percpu_ref_put(&ca->ref); break; } } - bch2_trans_put(trans); - return ret < 0 ? ret : 0; + bch_err_fn(c, ret); + return ret; } static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) { - struct bch_dev *ca; - struct btree_trans *trans = bch2_trans_get(c); - struct bucket *g; - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; - unsigned i; - int ret; - - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) + ca->mi.nbuckets * sizeof(struct bucket), GFP_KERNEL|__GFP_ZERO); if (!buckets) { percpu_ref_put(&ca->ref); bch_err(c, "error allocating ca->buckets[gc]"); - ret = -BCH_ERR_ENOMEM_gc_alloc_start; - goto err; + return -BCH_ERR_ENOMEM_gc_alloc_start; } buckets->first_bucket = ca->mi.first_bucket; @@ -1527,41 +1500,38 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) rcu_assign_pointer(ca->buckets_gc, buckets); } - ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ({ - ca = bch_dev_bkey_exists(c, k.k->p.inode); - g = gc_bucket(ca, k.k->p.offset); + int ret = bch2_trans_run(c, + for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ + struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode); + struct bucket *g = gc_bucket(ca, k.k->p.offset); - a = bch2_alloc_to_v4(k, &a_convert); + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); - g->gen_valid = 1; - g->gen = a->gen; + g->gen_valid = 1; + g->gen = a->gen; - if (metadata_only && - (a->data_type == BCH_DATA_user || - a->data_type == BCH_DATA_cached || - a->data_type == BCH_DATA_parity)) { - g->data_type = a->data_type; - g->dirty_sectors = a->dirty_sectors; - g->cached_sectors = a->cached_sectors; - g->stripe = a->stripe; - g->stripe_redundancy = a->stripe_redundancy; - } + if (metadata_only && + (a->data_type == BCH_DATA_user || + a->data_type == BCH_DATA_cached || + a->data_type == BCH_DATA_parity)) { + g->data_type = a->data_type; + g->dirty_sectors = a->dirty_sectors; + g->cached_sectors = a->cached_sectors; + g->stripe = a->stripe; + g->stripe_redundancy = a->stripe_redundancy; + } - 0; - })); -err: - bch2_trans_put(trans); + 0; + }))); bch_err_fn(c, ret); return ret; } static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only) { - struct bch_dev *ca; - unsigned i; - - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { struct bucket_array *buckets = gc_bucket_array(ca); struct bucket *g; @@ -1932,10 +1902,7 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i int bch2_gc_gens(struct bch_fs *c) { - struct btree_trans *trans; - struct bch_dev *ca; u64 b, start_time = local_clock(); - unsigned i; int ret; /* @@ -1948,9 +1915,8 @@ int bch2_gc_gens(struct bch_fs *c) trace_and_count(c, gc_gens_start, c); down_read(&c->gc_lock); - trans = bch2_trans_get(c); - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { struct bucket_gens *gens = bucket_gens(ca); BUG_ON(ca->oldest_gen); @@ -1967,33 +1933,31 @@ int bch2_gc_gens(struct bch_fs *c) ca->oldest_gen[b] = gens->b[b]; } - for (i = 0; i < BTREE_ID_NR; i++) + for (unsigned i = 0; i < BTREE_ID_NR; i++) if (btree_type_has_ptrs(i)) { c->gc_gens_btree = i; c->gc_gens_pos = POS_MIN; - ret = for_each_btree_key_commit(trans, iter, i, - POS_MIN, - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, - k, - NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, - gc_btree_gens_key(trans, &iter, k)); - if (!bch2_err_matches(ret, EROFS)) - bch_err_fn(c, ret); + ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, i, + POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, + k, + NULL, NULL, + BCH_TRANS_COMMIT_no_enospc, + gc_btree_gens_key(trans, &iter, k))); if (ret) goto err; } - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, - POS_MIN, - BTREE_ITER_PREFETCH, - k, - NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, - bch2_alloc_write_oldest_gen(trans, &iter, k)); - if (!bch2_err_matches(ret, EROFS)) - bch_err_fn(c, ret); + ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, + POS_MIN, + BTREE_ITER_PREFETCH, + k, + NULL, NULL, + BCH_TRANS_COMMIT_no_enospc, + bch2_alloc_write_oldest_gen(trans, &iter, k))); if (ret) goto err; @@ -2005,14 +1969,15 @@ int bch2_gc_gens(struct bch_fs *c) bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); trace_and_count(c, gc_gens_end, c); err: - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { kvfree(ca->oldest_gen); ca->oldest_gen = NULL; } - bch2_trans_put(trans); up_read(&c->gc_lock); mutex_unlock(&c->gc_gens_lock); + if (!bch2_err_matches(ret, EROFS)) + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 31b7f0ba7d9e..29a6e2b17e48 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -47,27 +47,23 @@ static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage, void bch2_fs_usage_initialize(struct bch_fs *c) { - struct bch_fs_usage *usage; - struct bch_dev *ca; - unsigned i; - percpu_down_write(&c->mark_lock); - usage = c->usage_base; + struct bch_fs_usage *usage = c->usage_base; - for (i = 0; i < ARRAY_SIZE(c->usage); i++) + for (unsigned i = 0; i < ARRAY_SIZE(c->usage); i++) bch2_fs_usage_acc_to_base(c, i); - for (i = 0; i < BCH_REPLICAS_MAX; i++) + for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) usage->reserved += usage->persistent_reserved[i]; - for (i = 0; i < c->replicas.nr; i++) { + for (unsigned i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry_v1 *e = cpu_replicas_entry(&c->replicas, i); fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]); } - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { struct bch_dev_usage dev = bch2_dev_usage_read(ca); usage->hidden += (dev.d[BCH_DATA_sb].buckets + @@ -1766,10 +1762,7 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca) int bch2_trans_mark_dev_sbs(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; - - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { int ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { percpu_ref_put(&ca->ref); diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 295b1f4e9ece..22a52bc8406b 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -866,8 +866,6 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, struct bch_ioctl_disk_get_idx arg) { dev_t dev = huge_decode_dev(arg.dev); - struct bch_dev *ca; - unsigned i; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -875,10 +873,10 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, if (!dev) return -EINVAL; - for_each_online_member(ca, c, i) + for_each_online_member(c, ca) if (ca->dev == dev) { percpu_ref_put(&ca->io_ref); - return i; + return ca->dev_idx; } return -BCH_ERR_ENOENT_dev_idx_not_found; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 943b4e5e4725..b1c89ed1bf46 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1694,11 +1694,9 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) static int bch2_show_devname(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; - struct bch_dev *ca; - unsigned i; bool first = true; - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { if (!first) seq_putc(seq, ':'); first = false; @@ -1822,13 +1820,12 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { struct bch_fs *c; - struct bch_dev *ca; struct super_block *sb; struct inode *vinode; struct bch_opts opts = bch2_opts_empty(); char **devs; struct bch_fs **devs_to_fs = NULL; - unsigned i, nr_devs; + unsigned nr_devs; int ret; opt_set(opts, read_only, (flags & SB_RDONLY) != 0); @@ -1850,7 +1847,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, goto got_sb; } - for (i = 0; i < nr_devs; i++) + for (unsigned i = 0; i < nr_devs; i++) devs_to_fs[i] = bch2_path_to_fs(devs[i]); sb = sget(fs_type, bch2_test_super, bch2_noset_super, @@ -1921,7 +1918,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { struct block_device *bdev = ca->disk_sb.bdev; /* XXX: create an anonymous device for multi device filesystems */ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 5883fc38b4f5..f9cd24113920 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1029,10 +1029,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca) int bch2_fs_journal_alloc(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; - - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { if (ca->journal.nr) continue; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 6db821c86551..364aca3b78fe 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1171,8 +1171,6 @@ int bch2_journal_read(struct bch_fs *c, struct journal_list jlist; struct journal_replay *i, **_i, *prev = NULL; struct genradix_iter radix_iter; - struct bch_dev *ca; - unsigned iter; struct printbuf buf = PRINTBUF; bool degraded = false, last_write_torn = false; u64 seq; @@ -1183,7 +1181,7 @@ int bch2_journal_read(struct bch_fs *c, jlist.last_seq = 0; jlist.ret = 0; - for_each_member_device(ca, c, iter) { + for_each_member_device(c, ca) { if (!c->opts.fsck && !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal))) continue; @@ -1349,7 +1347,7 @@ int bch2_journal_read(struct bch_fs *c, continue; for (ptr = 0; ptr < i->nr_ptrs; ptr++) { - ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev); + struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev); if (!i->ptrs[ptr].csum_good) bch_err_dev_offset(ca, i->ptrs[ptr].sector, @@ -1893,12 +1891,11 @@ CLOSURE_CALLBACK(bch2_journal_write) { closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; struct journal_buf *w = journal_last_unwritten_buf(j); struct bch_replicas_padded replicas; struct bio *bio; struct printbuf journal_debug_buf = PRINTBUF; - unsigned i, nr_rw_members = 0; + unsigned nr_rw_members = 0; int ret; BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); @@ -1958,7 +1955,7 @@ CLOSURE_CALLBACK(bch2_journal_write) if (c->opts.nochanges) goto no_io; - for_each_rw_member(ca, c, i) + for_each_rw_member(c, ca) nr_rw_members++; if (nr_rw_members > 1) @@ -1975,7 +1972,7 @@ CLOSURE_CALLBACK(bch2_journal_write) goto err; if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { - for_each_rw_member(ca, c, i) { + for_each_rw_member(c, ca) { percpu_ref_get(&ca->io_ref); bio = ca->journal.bio; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 794c10ec01b6..ef8f7e537f9f 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -263,12 +263,10 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja) void bch2_journal_do_discards(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; - unsigned iter; mutex_lock(&j->discard_lock); - for_each_rw_member(ca, c, iter) { + for_each_rw_member(c, ca) { struct journal_device *ja = &ca->journal; while (should_discard_bucket(j, ja)) { @@ -583,13 +581,11 @@ static size_t journal_flush_pins(struct journal *j, static u64 journal_seq_to_flush(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; u64 seq_to_flush = 0; - unsigned iter; spin_lock(&j->lock); - for_each_rw_member(ca, c, iter) { + for_each_rw_member(c, ca) { struct journal_device *ja = &ca->journal; unsigned nr_buckets, bucket_to_flush; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 8aa212685b3f..69e06a84dad4 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -267,19 +267,16 @@ static int bch2_copygc(struct moving_context *ctxt, */ unsigned long bch2_copygc_wait_amount(struct bch_fs *c) { - struct bch_dev *ca; - unsigned dev_idx; s64 wait = S64_MAX, fragmented_allowed, fragmented; - unsigned i; - for_each_rw_member(ca, c, dev_idx) { + for_each_rw_member(c, ca) { struct bch_dev_usage usage = bch2_dev_usage_read(ca); fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) * ca->mi.bucket_size) >> 1); fragmented = 0; - for (i = 0; i < BCH_DATA_NR; i++) + for (unsigned i = 0; i < BCH_DATA_NR; i++) if (data_type_movable(i)) fragmented += usage.d[i].fragmented; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d54e8dff99d4..0314a4c00aee 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1097,8 +1097,6 @@ int bch2_fs_initialize(struct bch_fs *c) struct bch_inode_unpacked root_inode, lostfound_inode; struct bkey_inode_buf packed_inode; struct qstr lostfound = QSTR("lost+found"); - struct bch_dev *ca; - unsigned i; int ret; bch_notice(c, "initializing new filesystem"); @@ -1120,10 +1118,10 @@ int bch2_fs_initialize(struct bch_fs *c) set_bit(BCH_FS_may_go_rw, &c->flags); set_bit(BCH_FS_fsck_done, &c->flags); - for (i = 0; i < BTREE_ID_NR; i++) + for (unsigned i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); - for_each_member_device(ca, c, i) + for_each_member_device(c, ca) bch2_dev_usage_init(ca); ret = bch2_fs_journal_alloc(c); @@ -1151,7 +1149,7 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - for_each_online_member(ca, c, i) + for_each_online_member(c, ca) ca->new_fs_bucket_idx = 0; ret = bch2_fs_freespace_init(c); @@ -1214,6 +1212,6 @@ int bch2_fs_initialize(struct bch_fs *c) return 0; err: - bch_err_fn(ca, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index c100f2ec2b96..9632f36f5f31 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -191,13 +191,10 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, struct jset_entry **end, u64 journal_seq) { - struct bch_dev *ca; - unsigned i, dev; - percpu_down_read(&c->mark_lock); if (!journal_seq) { - for (i = 0; i < ARRAY_SIZE(c->usage); i++) + for (unsigned i = 0; i < ARRAY_SIZE(c->usage); i++) bch2_fs_usage_acc_to_base(c, i); } else { bch2_fs_usage_acc_to_base(c, journal_seq & JOURNAL_BUF_MASK); @@ -223,7 +220,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, u->v = cpu_to_le64(atomic64_read(&c->key_version)); } - for (i = 0; i < BCH_REPLICAS_MAX; i++) { + for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) { struct jset_entry_usage *u = container_of(jset_entry_init(end, sizeof(*u)), struct jset_entry_usage, entry); @@ -234,7 +231,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]); } - for (i = 0; i < c->replicas.nr; i++) { + for (unsigned i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry_v1 *e = cpu_replicas_entry(&c->replicas, i); struct jset_entry_data_usage *u = @@ -247,7 +244,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, "embedded variable length struct"); } - for_each_member_device(ca, c, dev) { + for_each_member_device(c, ca) { unsigned b = sizeof(struct jset_entry_dev_usage) + sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR; struct jset_entry_dev_usage *u = @@ -255,9 +252,9 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, struct jset_entry_dev_usage, entry); u->entry.type = BCH_JSET_ENTRY_dev_usage; - u->dev = cpu_to_le32(dev); + u->dev = cpu_to_le32(ca->dev_idx); - for (i = 0; i < BCH_DATA_NR; i++) { + for (unsigned i = 0; i < BCH_DATA_NR; i++) { u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets); u->d[i].sectors = cpu_to_le64(ca->usage_base->d[i].sectors); u->d[i].fragmented = cpu_to_le64(ca->usage_base->d[i].fragmented); @@ -266,7 +263,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, percpu_up_read(&c->mark_lock); - for (i = 0; i < 2; i++) { + for (unsigned i = 0; i < 2; i++) { struct jset_entry_clock *clock = container_of(jset_entry_init(end, sizeof(*clock)), struct jset_entry_clock, entry); diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index a7e863bc77a9..e5ca5b30b36b 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -97,12 +97,15 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter, #define for_each_member_device_rcu(ca, c, iter, mask) \ for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter), mask)); (iter)++) -static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, unsigned *iter) +static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca) { - struct bch_dev *ca; + unsigned idx = ca ? ca->dev_idx + 1 : 0; + + if (ca) + percpu_ref_put(&ca->ref); rcu_read_lock(); - if ((ca = __bch2_next_dev(c, iter, NULL))) + if ((ca = __bch2_next_dev(c, &idx, NULL))) percpu_ref_get(&ca->ref); rcu_read_unlock(); @@ -112,41 +115,44 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, unsigned *iter /* * If you break early, you must drop your ref on the current device */ -#define for_each_member_device(ca, c, iter) \ - for ((iter) = 0; \ - (ca = bch2_get_next_dev(c, &(iter))); \ - percpu_ref_put(&ca->ref), (iter)++) +#define __for_each_member_device(_c, _ca) \ + for (; (_ca = bch2_get_next_dev(_c, _ca));) + +#define for_each_member_device(_c, _ca) \ + for (struct bch_dev *_ca = NULL; \ + (_ca = bch2_get_next_dev(_c, _ca));) static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, - unsigned *iter, - int state_mask) + struct bch_dev *ca, + unsigned state_mask) { - struct bch_dev *ca; + unsigned idx = ca ? ca->dev_idx + 1 : 0; + + if (ca) + percpu_ref_put(&ca->io_ref); rcu_read_lock(); - while ((ca = __bch2_next_dev(c, iter, NULL)) && + while ((ca = __bch2_next_dev(c, &idx, NULL)) && (!((1 << ca->mi.state) & state_mask) || !percpu_ref_tryget(&ca->io_ref))) - (*iter)++; + idx++; rcu_read_unlock(); return ca; } -#define __for_each_online_member(ca, c, iter, state_mask) \ - for ((iter) = 0; \ - (ca = bch2_get_next_online_dev(c, &(iter), state_mask)); \ - percpu_ref_put(&ca->io_ref), (iter)++) +#define __for_each_online_member(_c, _ca, state_mask) \ + for (struct bch_dev *_ca = NULL; \ + (_ca = bch2_get_next_online_dev(_c, _ca, state_mask));) -#define for_each_online_member(ca, c, iter) \ - __for_each_online_member(ca, c, iter, ~0) +#define for_each_online_member(c, ca) \ + __for_each_online_member(c, ca, ~0) -#define for_each_rw_member(ca, c, iter) \ - __for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_rw) +#define for_each_rw_member(c, ca) \ + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw)) -#define for_each_readable_member(ca, c, iter) \ - __for_each_online_member(ca, c, iter, \ - (1 << BCH_MEMBER_STATE_rw)|(1 << BCH_MEMBER_STATE_ro)) +#define for_each_readable_member(c, ca) \ + __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro)) /* * If a key exists that references a device, the device won't be going away and @@ -172,11 +178,9 @@ static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx) static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) { struct bch_devs_mask devs; - struct bch_dev *ca; - unsigned i; memset(&devs, 0, sizeof(devs)); - for_each_online_member(ca, c, i) + for_each_online_member(c, ca) __set_bit(ca->dev_idx, devs.d); return devs; } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index b610dfb0287a..270a96b5a6fd 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -241,14 +241,12 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, if (sb->fs_sb) { struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb); - struct bch_dev *ca; - unsigned i; lockdep_assert_held(&c->sb_lock); /* XXX: we're not checking that offline device have enough space */ - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { struct bch_sb_handle *dev_sb = &ca->disk_sb; if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { @@ -514,8 +512,6 @@ static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned static void bch2_sb_update(struct bch_fs *c) { struct bch_sb *src = c->disk_sb.sb; - struct bch_dev *ca; - unsigned i; lockdep_assert_held(&c->sb_lock); @@ -546,7 +542,7 @@ static void bch2_sb_update(struct bch_fs *c) le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, sizeof(c->sb.errors_silent) * 8); - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { struct bch_member m = bch2_sb_member_get(src, ca->dev_idx); ca->mi = bch2_mi_to_cpu(&m); } @@ -926,9 +922,8 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) int bch2_write_super(struct bch_fs *c) { struct closure *cl = &c->sb_write; - struct bch_dev *ca; struct printbuf err = PRINTBUF; - unsigned i, sb = 0, nr_wrote; + unsigned sb = 0, nr_wrote; struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; @@ -963,10 +958,10 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_errors_from_cpu(c); bch2_sb_downgrade_update(c); - for_each_online_member(ca, c, i) + for_each_online_member(c, ca) bch2_sb_from_fs(c, ca); - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { printbuf_reset(&err); ret = bch2_sb_validate(&ca->disk_sb, &err, WRITE); @@ -999,16 +994,16 @@ int bch2_write_super(struct bch_fs *c) return -BCH_ERR_sb_not_downgraded; } - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { __set_bit(ca->dev_idx, sb_written.d); ca->sb_write_error = 0; } - for_each_online_member(ca, c, i) + for_each_online_member(c, ca) read_back_super(c, ca); closure_sync(cl); - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { if (ca->sb_write_error) continue; @@ -1035,7 +1030,7 @@ int bch2_write_super(struct bch_fs *c) do { wrote = false; - for_each_online_member(ca, c, i) + for_each_online_member(c, ca) if (!ca->sb_write_error && sb < ca->disk_sb.sb->layout.nr_superblocks) { write_one_super(c, ca, sb); @@ -1045,7 +1040,7 @@ int bch2_write_super(struct bch_fs *c) sb++; } while (wrote); - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { if (ca->sb_write_error) __clear_bit(ca->dev_idx, sb_written.d); else @@ -1057,7 +1052,7 @@ int bch2_write_super(struct bch_fs *c) can_mount_with_written = bch2_have_enough_devs(c, sb_written, degraded_flags, false); - for (i = 0; i < ARRAY_SIZE(sb_written.d); i++) + for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++) sb_written.d[i] = ~sb_written.d[i]; can_mount_without_written = diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 45355b33933e..e3c71091cc46 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -249,8 +249,7 @@ static void bch2_dev_usage_journal_reserve(struct bch_fs *c) static void __bch2_fs_read_only(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i, clean_passes = 0; + unsigned clean_passes = 0; u64 seq = 0; bch2_fs_ec_stop(c); @@ -286,7 +285,7 @@ static void __bch2_fs_read_only(struct bch_fs *c) /* * After stopping journal: */ - for_each_member_device(ca, c, i) + for_each_member_device(c, ca) bch2_dev_allocator_remove(c, ca); } @@ -427,8 +426,6 @@ static int bch2_fs_read_write_late(struct bch_fs *c) static int __bch2_fs_read_write(struct bch_fs *c, bool early) { - struct bch_dev *ca; - unsigned i; int ret; if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) { @@ -469,7 +466,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) */ set_bit(JOURNAL_NEED_FLUSH_WRITE, &c->journal.flags); - for_each_rw_member(ca, c, i) + for_each_rw_member(c, ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); @@ -479,7 +476,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) #ifndef BCH_WRITE_REF_DEBUG percpu_ref_reinit(&c->writes); #else - for (i = 0; i < BCH_WRITE_REF_NR; i++) { + for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) { BUG_ON(atomic_long_read(&c->writes[i])); atomic_long_inc(&c->writes[i]); } @@ -602,9 +599,6 @@ static void bch2_fs_release(struct kobject *kobj) void __bch2_fs_stop(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; - bch_verbose(c, "shutting down"); set_bit(BCH_FS_stopping, &c->flags); @@ -615,7 +609,7 @@ void __bch2_fs_stop(struct bch_fs *c) bch2_fs_read_only(c); up_write(&c->state_lock); - for_each_member_device(ca, c, i) + for_each_member_device(c, ca) if (ca->kobj.state_in_sysfs && ca->disk_sb.bdev) sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs"); @@ -637,7 +631,7 @@ void __bch2_fs_stop(struct bch_fs *c) /* btree prefetch might have kicked off reads in the background: */ bch2_btree_flush_all_reads(c); - for_each_member_device(ca, c, i) + for_each_member_device(c, ca) cancel_work_sync(&ca->io_error_work); cancel_work_sync(&c->read_only_work); @@ -676,8 +670,6 @@ void bch2_fs_stop(struct bch_fs *c) static int bch2_fs_online(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; int ret = 0; lockdep_assert_held(&bch_fs_list_lock); @@ -710,7 +702,7 @@ static int bch2_fs_online(struct bch_fs *c) down_write(&c->state_lock); - for_each_member_device(ca, c, i) { + for_each_member_device(c, ca) { ret = bch2_dev_sysfs_online(c, ca); if (ret) { bch_err(c, "error creating sysfs objects"); @@ -1000,9 +992,7 @@ static void print_mount_opts(struct bch_fs *c) int bch2_fs_start(struct bch_fs *c) { - struct bch_dev *ca; time64_t now = ktime_get_real_seconds(); - unsigned i; int ret; print_mount_opts(c); @@ -1019,12 +1009,12 @@ int bch2_fs_start(struct bch_fs *c) goto err; } - for_each_online_member(ca, c, i) - bch2_members_v2_get_mut(c->disk_sb.sb, i)->last_mount = cpu_to_le64(now); + for_each_online_member(c, ca) + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now); mutex_unlock(&c->sb_lock); - for_each_rw_member(ca, c, i) + for_each_rw_member(c, ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); @@ -1362,8 +1352,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { struct bch_devs_mask new_online_devs; - struct bch_dev *ca2; - int i, nr_rw = 0, required; + int nr_rw = 0, required; lockdep_assert_held(&c->state_lock); @@ -1375,7 +1364,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, return true; /* do we have enough devices to write to? */ - for_each_member_device(ca2, c, i) + for_each_member_device(c, ca2) if (ca2 != ca) nr_rw += ca2->mi.state == BCH_MEMBER_STATE_rw; From 41b84fb489f7707af4490d1e5e22d88f3207da71 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 02:34:05 -0500 Subject: [PATCH 138/226] bcachefs: for_each_member_device_rcu() now declares loop iter Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 4 +--- fs/bcachefs/alloc_foreground.c | 5 +---- fs/bcachefs/buckets.c | 5 ++--- fs/bcachefs/disk_groups.c | 11 +++-------- fs/bcachefs/ec.c | 15 ++++++--------- fs/bcachefs/journal.c | 12 ++++-------- fs/bcachefs/journal_reclaim.c | 14 +++++--------- fs/bcachefs/sb-members.c | 8 +++----- fs/bcachefs/sb-members.h | 35 ++++++++++++++++++---------------- fs/bcachefs/super.c | 25 +++++++++--------------- 10 files changed, 53 insertions(+), 81 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 24a46e02f344..13beaddac4c6 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -891,7 +891,6 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos static bool next_bucket(struct bch_fs *c, struct bpos *bucket) { struct bch_dev *ca; - unsigned iter; if (bch2_dev_bucket_exists(c, *bucket)) return true; @@ -909,8 +908,7 @@ static bool next_bucket(struct bch_fs *c, struct bpos *bucket) } rcu_read_lock(); - iter = bucket->inode; - ca = __bch2_next_dev(c, &iter, NULL); + ca = __bch2_next_dev_idx(c, bucket->inode, NULL); if (ca) *bucket = POS(ca->dev_idx, ca->mi.first_bucket); rcu_read_unlock(); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 990ea7f9bc1b..8525d2b84c33 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -69,11 +69,8 @@ const char * const bch2_watermarks[] = { void bch2_reset_alloc_cursors(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i; - rcu_read_lock(); - for_each_member_device_rcu(ca, c, i, NULL) + for_each_member_device_rcu(c, ca, NULL) ca->alloc_cursor = 0; rcu_read_unlock(); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 29a6e2b17e48..c0dac04253f7 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -154,8 +154,7 @@ struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c) void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) { - struct bch_dev *ca; - unsigned i, u64s = fs_usage_u64s(c); + unsigned u64s = fs_usage_u64s(c); BUG_ON(idx >= ARRAY_SIZE(c->usage)); @@ -167,7 +166,7 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) percpu_memset(c->usage[idx], 0, u64s * sizeof(u64)); rcu_read_lock(); - for_each_member_device_rcu(ca, c, i, NULL) { + for_each_member_device_rcu(c, ca, NULL) { u64s = dev_usage_u64s(); acc_u64s_percpu((u64 *) ca->usage_base, diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 4d0cb0ccff32..1cd6ba8d0cce 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -89,19 +89,14 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb, void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c) { - struct bch_disk_groups_cpu *g; - struct bch_dev *ca; - int i; - unsigned iter; - out->atomic++; rcu_read_lock(); - g = rcu_dereference(c->disk_groups); + struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); if (!g) goto out; - for (i = 0; i < g->nr; i++) { + for (unsigned i = 0; i < g->nr; i++) { if (i) prt_printf(out, " "); @@ -111,7 +106,7 @@ void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c) } prt_printf(out, "[parent %d devs", g->entries[i].parent); - for_each_member_device_rcu(ca, c, iter, &g->entries[i].devs) + for_each_member_device_rcu(c, ca, &g->entries[i].devs) prt_printf(out, " %s", ca->name); prt_printf(out, "]"); } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index fc1563f6273f..0e5c455557a9 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1243,18 +1243,17 @@ static int unsigned_cmp(const void *_l, const void *_r) static unsigned pick_blocksize(struct bch_fs *c, struct bch_devs_mask *devs) { - struct bch_dev *ca; - unsigned i, nr = 0, sizes[BCH_SB_MEMBERS_MAX]; + unsigned nr = 0, sizes[BCH_SB_MEMBERS_MAX]; struct { unsigned nr, size; } cur = { 0, 0 }, best = { 0, 0 }; - for_each_member_device_rcu(ca, c, i, devs) + for_each_member_device_rcu(c, ca, devs) sizes[nr++] = ca->mi.bucket_size; sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL); - for (i = 0; i < nr; i++) { + for (unsigned i = 0; i < nr; i++) { if (sizes[i] != cur.size) { if (cur.nr > best.nr) best = cur; @@ -1337,8 +1336,6 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, enum bch_watermark watermark) { struct ec_stripe_head *h; - struct bch_dev *ca; - unsigned i; h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -1355,13 +1352,13 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, rcu_read_lock(); h->devs = target_rw_devs(c, BCH_DATA_user, target); - for_each_member_device_rcu(ca, c, i, &h->devs) + for_each_member_device_rcu(c, ca, &h->devs) if (!ca->mi.durability) - __clear_bit(i, h->devs.d); + __clear_bit(ca->dev_idx, h->devs.d); h->blocksize = pick_blocksize(c, &h->devs); - for_each_member_device_rcu(ca, c, i, &h->devs) + for_each_member_device_rcu(c, ca, &h->devs) if (ca->mi.bucket_size == h->blocksize) h->nr_active_devs++; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index f9cd24113920..8538ef34f62b 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1294,11 +1294,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); union journal_res_state s; - struct bch_dev *ca; unsigned long now = jiffies; u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes; - u64 seq; - unsigned i; if (!out->nr_tabstops) printbuf_tabstop_push(out, 24); @@ -1343,10 +1340,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_newline(out); - for (seq = journal_cur_seq(j); + for (u64 seq = journal_cur_seq(j); seq >= journal_last_unwritten_seq(j); --seq) { - i = seq & JOURNAL_BUF_MASK; + unsigned i = seq & JOURNAL_BUF_MASK; prt_printf(out, "unwritten entry:"); prt_tab(out); @@ -1390,8 +1387,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) j->space[journal_space_total].next_entry, j->space[journal_space_total].total); - for_each_member_device_rcu(ca, c, i, - &c->rw_devs[BCH_DATA_journal]) { + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { struct journal_device *ja = &ca->journal; if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d)) @@ -1400,7 +1396,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) if (!ja->nr) continue; - prt_printf(out, "dev %u:\n", i); + prt_printf(out, "dev %u:\n", ca->dev_idx); prt_printf(out, "\tnr\t\t%u\n", ja->nr); prt_printf(out, "\tbucket size\t%u\n", ca->mi.bucket_size); prt_printf(out, "\tavailable\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index ef8f7e537f9f..820d25e19e5f 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -136,15 +136,13 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne enum journal_space_from from) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; - unsigned i, pos, nr_devs = 0; + unsigned pos, nr_devs = 0; struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX]; BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); rcu_read_lock(); - for_each_member_device_rcu(ca, c, i, - &c->rw_devs[BCH_DATA_journal]) { + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { if (!ca->journal.nr) continue; @@ -173,19 +171,17 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne void bch2_journal_space_available(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; unsigned clean, clean_ondisk, total; unsigned max_entry_size = min(j->buf[0].buf_size >> 9, j->buf[1].buf_size >> 9); - unsigned i, nr_online = 0, nr_devs_want; + unsigned nr_online = 0, nr_devs_want; bool can_discard = false; int ret = 0; lockdep_assert_held(&j->lock); rcu_read_lock(); - for_each_member_device_rcu(ca, c, i, - &c->rw_devs[BCH_DATA_journal]) { + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { struct journal_device *ja = &ca->journal; if (!ja->nr) @@ -216,7 +212,7 @@ void bch2_journal_space_available(struct journal *j) nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas); - for (i = 0; i < journal_space_nr; i++) + for (unsigned i = 0; i < journal_space_nr; i++) j->space[i] = __journal_space_available(j, nr_devs_want, i); clean_ondisk = j->space[journal_space_clean_ondisk].total; diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 259af07f4624..7c5db669a467 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -358,14 +358,12 @@ const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { void bch2_sb_members_from_cpu(struct bch_fs *c) { struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); - struct bch_dev *ca; - unsigned i, e; rcu_read_lock(); - for_each_member_device_rcu(ca, c, i, NULL) { - struct bch_member *m = __bch2_members_v2_get_mut(mi, i); + for_each_member_device_rcu(c, ca, NULL) { + struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx); - for (e = 0; e < BCH_MEMBER_ERROR_NR; e++) + for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++) m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); } rcu_read_unlock(); diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index e5ca5b30b36b..be0a94183271 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -79,33 +79,38 @@ static inline struct bch_devs_list bch2_dev_list_single(unsigned dev) return (struct bch_devs_list) { .nr = 1, .data[0] = dev }; } -static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter, - const struct bch_devs_mask *mask) +static inline struct bch_dev *__bch2_next_dev_idx(struct bch_fs *c, unsigned idx, + const struct bch_devs_mask *mask) { struct bch_dev *ca = NULL; - while ((*iter = mask - ? find_next_bit(mask->d, c->sb.nr_devices, *iter) - : *iter) < c->sb.nr_devices && - !(ca = rcu_dereference_check(c->devs[*iter], + while ((idx = mask + ? find_next_bit(mask->d, c->sb.nr_devices, idx) + : idx) < c->sb.nr_devices && + !(ca = rcu_dereference_check(c->devs[idx], lockdep_is_held(&c->state_lock)))) - (*iter)++; + idx++; return ca; } -#define for_each_member_device_rcu(ca, c, iter, mask) \ - for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter), mask)); (iter)++) +static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *ca, + const struct bch_devs_mask *mask) +{ + return __bch2_next_dev_idx(c, ca ? ca->dev_idx + 1 : 0, mask); +} + +#define for_each_member_device_rcu(_c, _ca, _mask) \ + for (struct bch_dev *_ca = NULL; \ + (_ca = __bch2_next_dev((_c), _ca, (_mask)));) static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca) { - unsigned idx = ca ? ca->dev_idx + 1 : 0; - if (ca) percpu_ref_put(&ca->ref); rcu_read_lock(); - if ((ca = __bch2_next_dev(c, &idx, NULL))) + if ((ca = __bch2_next_dev(c, ca, NULL))) percpu_ref_get(&ca->ref); rcu_read_unlock(); @@ -126,16 +131,14 @@ static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, struct bch_dev *ca, unsigned state_mask) { - unsigned idx = ca ? ca->dev_idx + 1 : 0; - if (ca) percpu_ref_put(&ca->io_ref); rcu_read_lock(); - while ((ca = __bch2_next_dev(c, &idx, NULL)) && + while ((ca = __bch2_next_dev(c, ca, NULL)) && (!((1 << ca->mi.state) & state_mask) || !percpu_ref_tryget(&ca->io_ref))) - idx++; + ; rcu_read_unlock(); return ca; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e3c71091cc46..592005b8e448 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -167,14 +167,12 @@ static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); struct bch_fs *bch2_dev_to_fs(dev_t dev) { struct bch_fs *c; - struct bch_dev *ca; - unsigned i; mutex_lock(&bch_fs_list_lock); rcu_read_lock(); list_for_each_entry(c, &bch_fs_list, list) - for_each_member_device_rcu(ca, c, i, NULL) + for_each_member_device_rcu(c, ca, NULL) if (ca->disk_sb.bdev && ca->disk_sb.bdev->bd_dev == dev) { closure_get(&c->cl); goto found; @@ -215,14 +213,13 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) static void bch2_dev_usage_journal_reserve(struct bch_fs *c) { - struct bch_dev *ca; - unsigned i, nr = 0, u64s = + unsigned nr = 0, u64s = ((sizeof(struct jset_entry_dev_usage) + sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR)) / sizeof(u64); rcu_read_lock(); - for_each_member_device_rcu(ca, c, i, NULL) + for_each_member_device_rcu(c, ca, NULL) nr++; rcu_read_unlock(); @@ -1906,18 +1903,14 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) /* return with ref on ca->ref: */ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) { - struct bch_dev *ca; - unsigned i; - rcu_read_lock(); - for_each_member_device_rcu(ca, c, i, NULL) - if (!strcmp(name, ca->name)) - goto found; - ca = ERR_PTR(-BCH_ERR_ENOENT_dev_not_found); -found: + for_each_member_device_rcu(c, ca, NULL) + if (!strcmp(name, ca->name)) { + rcu_read_unlock(); + return ca; + } rcu_read_unlock(); - - return ca; + return ERR_PTR(-BCH_ERR_ENOENT_dev_not_found); } /* Filesystem open: */ From cea07a7b6ac2cd9f40e20a5010203014c4996eb2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 02:19:23 -0500 Subject: [PATCH 139/226] bcachefs: vstruct_for_each() now declares loop iter Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.h | 2 -- fs/bcachefs/journal_io.c | 3 +-- fs/bcachefs/recovery.c | 9 +++------ fs/bcachefs/super-io.c | 4 ---- fs/bcachefs/vstructs.h | 10 +++++----- 5 files changed, 9 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 1e14e6b324f8..4544ce24bb8a 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -238,8 +238,6 @@ bch2_journal_add_entry(struct journal *j, struct journal_res *res, static inline bool journal_entry_empty(struct jset *j) { - struct jset_entry *i; - if (j->seq != j->last_seq) return false; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 364aca3b78fe..671a6820bc0c 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -783,7 +783,6 @@ void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, static int jset_validate_entries(struct bch_fs *c, struct jset *jset, enum bkey_invalid_flags flags) { - struct jset_entry *entry; unsigned version = le32_to_cpu(jset->version); int ret = 0; @@ -1723,7 +1722,7 @@ static CLOSURE_CALLBACK(do_journal_write) static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct jset_entry *start, *end, *i; + struct jset_entry *start, *end; struct jset *jset = w->data; struct journal_keys_to_wb wb = { NULL }; unsigned sectors, bytes, u64s; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0314a4c00aee..a792f42ab5ff 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -344,14 +344,11 @@ static int journal_replay_entry_early(struct bch_fs *c, static int journal_replay_early(struct bch_fs *c, struct bch_sb_field_clean *clean) { - struct jset_entry *entry; - int ret; - if (clean) { - for (entry = clean->start; + for (struct jset_entry *entry = clean->start; entry != vstruct_end(&clean->field); entry = vstruct_next(entry)) { - ret = journal_replay_entry_early(c, entry); + int ret = journal_replay_entry_early(c, entry); if (ret) return ret; } @@ -366,7 +363,7 @@ static int journal_replay_early(struct bch_fs *c, continue; vstruct_for_each(&i->j, entry) { - ret = journal_replay_entry_early(c, entry); + int ret = journal_replay_entry_early(c, entry); if (ret) return ret; } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 270a96b5a6fd..89d2d2a86eb0 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -101,8 +101,6 @@ static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *, struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb, enum bch_sb_field_type type) { - struct bch_sb_field *f; - /* XXX: need locking around superblock to access optional fields */ vstruct_for_each(sb, f) @@ -366,7 +364,6 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, int rw) { struct bch_sb *sb = disk_sb->sb; - struct bch_sb_field *f; struct bch_sb_field_members_v1 *mi; enum bch_opt_id opt_id; u16 block_size; @@ -1250,7 +1247,6 @@ void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l) void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, bool print_layout, unsigned fields) { - struct bch_sb_field *f; u64 fields_have = 0; unsigned nr_devices = 0; diff --git a/fs/bcachefs/vstructs.h b/fs/bcachefs/vstructs.h index a6561b4b36a6..2ad338e282da 100644 --- a/fs/bcachefs/vstructs.h +++ b/fs/bcachefs/vstructs.h @@ -48,14 +48,14 @@ ((void *) ((u64 *) (_s)->_data + __vstruct_u64s(_s))) #define vstruct_for_each(_s, _i) \ - for (_i = (_s)->start; \ + for (typeof(&(_s)->start[0]) _i = (_s)->start; \ _i < vstruct_last(_s); \ _i = vstruct_next(_i)) -#define vstruct_for_each_safe(_s, _i, _t) \ - for (_i = (_s)->start; \ - _i < vstruct_last(_s) && (_t = vstruct_next(_i), true); \ - _i = _t) +#define vstruct_for_each_safe(_s, _i) \ + for (typeof(&(_s)->start[0]) _next, _i = (_s)->start; \ + _i < vstruct_last(_s) && (_next = vstruct_next(_i), true); \ + _i = _next) #define vstruct_idx(_s, _idx) \ ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx))) From 4eb3877eaeba4feb81b260ff48a2063ca296ac74 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 03:07:26 -0500 Subject: [PATCH 140/226] bcachefs: fsck -> bch2_trans_run() Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 77 ++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index a66b0a8203ac..e778978d44fa 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -973,21 +973,19 @@ static int check_inode(struct btree_trans *trans, int bch2_check_inodes(struct bch_fs *c) { bool full = c->opts.fsck; - struct btree_trans *trans = bch2_trans_get(c); struct bch_inode_unpacked prev = { 0 }; struct snapshots_seen s; - int ret; snapshots_seen_init(&s); - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, - POS_MIN, - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_inode(trans, &iter, k, &prev, &s, full)); + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_inode(trans, &iter, k, &prev, &s, full))); snapshots_seen_exit(&s); - bch2_trans_put(trans); bch_err_fn(c, ret); return ret; } @@ -1417,30 +1415,28 @@ int bch2_check_extents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct snapshots_seen s; - struct btree_trans *trans = bch2_trans_get(c); struct extent_ends extent_ends; struct disk_reservation res = { 0 }; - int ret = 0; snapshots_seen_init(&s); extent_ends_init(&extent_ends); - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_extents, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - &res, NULL, - BCH_TRANS_COMMIT_no_enospc, ({ - bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: - check_extent_overbig(trans, &iter, k); - })) ?: - check_i_sectors(trans, &w); + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_extents, + POS(BCACHEFS_ROOT_INO, 0), + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + &res, NULL, + BCH_TRANS_COMMIT_no_enospc, ({ + bch2_disk_reservation_put(c, &res); + check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: + check_extent_overbig(trans, &iter, k); + })) ?: + check_i_sectors(trans, &w)); bch2_disk_reservation_put(c, &res); extent_ends_exit(&extent_ends); inode_walker_exit(&w); snapshots_seen_exit(&s); - bch2_trans_put(trans); bch_err_fn(c, ret); return ret; @@ -1448,22 +1444,19 @@ int bch2_check_extents(struct bch_fs *c) int bch2_check_indirect_extents(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); struct disk_reservation res = { 0 }; - int ret = 0; - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, - POS_MIN, - BTREE_ITER_PREFETCH, k, - &res, NULL, - BCH_TRANS_COMMIT_no_enospc, ({ - bch2_disk_reservation_put(c, &res); - check_extent_overbig(trans, &iter, k); - })); + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, + POS_MIN, + BTREE_ITER_PREFETCH, k, + &res, NULL, + BCH_TRANS_COMMIT_no_enospc, ({ + bch2_disk_reservation_put(c, &res); + check_extent_overbig(trans, &iter, k); + }))); bch2_disk_reservation_put(c, &res); - bch2_trans_put(trans); - bch_err_fn(c, ret); return ret; } @@ -1816,20 +1809,18 @@ int bch2_check_dirents(struct bch_fs *c) struct inode_walker target = inode_walker_init(); struct snapshots_seen s; struct bch_hash_info hash_info; - struct btree_trans *trans = bch2_trans_get(c); - int ret = 0; snapshots_seen_init(&s); - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, - POS(BCACHEFS_ROOT_INO, 0), - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, - k, - NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, - check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)); + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, + POS(BCACHEFS_ROOT_INO, 0), + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, + k, + NULL, NULL, + BCH_TRANS_COMMIT_no_enospc, + check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s))); - bch2_trans_put(trans); snapshots_seen_exit(&s); inode_walker_exit(&dir); inode_walker_exit(&target); From 0bc64d7e2649f735eaa4ba7d422da446505ef89a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 03:39:03 -0500 Subject: [PATCH 141/226] bcachefs: kill __bch2_btree_iter_peek_upto_and_restart() dead code Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 2 +- fs/bcachefs/btree_iter.h | 50 +++++++++++++--------------------------- 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 72da7bedbe50..79e6ec7b9a52 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2285,7 +2285,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) goto got_key; } - if (bch2_snapshot_is_ancestor(iter->trans->c, + if (bch2_snapshot_is_ancestor(trans->c, iter->snapshot, k.k->p.snapshot)) { if (saved_path) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 4a16214b1e6b..29f02335642e 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -623,38 +623,6 @@ static inline int btree_trans_too_many_iters(struct btree_trans *trans) return 0; } -struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); - -static inline struct bkey_s_c -__bch2_btree_iter_peek_and_restart(struct btree_trans *trans, - struct btree_iter *iter, unsigned flags) -{ - struct bkey_s_c k; - - while (btree_trans_too_many_iters(trans) || - (k = bch2_btree_iter_peek_type(iter, flags), - bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) - bch2_trans_begin(trans); - - return k; -} - -static inline struct bkey_s_c -__bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos end, - unsigned flags) -{ - struct bkey_s_c k; - - while (btree_trans_too_many_iters(trans) || - (k = bch2_btree_iter_peek_upto_type(iter, end, flags), - bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) - bch2_trans_begin(trans); - - return k; -} - /* * goto instead of loop, so that when used inside for_each_btree_key2() * break/continue work correctly @@ -780,6 +748,22 @@ transaction_restart: \ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_commit_flags))) +struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); + +static inline struct bkey_s_c +__bch2_btree_iter_peek_and_restart(struct btree_trans *trans, + struct btree_iter *iter, unsigned flags) +{ + struct bkey_s_c k; + + while (btree_trans_too_many_iters(trans) || + (k = bch2_btree_iter_peek_type(iter, flags), + bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) + bch2_trans_begin(trans); + + return k; +} + #define for_each_btree_key_old(_trans, _iter, _btree_id, \ _start, _flags, _k, _ret) \ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ @@ -841,8 +825,6 @@ transaction_restart: \ _p; \ }) -/* new multiple iterator interface: */ - void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); void bch2_btree_path_to_text(struct printbuf *, struct btree_path *); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); From 0beebd92457c3221b59840487591c1dff8071168 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Dec 2023 15:47:15 -0500 Subject: [PATCH 142/226] bcachefs: bkey_for_each_ptr() now declares loop iter Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 1 - fs/bcachefs/btree_io.c | 2 -- fs/bcachefs/data_update.c | 4 ---- fs/bcachefs/ec.c | 1 - fs/bcachefs/extents.c | 4 ---- fs/bcachefs/extents.h | 6 +----- fs/bcachefs/io_misc.c | 1 - fs/bcachefs/io_write.c | 20 +++++--------------- fs/bcachefs/journal_io.c | 1 - fs/bcachefs/move.c | 5 +---- 10 files changed, 7 insertions(+), 38 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index ea1d0ed70f50..9f27cb3ea563 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1844,7 +1844,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; struct bkey_i *u; int ret; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d7cac6a80507..38d27cae49ea 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -934,7 +934,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct sort_iter *iter; struct btree_node *sorted; struct bkey_packed *k; - struct bch_extent_ptr *ptr; struct bset *i; bool used_mempool, blacklisted; bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && @@ -1896,7 +1895,6 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, static void btree_write_submit(struct work_struct *work) { struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work); - struct bch_extent_ptr *ptr; BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; bkey_copy(&tmp.k, &wbio->key); diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 1e6d162753fb..6f13477ff652 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -356,7 +356,6 @@ void bch2_data_update_exit(struct data_update *update) struct bch_fs *c = update->op.c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k)); - const struct bch_extent_ptr *ptr; bkey_for_each_ptr(ptrs, ptr) { if (c->opts.nocow_enabled) @@ -377,7 +376,6 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans, struct bio *bio = &update->op.wbio.bio; struct bkey_i_extent *e; struct write_point *wp; - struct bch_extent_ptr *ptr; struct closure cl; struct btree_iter iter; struct bkey_s_c k; @@ -509,7 +507,6 @@ int bch2_data_update_init(struct btree_trans *trans, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; - const struct bch_extent_ptr *ptr; unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; unsigned ptrs_locked = 0; int ret = 0; @@ -655,7 +652,6 @@ int bch2_data_update_init(struct btree_trans *trans, void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; unsigned i = 0; bkey_for_each_ptr(ptrs, ptr) { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 0e5c455557a9..e89185a28e08 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -161,7 +161,6 @@ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s, struct bkey_s_c k, unsigned *block) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; unsigned i, nr_data = s->nr_blocks - s->nr_redundant; bkey_for_each_ptr(ptrs, ptr) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 9d8afcb5979a..82ec056f4cdb 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -843,7 +843,6 @@ void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev) const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; bkey_for_each_ptr(ptrs, ptr) if (ptr->dev == dev) @@ -855,7 +854,6 @@ const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; bkey_for_each_ptr(ptrs, ptr) if (bch2_dev_in_target(c, ptr->dev, target) && @@ -1065,7 +1063,6 @@ static int extent_ptr_invalid(struct bch_fs *c, struct printbuf *err) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr2; u64 bucket; u32 bucket_offset; struct bch_dev *ca; @@ -1307,7 +1304,6 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, } incompressible: if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { - const struct bch_extent_ptr *ptr; unsigned i = 0; bkey_for_each_ptr(ptrs, ptr) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 4295e93984c4..77ae4476578b 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -300,7 +300,7 @@ static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k) bkey_extent_entry_for_each_from(_p, _entry, _p.start) #define __bkey_for_each_ptr(_start, _end, _ptr) \ - for ((_ptr) = (_start); \ + for (typeof(_start) (_ptr) = (_start); \ ((_ptr) = __bkey_ptr_next(_ptr, _end)); \ (_ptr)++) @@ -547,7 +547,6 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k) static inline bool bkey_extent_is_unwritten(struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; bkey_for_each_ptr(ptrs, ptr) if (ptr->unwritten) @@ -565,7 +564,6 @@ static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; bkey_for_each_ptr(p, ptr) ret.data[ret.nr++] = ptr->dev; @@ -577,7 +575,6 @@ static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; bkey_for_each_ptr(p, ptr) if (!ptr->cached) @@ -590,7 +587,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; bkey_for_each_ptr(p, ptr) if (ptr->cached) diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 92e7f765f9cf..ca6d5f516aa2 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -74,7 +74,6 @@ int bch2_extent_fallocate(struct btree_trans *trans, struct bkey_i_extent *e; struct bch_devs_list devs_have; struct write_point *wp; - struct bch_extent_ptr *ptr; devs_have.nr = 0; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 0af39b562666..628b3e966c1d 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -396,16 +396,14 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, bool nocow) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); - const struct bch_extent_ptr *ptr; struct bch_write_bio *n; - struct bch_dev *ca; BUG_ON(c->opts.nochanges); bkey_for_each_ptr(ptrs, ptr) { BUG_ON(!bch2_dev_exists2(c, ptr->dev)); - ca = bch_dev_bkey_exists(c, ptr->dev); + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); if (to_entry(ptr + 1) < ptrs.end) { n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, @@ -1108,7 +1106,6 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, static inline void bch2_nocow_write_unlock(struct bch_write_op *op) { struct bch_fs *c = op->c; - const struct bch_extent_ptr *ptr; struct bkey_i *k; for_each_keylist_key(&op->insert_keys, k) { @@ -1127,25 +1124,20 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, struct bkey_s_c k, u64 new_i_size) { - struct bkey_i *new; - struct bkey_ptrs ptrs; - struct bch_extent_ptr *ptr; - int ret; - if (!bch2_extents_match(bkey_i_to_s_c(orig), k)) { /* trace this */ return 0; } - new = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(new); + struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); + int ret = PTR_ERR_OR_ZERO(new); if (ret) return ret; bch2_cut_front(bkey_start_pos(&orig->k), new); bch2_cut_back(orig->k.p, new); - ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); + struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); bkey_for_each_ptr(ptrs, ptr) ptr->unwritten = 0; @@ -1225,8 +1217,6 @@ static void bch2_nocow_write(struct bch_write_op *op) struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; - struct bkey_ptrs_c ptrs; - const struct bch_extent_ptr *ptr; DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets; u32 snapshot; struct bucket_to_lock *stale_at; @@ -1269,7 +1259,7 @@ static void bch2_nocow_write(struct bch_write_op *op) break; /* Get iorefs before dropping btree locks: */ - ptrs = bch2_bkey_ptrs_c(k); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr) { struct bpos b = PTR_BUCKET_POS(c, ptr); struct nocow_lock_bucket *l = diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 671a6820bc0c..c5bc58247146 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1678,7 +1678,6 @@ static CLOSURE_CALLBACK(do_journal_write) struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_dev *ca; struct journal_buf *w = journal_last_unwritten_buf(j); - struct bch_extent_ptr *ptr; struct bio *bio; unsigned sectors = vstruct_sectors(w->data, c->block_bits); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 6cb38484fa77..7a33319dcd16 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -695,9 +695,6 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, break; if (!bp.level) { - const struct bch_extent_ptr *ptr; - unsigned i = 0; - k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -720,6 +717,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, data_opts.target = io_opts.background_target; data_opts.rewrite_ptrs = 0; + unsigned i = 0; bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { if (ptr->dev == bucket.inode) { data_opts.rewrite_ptrs |= 1U << i; @@ -890,7 +888,6 @@ static bool migrate_pred(struct bch_fs *c, void *arg, struct data_update_opts *data_opts) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; struct bch_ioctl_data *op = arg; unsigned i = 0; From 1a2a9f9f53a6c4d77bc16062fcb65c7169fb6ed1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Dec 2023 22:24:46 -0500 Subject: [PATCH 143/226] bcachefs: for_each_keylist_key() declares loop iter Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 2 -- fs/bcachefs/io_write.c | 9 +++------ fs/bcachefs/keylist.c | 2 -- fs/bcachefs/keylist.h | 4 +--- 4 files changed, 4 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index f0e3f51e62ca..060fe357aa35 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -556,8 +556,6 @@ static void btree_update_add_key(struct btree_update *as, static int btree_update_nodes_written_trans(struct btree_trans *trans, struct btree_update *as) { - struct bkey_i *k; - struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, as->journal_u64s); int ret = PTR_ERR_OR_ZERO(e); if (ret) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 628b3e966c1d..33c0e783d546 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1106,15 +1106,14 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, static inline void bch2_nocow_write_unlock(struct bch_write_op *op) { struct bch_fs *c = op->c; - struct bkey_i *k; for_each_keylist_key(&op->insert_keys, k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); bkey_for_each_ptr(ptrs, ptr) bch2_bucket_nocow_unlock(&c->nocow_locks, - PTR_BUCKET_POS(c, ptr), - BUCKET_NOCOW_LOCK_UPDATE); + PTR_BUCKET_POS(c, ptr), + BUCKET_NOCOW_LOCK_UPDATE); } } @@ -1158,11 +1157,9 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) { struct bch_fs *c = op->c; struct btree_trans *trans = bch2_trans_get(c); - struct bkey_i *orig; - int ret; for_each_keylist_key(&op->insert_keys, orig) { - ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents, + int ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents, bkey_start_pos(&orig->k), orig->k.p, BTREE_ITER_INTENT, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ diff --git a/fs/bcachefs/keylist.c b/fs/bcachefs/keylist.c index 5699cd4873c8..1b828bddd11b 100644 --- a/fs/bcachefs/keylist.c +++ b/fs/bcachefs/keylist.c @@ -43,8 +43,6 @@ void bch2_keylist_pop_front(struct keylist *l) #ifdef CONFIG_BCACHEFS_DEBUG void bch2_verify_keylist_sorted(struct keylist *l) { - struct bkey_i *k; - for_each_keylist_key(l, k) BUG_ON(bkey_next(k) != l->top && bpos_ge(k->k.p, bkey_next(k)->k.p)); diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h index fe759c7031e0..e687e0e9aede 100644 --- a/fs/bcachefs/keylist.h +++ b/fs/bcachefs/keylist.h @@ -50,18 +50,16 @@ static inline struct bkey_i *bch2_keylist_front(struct keylist *l) } #define for_each_keylist_key(_keylist, _k) \ - for (_k = (_keylist)->keys; \ + for (struct bkey_i *_k = (_keylist)->keys; \ _k != (_keylist)->top; \ _k = bkey_next(_k)) static inline u64 keylist_sectors(struct keylist *keys) { - struct bkey_i *k; u64 ret = 0; for_each_keylist_key(keys, k) ret += k->k.size; - return ret; } From 0c0ba8e9c5a9f06a6a57acfc34a5526f9fdd41c4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 19 Dec 2023 20:54:11 -0500 Subject: [PATCH 144/226] bcachefs: skip journal more often in key cache reclaim Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 7d2db1e5df3b..a4e9e7ffad32 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -655,7 +655,9 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, */ if (ck->journal.seq == journal_last_seq(j)) commit_flags |= BCH_WATERMARK_reclaim; - else + + if (ck->journal.seq != journal_last_seq(j) || + j->watermark == BCH_WATERMARK_stripe) commit_flags |= BCH_TRANS_COMMIT_no_journal_res; ret = bch2_btree_iter_traverse(&b_iter) ?: From 806ebf2aa01749ca4c510fbb42cd146bdfdbe5a4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 22 Dec 2023 21:10:32 -0500 Subject: [PATCH 145/226] bcachefs: Convert split_devs() to darray Bit of cleanup & modernization: also moving this code to util.c, it'll be used by userspace as well. Signed-off-by: Kent Overstreet --- fs/bcachefs/darray.h | 1 + fs/bcachefs/fs.c | 68 +++++++++++++------------------------------- fs/bcachefs/util.c | 34 ++++++++++++++++++++++ fs/bcachefs/util.h | 3 ++ 4 files changed, 58 insertions(+), 48 deletions(-) diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index c7f153cecde3..4b340d13caac 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -20,6 +20,7 @@ struct { \ #define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0) typedef DARRAY(char) darray_char; +typedef DARRAY(char *) darray_str; int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b1c89ed1bf46..12de97bc93b9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1624,31 +1624,6 @@ static struct bch_fs *bch2_path_to_fs(const char *path) return c ?: ERR_PTR(-ENOENT); } -static char **split_devs(const char *_dev_name, unsigned *nr) -{ - char *dev_name = NULL, **devs = NULL, *s; - size_t i = 0, nr_devs = 0; - - dev_name = kstrdup(_dev_name, GFP_KERNEL); - if (!dev_name) - return NULL; - - for (s = dev_name; s; s = strchr(s + 1, ':')) - nr_devs++; - - devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL); - if (!devs) { - kfree(dev_name); - return NULL; - } - - while ((s = strsep(&dev_name, ":"))) - devs[i++] = s; - - *nr = nr_devs; - return devs; -} - static int bch2_remount(struct super_block *sb, int *flags, char *data) { struct bch_fs *c = sb->s_fs_info; @@ -1801,17 +1776,18 @@ static int bch2_noset_super(struct super_block *s, void *data) return -EBUSY; } +typedef DARRAY(struct bch_fs *) darray_fs; + static int bch2_test_super(struct super_block *s, void *data) { struct bch_fs *c = s->s_fs_info; - struct bch_fs **devs = data; - unsigned i; + darray_fs *d = data; if (!c) return false; - for (i = 0; devs[i]; i++) - if (c != devs[i]) + darray_for_each(*d, i) + if (c != *i) return false; return true; } @@ -1823,9 +1799,6 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, struct super_block *sb; struct inode *vinode; struct bch_opts opts = bch2_opts_empty(); - char **devs; - struct bch_fs **devs_to_fs = NULL; - unsigned nr_devs; int ret; opt_set(opts, read_only, (flags & SB_RDONLY) != 0); @@ -1837,25 +1810,25 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, if (!dev_name || strlen(dev_name) == 0) return ERR_PTR(-EINVAL); - devs = split_devs(dev_name, &nr_devs); - if (!devs) - return ERR_PTR(-ENOMEM); + darray_str devs; + ret = bch2_split_devs(dev_name, &devs); + if (ret) + return ERR_PTR(ret); - devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL); - if (!devs_to_fs) { - sb = ERR_PTR(-ENOMEM); - goto got_sb; + darray_fs devs_to_fs = {}; + darray_for_each(devs, i) { + ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i)); + if (ret) { + sb = ERR_PTR(ret); + goto got_sb; + } } - for (unsigned i = 0; i < nr_devs; i++) - devs_to_fs[i] = bch2_path_to_fs(devs[i]); - - sb = sget(fs_type, bch2_test_super, bch2_noset_super, - flags|SB_NOSEC, devs_to_fs); + sb = sget(fs_type, bch2_test_super, bch2_noset_super, flags|SB_NOSEC, &devs_to_fs); if (!IS_ERR(sb)) goto got_sb; - c = bch2_fs_open(devs, nr_devs, opts); + c = bch2_fs_open(devs.data, devs.nr, opts); if (IS_ERR(c)) { sb = ERR_CAST(c); goto got_sb; @@ -1875,9 +1848,8 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, if (IS_ERR(sb)) bch2_fs_stop(c); got_sb: - kfree(devs_to_fs); - kfree(devs[0]); - kfree(devs); + darray_exit(&devs_to_fs); + bch2_darray_str_exit(&devs); if (IS_ERR(sb)) { ret = PTR_ERR(sb); diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index d530277f9017..2e4c5d9606de 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -1174,3 +1174,37 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) return ret; } + +void bch2_darray_str_exit(darray_str *d) +{ + darray_for_each(*d, i) + kfree(*i); + darray_exit(d); +} + +int bch2_split_devs(const char *_dev_name, darray_str *ret) +{ + darray_init(ret); + + char *dev_name = kstrdup(_dev_name, GFP_KERNEL), *s = dev_name; + if (!dev_name) + return -ENOMEM; + + while ((s = strsep(&dev_name, ":"))) { + char *p = kstrdup(s, GFP_KERNEL); + if (!p) + goto err; + + if (darray_push(ret, p)) { + kfree(p); + goto err; + } + } + + kfree(dev_name); + return 0; +err: + bch2_darray_str_exit(ret); + kfree(dev_name); + return -ENOMEM; +} diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 1ff063bb8741..4290e0a53b75 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -863,4 +863,7 @@ static inline bool qstr_eq(const struct qstr l, const struct qstr r) return l.len == r.len && !memcmp(l.name, r.name, l.len); } +void bch2_darray_str_exit(darray_str *); +int bch2_split_devs(const char *, darray_str *); + #endif /* _BCACHEFS_UTIL_H */ From 4753bdeb26d55eebb415254f795b8be66cb80452 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 20 Dec 2023 01:20:53 -0500 Subject: [PATCH 146/226] bcachefs: Kill GFP_NOFAIL usage in readahead path Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-buffered.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index e48b364db5ae..73c12e565af5 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -52,23 +52,20 @@ struct readpages_iter { static int readpages_iter_init(struct readpages_iter *iter, struct readahead_control *ractl) { - memset(iter, 0, sizeof(*iter)); + struct folio *folio; - iter->mapping = ractl->mapping; + *iter = (struct readpages_iter) { ractl->mapping }; - int ret = bch2_filemap_get_contig_folios_d(iter->mapping, - ractl->_index << PAGE_SHIFT, - (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT, - 0, mapping_gfp_mask(iter->mapping), - &iter->folios); - if (ret) - return ret; + while ((folio = __readahead_folio(ractl))) { + if (!bch2_folio_create(folio, GFP_KERNEL) || + darray_push(&iter->folios, folio)) { + bch2_folio_release(folio); + ractl->_nr_pages += folio_nr_pages(folio); + ractl->_index -= folio_nr_pages(folio); + return iter->folios.nr ? 0 : -ENOMEM; + } - darray_for_each(iter->folios, fi) { - ractl->_nr_pages -= 1U << folio_order(*fi); - __bch2_folio_create(*fi, __GFP_NOFAIL|GFP_KERNEL); - folio_put(*fi); - folio_put(*fi); + folio_put(folio); } return 0; @@ -270,12 +267,12 @@ void bch2_readahead(struct readahead_control *ractl) struct btree_trans *trans = bch2_trans_get(c); struct folio *folio; struct readpages_iter readpages_iter; - int ret; bch2_inode_opts_get(&opts, c, &inode->ei_inode); - ret = readpages_iter_init(&readpages_iter, ractl); - BUG_ON(ret); + int ret = readpages_iter_init(&readpages_iter, ractl); + if (ret) + return; bch2_pagecache_add_get(inode); From 5ce8b92da0b04faa4864845f43ed7357034e700d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Dec 2023 01:51:04 -0500 Subject: [PATCH 147/226] bcachefs: minor bch2_btree_path_set_pos() optimization bpos_eq() is cheaper than bpos_cmp() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 4 +++- fs/bcachefs/btree_iter.h | 8 +++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 79e6ec7b9a52..f16fefbb6497 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1221,8 +1221,10 @@ struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans, struct btree_path * __must_check __bch2_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos new_pos, - bool intent, unsigned long ip, int cmp) + bool intent, unsigned long ip) { + int cmp = bpos_cmp(new_pos, path->pos); + bch2_trans_verify_not_in_restart(trans); EBUG_ON(!path->ref); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 29f02335642e..3b981144e472 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -176,17 +176,15 @@ bch2_btree_path_make_mut(struct btree_trans *trans, struct btree_path * __must_check __bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *, - struct bpos, bool, unsigned long, int); + struct bpos, bool, unsigned long); static inline struct btree_path * __must_check bch2_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos new_pos, bool intent, unsigned long ip) { - int cmp = bpos_cmp(new_pos, path->pos); - - return cmp - ? __bch2_btree_path_set_pos(trans, path, new_pos, intent, ip, cmp) + return !bpos_eq(new_pos, path->pos) + ? __bch2_btree_path_set_pos(trans, path, new_pos, intent, ip) : path; } From 255ebbbf750727e62a07e1602ffeaf1d4842018d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Dec 2023 02:00:43 -0500 Subject: [PATCH 148/226] bcachefs: bch2_path_get() -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 12 ++++++------ fs/bcachefs/btree_iter.h | 6 +++--- fs/bcachefs/btree_types.h | 2 ++ fs/bcachefs/btree_update.c | 12 +++++++----- fs/bcachefs/btree_update_interior.c | 4 ++-- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index f16fefbb6497..8662d396e57b 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1547,10 +1547,10 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, return path; } -struct btree_path *bch2_path_get(struct btree_trans *trans, - enum btree_id btree_id, struct bpos pos, - unsigned locks_want, unsigned level, - unsigned flags, unsigned long ip) +btree_path_idx_t bch2_path_get(struct btree_trans *trans, + enum btree_id btree_id, struct bpos pos, + unsigned locks_want, unsigned level, + unsigned flags, unsigned long ip) { struct btree_path *path, *path_pos = NULL; bool cached = flags & BTREE_ITER_CACHED; @@ -1618,7 +1618,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans, if (locks_want > path->locks_want) bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL); - return path; + return path->idx; } struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u) @@ -1928,7 +1928,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos return bkey_s_c_null; if (!iter->key_cache_path) - iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos, + iter->key_cache_path = trans->paths + bch2_path_get(trans, iter->btree_id, pos, iter->flags & BTREE_ITER_INTENT, 0, iter->flags|BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL, diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 3b981144e472..757354c53224 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -202,8 +202,8 @@ static inline int __must_check bch2_btree_path_traverse(struct btree_trans *tran int __must_check bch2_btree_path_traverse(struct btree_trans *, struct btree_path *, unsigned); -struct btree_path *bch2_path_get(struct btree_trans *, enum btree_id, struct bpos, - unsigned, unsigned, unsigned, unsigned long); +btree_path_idx_t bch2_path_get(struct btree_trans *, enum btree_id, struct bpos, + unsigned, unsigned, unsigned, unsigned long); struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *); /* @@ -443,7 +443,7 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans, #ifdef CONFIG_BCACHEFS_DEBUG iter->ip_allocated = ip; #endif - iter->path = bch2_path_get(trans, btree_id, iter->pos, + iter->path = trans->paths + bch2_path_get(trans, btree_id, iter->pos, locks_want, depth, flags, ip); } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index d129c0e468cc..19595a599c55 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -222,6 +222,8 @@ enum btree_path_uptodate { #define TRACK_PATH_ALLOCATED #endif +typedef u16 btree_path_idx_t; + struct btree_path { u8 idx; u8 sorted_idx; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index ce3f0af5b1a0..7ee4d18af2d5 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -344,16 +344,18 @@ static noinline int flush_new_cached_update(struct btree_trans *trans, enum btree_update_flags flags, unsigned long ip) { - struct btree_path *btree_path; struct bkey k; int ret; - btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0, - BTREE_ITER_INTENT, _THIS_IP_); - ret = bch2_btree_path_traverse(trans, btree_path, 0); + btree_path_idx_t path_idx = + bch2_path_get(trans, path->btree_id, path->pos, 1, 0, + BTREE_ITER_INTENT, _THIS_IP_); + ret = bch2_btree_path_traverse(trans, trans->paths + path_idx, 0); if (ret) goto out; + struct btree_path *btree_path = trans->paths + path_idx; + /* * The old key in the insert entry might actually refer to an existing * key in the btree that has been deleted from cache and not yet @@ -467,7 +469,7 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, int ret; if (!iter->key_cache_path) - iter->key_cache_path = + iter->key_cache_path = trans->paths + bch2_path_get(trans, path->btree_id, path->pos, 1, 0, BTREE_ITER_INTENT| BTREE_ITER_CACHED, _THIS_IP_); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 060fe357aa35..12a14d05c578 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -36,7 +36,7 @@ static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans, { struct btree_path *path; - path = bch2_path_get(trans, btree_id, pos, level + 1, level, + path = trans->paths + bch2_path_get(trans, btree_id, pos, level + 1, level, BTREE_ITER_NOPRESERVE| BTREE_ITER_INTENT, _RET_IP_); path = bch2_btree_path_make_mut(trans, path, true, _RET_IP_); @@ -1795,7 +1795,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, ? bpos_predecessor(b->data->min_key) : bpos_successor(b->data->max_key); - sib_path = bch2_path_get(trans, path->btree_id, sib_pos, + sib_path = trans->paths + bch2_path_get(trans, path->btree_id, sib_pos, U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_); ret = bch2_btree_path_traverse(trans, sib_path, false); if (ret) From 74e600c19a18bf6c1696f82b6d04d894f9bbdf40 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 23:18:52 -0500 Subject: [PATCH 149/226] bcachefs; bch2_path_put() -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 42 ++++++++++++++--------------- fs/bcachefs/btree_iter.h | 4 +-- fs/bcachefs/btree_update.c | 4 +-- fs/bcachefs/btree_update.h | 2 +- fs/bcachefs/btree_update_interior.c | 12 ++++----- 5 files changed, 31 insertions(+), 33 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8662d396e57b..1c2ad8195070 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1305,19 +1305,18 @@ static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btr return NULL; } -static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path) +static inline void __bch2_path_free(struct btree_trans *trans, btree_path_idx_t path) { - __bch2_btree_path_unlock(trans, path); - btree_path_list_remove(trans, path); - __clear_bit(path->idx, trans->paths_allocated); + __bch2_btree_path_unlock(trans, trans->paths + path); + btree_path_list_remove(trans, trans->paths + path); + __clear_bit(path, trans->paths_allocated); } -void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) +void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool intent) { - struct btree_path *dup; + struct btree_path *path = trans->paths + path_idx, *dup; - EBUG_ON(trans->paths + path->idx != path); - EBUG_ON(!path->ref); + EBUG_ON(path->idx != path_idx); if (!__btree_path_put(path, intent)) return; @@ -1339,16 +1338,15 @@ void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool inte dup->should_be_locked |= path->should_be_locked; } - __bch2_path_free(trans, path); + __bch2_path_free(trans, path_idx); } -static void bch2_path_put_nokeep(struct btree_trans *trans, struct btree_path *path, +static void bch2_path_put_nokeep(struct btree_trans *trans, btree_path_idx_t path, bool intent) { - EBUG_ON(trans->paths + path->idx != path); - EBUG_ON(!path->ref); + EBUG_ON(trans->paths[path].idx != path); - if (!__btree_path_put(path, intent)) + if (!__btree_path_put(trans->paths + path, intent)) return; __bch2_path_free(trans, path); @@ -2066,7 +2064,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX)); if (iter->update_path) { - bch2_path_put_nokeep(trans, iter->update_path, + bch2_path_put_nokeep(trans, iter->update_path->idx, iter->flags & BTREE_ITER_INTENT); iter->update_path = NULL; } @@ -2095,7 +2093,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e if (iter->update_path && !bkey_eq(iter->update_path->pos, k.k->p)) { - bch2_path_put_nokeep(trans, iter->update_path, + bch2_path_put_nokeep(trans, iter->update_path->idx, iter->flags & BTREE_ITER_INTENT); iter->update_path = NULL; } @@ -2278,7 +2276,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) * that candidate */ if (saved_path && !bkey_eq(k.k->p, saved_k.p)) { - bch2_path_put_nokeep(trans, iter->path, + bch2_path_put_nokeep(trans, iter->path->idx, iter->flags & BTREE_ITER_INTENT); iter->path = saved_path; saved_path = NULL; @@ -2291,7 +2289,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) iter->snapshot, k.k->p.snapshot)) { if (saved_path) - bch2_path_put_nokeep(trans, saved_path, + bch2_path_put_nokeep(trans, saved_path->idx, iter->flags & BTREE_ITER_INTENT); saved_path = btree_path_clone(trans, iter->path, iter->flags & BTREE_ITER_INTENT); @@ -2335,7 +2333,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) btree_path_set_should_be_locked(iter->path); out_no_locked: if (saved_path) - bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_INTENT); + bch2_path_put_nokeep(trans, saved_path->idx, iter->flags & BTREE_ITER_INTENT); bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); @@ -2639,13 +2637,13 @@ static inline void btree_path_list_add(struct btree_trans *trans, void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) { if (iter->update_path) - bch2_path_put_nokeep(trans, iter->update_path, + bch2_path_put_nokeep(trans, iter->update_path->idx, iter->flags & BTREE_ITER_INTENT); if (iter->path) - bch2_path_put(trans, iter->path, + bch2_path_put(trans, iter->path->idx, iter->flags & BTREE_ITER_INTENT); if (iter->key_cache_path) - bch2_path_put(trans, iter->key_cache_path, + bch2_path_put(trans, iter->key_cache_path->idx, iter->flags & BTREE_ITER_INTENT); iter->path = NULL; iter->update_path = NULL; @@ -2814,7 +2812,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) * iterators if we do that */ if (!path->ref && !path->preserve) - __bch2_path_free(trans, path); + __bch2_path_free(trans, path->idx); else path->preserve = false; } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 757354c53224..888ff4f2c1a5 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -254,7 +254,7 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *, int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *); -void bch2_path_put(struct btree_trans *, struct btree_path *, bool); +void bch2_path_put(struct btree_trans *, btree_path_idx_t, bool); int bch2_trans_relock(struct btree_trans *); int bch2_trans_relock_notrace(struct btree_trans *); @@ -360,7 +360,7 @@ static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpo static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) { if (unlikely(iter->update_path)) - bch2_path_put(iter->trans, iter->update_path, + bch2_path_put(iter->trans, iter->update_path->idx, iter->flags & BTREE_ITER_INTENT); iter->update_path = NULL; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 7ee4d18af2d5..8f933c2c2bda 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -372,7 +372,7 @@ static noinline int flush_new_cached_update(struct btree_trans *trans, btree_path_set_should_be_locked(btree_path); ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip); out: - bch2_path_put(trans, btree_path, true); + bch2_path_put(trans, btree_path->idx, true); return ret; } @@ -419,7 +419,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, if (!cmp && i < trans->updates + trans->nr_updates) { EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run); - bch2_path_put(trans, i->path, true); + bch2_path_put(trans, i->path->idx, true); i->flags = n.flags; i->cached = n.cached; i->k = n.k; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index fbb83e1aa2b2..5c1ef9c84878 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -194,7 +194,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, static inline void bch2_trans_reset_updates(struct btree_trans *trans) { trans_for_each_update(trans, i) - bch2_path_put(trans, i->path, true); + bch2_path_put(trans, i->path->idx, true); trans->extra_journal_res = 0; trans->nr_updates = 0; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 12a14d05c578..d6b33cadc5a9 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -720,7 +720,7 @@ static void btree_update_nodes_written(struct btree_update *as) btree_node_write_if_need(c, b, SIX_LOCK_intent); btree_node_unlock(trans, path, b->c.level); - bch2_path_put(trans, path, true); + bch2_path_put(trans, path->idx, true); } bch2_journal_pin_drop(&c->journal, &as->journal); @@ -1615,11 +1615,11 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, out: if (path2) { __bch2_btree_path_unlock(trans, path2); - bch2_path_put(trans, path2, true); + bch2_path_put(trans, path2->idx, true); } if (path1) { __bch2_btree_path_unlock(trans, path1); - bch2_path_put(trans, path1, true); + bch2_path_put(trans, path1->idx, true); } bch2_trans_verify_locks(trans); @@ -1926,8 +1926,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, out: err: if (new_path) - bch2_path_put(trans, new_path, true); - bch2_path_put(trans, sib_path, true); + bch2_path_put(trans, new_path->idx, true); + bch2_path_put(trans, sib_path->idx, true); bch2_trans_verify_locks(trans); return ret; err_free_update: @@ -1992,7 +1992,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, bch2_btree_update_done(as, trans); out: if (new_path) - bch2_path_put(trans, new_path, true); + bch2_path_put(trans, new_path->idx, true); bch2_trans_downgrade(trans); return ret; err: From 4617d94617a4604288b9f9d02f808274af21ee38 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Dec 2023 02:10:23 -0500 Subject: [PATCH 150/226] bcachefs: bch2_btree_path_set_pos() -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 44 ++++++++++++++++++++------------------ fs/bcachefs/btree_iter.h | 14 ++++++------ fs/bcachefs/btree_update.c | 5 +++-- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1c2ad8195070..5851b7895b3b 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1218,18 +1218,19 @@ struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans, return path; } -struct btree_path * __must_check +btree_path_idx_t __must_check __bch2_btree_path_set_pos(struct btree_trans *trans, - struct btree_path *path, struct bpos new_pos, - bool intent, unsigned long ip) + btree_path_idx_t path_idx, struct bpos new_pos, + bool intent, unsigned long ip) { - int cmp = bpos_cmp(new_pos, path->pos); + int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos); bch2_trans_verify_not_in_restart(trans); - EBUG_ON(!path->ref); + EBUG_ON(!trans->paths[path_idx].ref); - path = bch2_btree_path_make_mut(trans, path, intent, ip); + path_idx = bch2_btree_path_make_mut(trans, trans->paths + path_idx, intent, ip)->idx; + struct btree_path *path = trans->paths + path_idx; path->pos = new_pos; trans->paths_sorted = false; @@ -1270,7 +1271,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, } out: bch2_btree_path_verify(trans, path); - return path; + return path_idx; } /* Btree path: main interface: */ @@ -1576,7 +1577,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, path_pos->btree_id == btree_id && path_pos->level == level) { __btree_path_get(path_pos, intent); - path = bch2_btree_path_set_pos(trans, path_pos, pos, intent, ip); + path = trans->paths + bch2_btree_path_set_pos(trans, path_pos->idx, pos, intent, ip); } else { path = btree_path_alloc(trans, path_pos); path_pos = NULL; @@ -1671,14 +1672,15 @@ __bch2_btree_iter_traverse(struct btree_iter *iter) int __must_check bch2_btree_iter_traverse(struct btree_iter *iter) { + struct btree_trans *trans = iter->trans; int ret; - iter->path = bch2_btree_path_set_pos(iter->trans, iter->path, + iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, btree_iter_search_key(iter), iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (ret) return ret; @@ -1710,7 +1712,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) bkey_init(&iter->k); iter->k.p = iter->pos = b->key.k.p; - iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, + iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, b->key.k.p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); btree_path_set_should_be_locked(iter->path); @@ -1775,8 +1777,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) * Haven't gotten to the end of the parent node: go back down to * the next child node */ - path = iter->path = - bch2_btree_path_set_pos(trans, path, bpos_successor(iter->pos), + path = iter->path = trans->paths + + bch2_btree_path_set_pos(trans, path->idx, bpos_successor(iter->pos), iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); @@ -1792,7 +1794,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) bkey_init(&iter->k); iter->k.p = iter->pos = b->key.k.p; - iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, + iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, b->key.k.p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); btree_path_set_should_be_locked(iter->path); @@ -1932,7 +1934,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos BTREE_ITER_CACHED_NOFILL, _THIS_IP_); - iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos, + iter->key_cache_path = trans->paths + bch2_btree_path_set_pos(trans, iter->key_cache_path->idx, pos, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); @@ -1965,7 +1967,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp while (1) { struct btree_path_level *l; - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, + iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, search_key, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); @@ -2118,8 +2120,8 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e __btree_path_get(iter->path, iter->flags & BTREE_ITER_INTENT); iter->update_path = iter->path; - iter->update_path = bch2_btree_path_set_pos(trans, - iter->update_path, pos, + iter->update_path = trans->paths + bch2_btree_path_set_pos(trans, + iter->update_path->idx, pos, iter->flags & BTREE_ITER_INTENT, _THIS_IP_); ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); @@ -2167,7 +2169,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e iter->pos = iter_pos; - iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p, + iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, k.k->p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); @@ -2244,7 +2246,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) search_key.snapshot = U32_MAX; while (1) { - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, + iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, search_key, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); @@ -2377,7 +2379,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } search_key = btree_iter_search_key(iter); - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, + iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, search_key, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 888ff4f2c1a5..6427791a8219 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -174,16 +174,16 @@ bch2_btree_path_make_mut(struct btree_trans *trans, return path; } -struct btree_path * __must_check -__bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *, - struct bpos, bool, unsigned long); +btree_path_idx_t __must_check +__bch2_btree_path_set_pos(struct btree_trans *, btree_path_idx_t, + struct bpos, bool, unsigned long); -static inline struct btree_path * __must_check +static inline btree_path_idx_t __must_check bch2_btree_path_set_pos(struct btree_trans *trans, - struct btree_path *path, struct bpos new_pos, - bool intent, unsigned long ip) + btree_path_idx_t path, struct bpos new_pos, + bool intent, unsigned long ip) { - return !bpos_eq(new_pos, path->pos) + return !bpos_eq(new_pos, trans->paths[path].pos) ? __bch2_btree_path_set_pos(trans, path, new_pos, intent, ip) : path; } diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 8f933c2c2bda..f846c0b004bc 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -474,8 +474,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, BTREE_ITER_INTENT| BTREE_ITER_CACHED, _THIS_IP_); - iter->key_cache_path = - bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos, + iter->key_cache_path = trans->paths + + bch2_btree_path_set_pos(trans, iter->key_cache_path->idx, path->pos, + iter->flags & BTREE_ITER_INTENT, _THIS_IP_); From f6363acaa63a84e7e79bd6e8b926d40a940e7c81 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Dec 2023 02:24:05 -0500 Subject: [PATCH 151/226] bcachefs: bch2_btree_path_make_mut() -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 13 ++++++------- fs/bcachefs/btree_iter.h | 13 +++++++------ fs/bcachefs/btree_update_interior.c | 10 +++++----- fs/bcachefs/btree_write_buffer.c | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 5851b7895b3b..b53f657e39fd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1208,13 +1208,12 @@ static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btr } __flatten -struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans, - struct btree_path *path, bool intent, - unsigned long ip) +btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *trans, + btree_path_idx_t path, bool intent, unsigned long ip) { - __btree_path_put(path, intent); - path = btree_path_clone(trans, path, intent); - path->preserve = false; + __btree_path_put(trans->paths + path, intent); + path = btree_path_clone(trans, trans->paths + path, intent)->idx; + trans->paths[path].preserve = false; return path; } @@ -1228,7 +1227,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, bch2_trans_verify_not_in_restart(trans); EBUG_ON(!trans->paths[path_idx].ref); - path_idx = bch2_btree_path_make_mut(trans, trans->paths + path_idx, intent, ip)->idx; + path_idx = bch2_btree_path_make_mut(trans, path_idx, intent, ip); struct btree_path *path = trans->paths + path_idx; path->pos = new_pos; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 6427791a8219..42877b00e38f 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -160,17 +160,18 @@ __trans_next_path_with_node(struct btree_trans *trans, struct btree *b, _path = __trans_next_path_with_node((_trans), (_b), \ (_path)->idx + 1)) -struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *, - bool, unsigned long); +btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *, btree_path_idx_t, + bool, unsigned long); -static inline struct btree_path * __must_check +static inline btree_path_idx_t __must_check bch2_btree_path_make_mut(struct btree_trans *trans, - struct btree_path *path, bool intent, + btree_path_idx_t path, bool intent, unsigned long ip) { - if (path->ref > 1 || path->preserve) + if (trans->paths[path].ref > 1 || + trans->paths[path].preserve) path = __bch2_btree_path_make_mut(trans, path, intent, ip); - path->should_be_locked = false; + trans->paths[path].should_be_locked = false; return path; } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index d6b33cadc5a9..225b82146a0d 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -34,12 +34,12 @@ static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans, unsigned level, struct bpos pos) { - struct btree_path *path; - - path = trans->paths + bch2_path_get(trans, btree_id, pos, level + 1, level, + btree_path_idx_t path_idx = bch2_path_get(trans, btree_id, pos, level + 1, level, BTREE_ITER_NOPRESERVE| BTREE_ITER_INTENT, _RET_IP_); - path = bch2_btree_path_make_mut(trans, path, true, _RET_IP_); + path_idx = bch2_btree_path_make_mut(trans, path_idx, true, _RET_IP_); + + struct btree_path *path = trans->paths + path_idx; bch2_btree_path_downgrade(trans, path); __bch2_btree_path_unlock(trans, path); return path; @@ -2167,7 +2167,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, if (parent) { bch2_trans_copy_iter(&iter2, iter); - iter2.path = bch2_btree_path_make_mut(trans, iter2.path, + iter2.path = trans->paths + bch2_btree_path_make_mut(trans, iter2.path->idx, iter2.flags & BTREE_ITER_INTENT, _THIS_IP_); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 6bb5756b5db7..23e34d1b66fe 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -140,7 +140,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite * set_pos and traverse(): */ if (iter->path->ref > 1) - iter->path = __bch2_btree_path_make_mut(trans, iter->path, true, _THIS_IP_); + iter->path = trans->paths + __bch2_btree_path_make_mut(trans, iter->path->idx, true, _THIS_IP_); path = iter->path; From 96ed47d13056b74f867c63ab9fec0fd319048b70 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Dec 2023 03:02:43 -0500 Subject: [PATCH 152/226] bcachefs: bch2_btree_path_traverse() -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 33 ++++++++++++++++------------- fs/bcachefs/btree_iter.h | 9 ++++---- fs/bcachefs/btree_update.c | 6 +++--- fs/bcachefs/btree_update_interior.c | 10 +++++---- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index b53f657e39fd..0988338990e6 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -994,16 +994,16 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) /* Now, redo traversals in correct order: */ i = 0; while (i < trans->nr_sorted) { - path = trans->paths + trans->sorted[i]; + btree_path_idx_t idx = trans->sorted[i]; /* * Traversing a path can cause another path to be added at about * the same position: */ - if (path->uptodate) { - __btree_path_get(path, false); - ret = bch2_btree_path_traverse_one(trans, path, 0, _THIS_IP_); - __btree_path_put(path, false); + if (trans->paths[idx].uptodate) { + __btree_path_get(&trans->paths[idx], false); + ret = bch2_btree_path_traverse_one(trans, idx, 0, _THIS_IP_); + __btree_path_put(&trans->paths[idx], false); if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || bch2_err_matches(ret, ENOMEM)) @@ -1108,10 +1108,11 @@ static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans, * stashed in the iterator and returned from bch2_trans_exit(). */ int bch2_btree_path_traverse_one(struct btree_trans *trans, - struct btree_path *path, + btree_path_idx_t path_idx, unsigned flags, unsigned long trace_ip) { + struct btree_path *path = &trans->paths[path_idx]; unsigned depth_want = path->level; int ret = -((int) trans->restarted); @@ -1135,6 +1136,8 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, goto out; } + path = &trans->paths[path_idx]; + if (unlikely(path->level >= BTREE_MAX_DEPTH)) goto out; @@ -1665,7 +1668,7 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) { - return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); + return bch2_btree_path_traverse(iter->trans, iter->path->idx, iter->flags); } int __must_check @@ -1679,7 +1682,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter) iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); if (ret) return ret; @@ -1698,7 +1701,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) EBUG_ON(iter->path->cached); bch2_btree_iter_verify(iter); - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); if (ret) goto err; @@ -1783,7 +1786,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) btree_path_set_level_down(trans, path, iter->min_depth); - ret = bch2_btree_path_traverse(trans, path, iter->flags); + ret = bch2_btree_path_traverse(trans, path->idx, iter->flags); if (ret) goto err; @@ -1937,7 +1940,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->key_cache_path, + ret = bch2_btree_path_traverse(trans, iter->key_cache_path->idx, iter->flags|BTREE_ITER_CACHED) ?: bch2_btree_path_relock(trans, iter->path, _THIS_IP_); if (unlikely(ret)) @@ -1970,7 +1973,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); @@ -2123,7 +2126,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e iter->update_path->idx, pos, iter->flags & BTREE_ITER_INTENT, _THIS_IP_); - ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->update_path->idx, iter->flags); if (unlikely(ret)) { k = bkey_s_c_err(ret); goto out_no_locked; @@ -2249,7 +2252,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); @@ -2382,7 +2385,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); if (unlikely(ret)) { k = bkey_s_c_err(ret); goto out_no_locked; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 42877b00e38f..77520333d013 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -189,20 +189,19 @@ bch2_btree_path_set_pos(struct btree_trans *trans, : path; } -int __must_check bch2_btree_path_traverse_one(struct btree_trans *, struct btree_path *, +int __must_check bch2_btree_path_traverse_one(struct btree_trans *, + btree_path_idx_t, unsigned, unsigned long); static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans, - struct btree_path *path, unsigned flags) + btree_path_idx_t path, unsigned flags) { - if (path->uptodate < BTREE_ITER_NEED_RELOCK) + if (trans->paths[path].uptodate < BTREE_ITER_NEED_RELOCK) return 0; return bch2_btree_path_traverse_one(trans, path, flags, _RET_IP_); } -int __must_check bch2_btree_path_traverse(struct btree_trans *, - struct btree_path *, unsigned); btree_path_idx_t bch2_path_get(struct btree_trans *, enum btree_id, struct bpos, unsigned, unsigned, unsigned, unsigned long); struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *); diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index f846c0b004bc..3ea5f12a4fd4 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -350,7 +350,7 @@ static noinline int flush_new_cached_update(struct btree_trans *trans, btree_path_idx_t path_idx = bch2_path_get(trans, path->btree_id, path->pos, 1, 0, BTREE_ITER_INTENT, _THIS_IP_); - ret = bch2_btree_path_traverse(trans, trans->paths + path_idx, 0); + ret = bch2_btree_path_traverse(trans, path_idx, 0); if (ret) goto out; @@ -372,7 +372,7 @@ static noinline int flush_new_cached_update(struct btree_trans *trans, btree_path_set_should_be_locked(btree_path); ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip); out: - bch2_path_put(trans, btree_path->idx, true); + bch2_path_put(trans, path_idx, true); return ret; } @@ -480,7 +480,7 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, iter->flags & BTREE_ITER_INTENT, _THIS_IP_); - ret = bch2_btree_path_traverse(trans, iter->key_cache_path, + ret = bch2_btree_path_traverse(trans, iter->key_cache_path->idx, BTREE_ITER_CACHED); if (unlikely(ret)) return ret; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 225b82146a0d..51314831ccea 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1795,12 +1795,14 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, ? bpos_predecessor(b->data->min_key) : bpos_successor(b->data->max_key); - sib_path = trans->paths + bch2_path_get(trans, path->btree_id, sib_pos, - U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_); - ret = bch2_btree_path_traverse(trans, sib_path, false); + btree_path_idx_t sib_path_idx = + bch2_path_get(trans, path->btree_id, sib_pos, + U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_); + ret = bch2_btree_path_traverse(trans, sib_path_idx, false); if (ret) goto err; + sib_path = trans->paths + sib_path_idx; btree_path_set_should_be_locked(sib_path); m = sib_path->l[level].b; @@ -1927,7 +1929,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, err: if (new_path) bch2_path_put(trans, new_path->idx, true); - bch2_path_put(trans, sib_path->idx, true); + bch2_path_put(trans, sib_path_idx, true); bch2_trans_verify_locks(trans); return ret; err_free_update: From 788cc25d15e0d502a29fb7deae78832075e55655 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 8 Dec 2023 17:02:16 -0500 Subject: [PATCH 153/226] bcachefs: btree_path_alloc() -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 49 ++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 0988338990e6..6d5546237de4 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -34,7 +34,7 @@ static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter) #endif } -static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *); +static btree_path_idx_t btree_path_alloc(struct btree_trans *, btree_path_idx_t); static void bch2_trans_srcu_lock(struct btree_trans *); static inline int __btree_path_cmp(const struct btree_path *l, @@ -1200,13 +1200,13 @@ static inline void btree_path_copy(struct btree_trans *trans, struct btree_path } } -static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src, - bool intent) +static btree_path_idx_t btree_path_clone(struct btree_trans *trans, btree_path_idx_t src, + bool intent) { - struct btree_path *new = btree_path_alloc(trans, src); + btree_path_idx_t new = btree_path_alloc(trans, src); - btree_path_copy(trans, new, src); - __btree_path_get(new, intent); + btree_path_copy(trans, trans->paths + new, trans->paths + src); + __btree_path_get(trans->paths + new, intent); return new; } @@ -1215,7 +1215,7 @@ btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *trans, btree_path_idx_t path, bool intent, unsigned long ip) { __btree_path_put(trans->paths + path, intent); - path = btree_path_clone(trans, trans->paths + path, intent)->idx; + path = btree_path_clone(trans, path, intent); trans->paths[path].preserve = false; return path; } @@ -1517,11 +1517,10 @@ static noinline void btree_path_overflow(struct btree_trans *trans) panic("trans path overflow\n"); } -static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, - struct btree_path *pos) +static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, + btree_path_idx_t pos) { - struct btree_path *path; - size_t idx = find_first_zero_bit(trans->paths_allocated, BTREE_ITER_MAX); + btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, BTREE_ITER_MAX); if (unlikely(idx == BTREE_ITER_MAX)) btree_path_overflow(trans); @@ -1537,15 +1536,15 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, __set_bit(idx, trans->paths_allocated); - path = &trans->paths[idx]; + struct btree_path *path = &trans->paths[idx]; path->idx = idx; path->ref = 0; path->intent_ref = 0; path->nodes_locked = 0; - btree_path_list_add(trans, pos, path); + btree_path_list_add(trans, pos ? trans->paths + pos : NULL, path); trans->paths_sorted = false; - return path; + return idx; } btree_path_idx_t bch2_path_get(struct btree_trans *trans, @@ -1553,10 +1552,10 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, unsigned locks_want, unsigned level, unsigned flags, unsigned long ip) { - struct btree_path *path, *path_pos = NULL; + struct btree_path *path; bool cached = flags & BTREE_ITER_CACHED; bool intent = flags & BTREE_ITER_INTENT; - int i; + btree_path_idx_t i, path_pos = 0; bch2_trans_verify_not_in_restart(trans); bch2_trans_verify_locks(trans); @@ -1571,18 +1570,18 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, level) > 0) break; - path_pos = path; + path_pos = path->idx; } if (path_pos && - path_pos->cached == cached && - path_pos->btree_id == btree_id && - path_pos->level == level) { - __btree_path_get(path_pos, intent); - path = trans->paths + bch2_btree_path_set_pos(trans, path_pos->idx, pos, intent, ip); + trans->paths[path_pos].cached == cached && + trans->paths[path_pos].btree_id == btree_id && + trans->paths[path_pos].level == level) { + __btree_path_get(trans->paths + path_pos, intent); + path = trans->paths + bch2_btree_path_set_pos(trans, + path_pos, pos, intent, ip); } else { - path = btree_path_alloc(trans, path_pos); - path_pos = NULL; + path = trans->paths + btree_path_alloc(trans, path_pos); __btree_path_get(path, intent); path->pos = pos; @@ -2295,7 +2294,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) if (saved_path) bch2_path_put_nokeep(trans, saved_path->idx, iter->flags & BTREE_ITER_INTENT); - saved_path = btree_path_clone(trans, iter->path, + saved_path = trans->paths + btree_path_clone(trans, iter->path->idx, iter->flags & BTREE_ITER_INTENT); saved_k = *k.k; saved_v = k.v; From 07f383c71fadc952059ed4ffe37dd465bda4ad3e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 Dec 2023 00:39:38 -0500 Subject: [PATCH 154/226] bcachefs: btree_iter -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 5 +- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/btree_iter.c | 200 +++++++++++++++------------- fs/bcachefs/btree_iter.h | 20 +-- fs/bcachefs/btree_key_cache.c | 11 +- fs/bcachefs/btree_types.h | 24 +++- fs/bcachefs/btree_update.c | 28 ++-- fs/bcachefs/btree_update_interior.c | 35 ++--- fs/bcachefs/btree_write_buffer.c | 20 +-- fs/bcachefs/debug.c | 3 +- fs/bcachefs/fsck.c | 2 +- 11 files changed, 195 insertions(+), 155 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 13beaddac4c6..1a127b0a08b3 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -862,8 +862,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos bch2_trans_copy_iter(&iter2, iter); - if (!bpos_eq(iter->path->l[0].b->key.k.p, SPOS_MAX)) - end = bkey_min(end, bpos_nosnap_successor(iter->path->l[0].b->key.k.p)); + struct btree_path *path = btree_iter_path(iter->trans, iter); + if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX)) + end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p)); end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1)); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 8525d2b84c33..b0ff47998a94 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -367,7 +367,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc if (!ob) set_btree_iter_dontneed(&iter); err: - if (iter.trans && iter.path) + if (iter.path) set_btree_iter_dontneed(&iter); bch2_trans_iter_exit(trans, &iter); printbuf_exit(&buf); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 6d5546237de4..909c215fbb28 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -22,8 +22,8 @@ #include static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *); -static inline void btree_path_list_add(struct btree_trans *, struct btree_path *, - struct btree_path *); +static inline void btree_path_list_add(struct btree_trans *, + btree_path_idx_t, btree_path_idx_t); static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter) { @@ -252,7 +252,7 @@ static void bch2_btree_iter_verify(struct btree_iter *iter) BUG_ON(iter->btree_id >= BTREE_ID_NR); - BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached); + BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != btree_iter_path(trans, iter)->cached); BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) && (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); @@ -262,8 +262,8 @@ static void bch2_btree_iter_verify(struct btree_iter *iter) !btree_type_has_snapshot_field(iter->btree_id)); if (iter->update_path) - bch2_btree_path_verify(trans, iter->update_path); - bch2_btree_path_verify(trans, iter->path); + bch2_btree_path_verify(trans, &trans->paths[iter->update_path]); + bch2_btree_path_verify(trans, btree_iter_path(trans, iter)); } static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) @@ -1542,7 +1542,7 @@ static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, path->intent_ref = 0; path->nodes_locked = 0; - btree_path_list_add(trans, pos ? trans->paths + pos : NULL, path); + btree_path_list_add(trans, pos, idx); trans->paths_sorted = false; return idx; } @@ -1667,7 +1667,7 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) { - return bch2_btree_path_traverse(iter->trans, iter->path->idx, iter->flags); + return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); } int __must_check @@ -1676,16 +1676,16 @@ bch2_btree_iter_traverse(struct btree_iter *iter) struct btree_trans *trans = iter->trans; int ret; - iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, + iter->path = bch2_btree_path_set_pos(trans, iter->path, btree_iter_search_key(iter), iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); + ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); if (ret) return ret; - btree_path_set_should_be_locked(iter->path); + btree_path_set_should_be_locked(trans->paths + iter->path); return 0; } @@ -1697,14 +1697,15 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) struct btree *b = NULL; int ret; - EBUG_ON(iter->path->cached); + EBUG_ON(trans->paths[iter->path].cached); bch2_btree_iter_verify(iter); - ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (ret) goto err; - b = btree_path_node(iter->path, iter->path->level); + struct btree_path *path = btree_iter_path(trans, iter); + b = btree_path_node(path, path->level); if (!b) goto out; @@ -1713,10 +1714,10 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) bkey_init(&iter->k); iter->k.p = iter->pos = b->key.k.p; - iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, b->key.k.p, + iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - btree_path_set_should_be_locked(iter->path); + btree_path_set_should_be_locked(btree_iter_path(trans, iter)); out: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); @@ -1741,14 +1742,15 @@ struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; - struct btree_path *path = iter->path; struct btree *b = NULL; int ret; + EBUG_ON(trans->paths[iter->path].cached); bch2_trans_verify_not_in_restart(trans); - EBUG_ON(iter->path->cached); bch2_btree_iter_verify(iter); + struct btree_path *path = btree_iter_path(trans, iter); + /* already at end? */ if (!btree_path_node(path, path->level)) return NULL; @@ -1778,28 +1780,30 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) * Haven't gotten to the end of the parent node: go back down to * the next child node */ - path = iter->path = trans->paths + - bch2_btree_path_set_pos(trans, path->idx, bpos_successor(iter->pos), - iter->flags & BTREE_ITER_INTENT, - btree_iter_ip_allocated(iter)); + iter->path = bch2_btree_path_set_pos(trans, iter->path, + bpos_successor(iter->pos), + iter->flags & BTREE_ITER_INTENT, + btree_iter_ip_allocated(iter)); + path = btree_iter_path(trans, iter); btree_path_set_level_down(trans, path, iter->min_depth); - ret = bch2_btree_path_traverse(trans, path->idx, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (ret) goto err; + path = btree_iter_path(trans, iter); b = path->l[path->level].b; } bkey_init(&iter->k); iter->k.p = iter->pos = b->key.k.p; - iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, b->key.k.p, + iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - btree_path_set_should_be_locked(iter->path); - BUG_ON(iter->path->uptodate); + btree_path_set_should_be_locked(btree_iter_path(trans, iter)); + EBUG_ON(btree_iter_path(trans, iter)->uptodate); out: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); @@ -1841,14 +1845,15 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter) static noinline struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter) { + struct btree_trans *trans = iter->trans; struct bkey_i *ret = NULL; - trans_for_each_update(iter->trans, i) { + trans_for_each_update(trans, i) { if (i->btree_id < iter->btree_id) continue; if (i->btree_id > iter->btree_id) break; - if (bpos_lt(i->k->k.p, iter->path->pos)) + if (bpos_lt(i->k->k.p, btree_iter_path(trans, iter)->pos)) continue; if (i->key_cache_already_flushed) continue; @@ -1870,9 +1875,11 @@ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, struct btree_iter *iter, struct bpos end_pos) { + struct btree_path *path = btree_iter_path(trans, iter); + return bch2_journal_keys_peek_upto(trans->c, iter->btree_id, - iter->path->level, - iter->path->pos, + path->level, + path->pos, end_pos, &iter->journal_idx); } @@ -1881,7 +1888,8 @@ static noinline struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, struct btree_iter *iter) { - struct bkey_i *k = bch2_btree_journal_peek(trans, iter, iter->path->pos); + struct btree_path *path = btree_iter_path(trans, iter); + struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos); if (k) { iter->k = k->k; @@ -1896,9 +1904,10 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { + struct btree_path *path = btree_iter_path(trans, iter); struct bkey_i *next_journal = bch2_btree_journal_peek(trans, iter, - k.k ? k.k->p : path_l(iter->path)->b->key.k.p); + k.k ? k.k->p : path_l(path)->b->key.k.p); if (next_journal) { iter->k = next_journal->k; @@ -1929,25 +1938,25 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos return bkey_s_c_null; if (!iter->key_cache_path) - iter->key_cache_path = trans->paths + bch2_path_get(trans, iter->btree_id, pos, + iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos, iter->flags & BTREE_ITER_INTENT, 0, iter->flags|BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL, _THIS_IP_); - iter->key_cache_path = trans->paths + bch2_btree_path_set_pos(trans, iter->key_cache_path->idx, pos, + iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->key_cache_path->idx, + ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED) ?: - bch2_btree_path_relock(trans, iter->path, _THIS_IP_); + bch2_btree_path_relock(trans, btree_iter_path(trans, iter), _THIS_IP_); if (unlikely(ret)) return bkey_s_c_err(ret); - btree_path_set_should_be_locked(iter->key_cache_path); + btree_path_set_should_be_locked(trans->paths + iter->key_cache_path); - k = bch2_btree_path_peek_slot(iter->key_cache_path, &u); + k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u); if (k.k && !bkey_err(k)) { iter->k = u; k.k = &iter->k; @@ -1962,17 +1971,17 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp struct bkey_s_c k, k2; int ret; - EBUG_ON(iter->path->cached); + EBUG_ON(btree_iter_path(trans, iter)->cached); bch2_btree_iter_verify(iter); while (1) { struct btree_path_level *l; - iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, search_key, + iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); @@ -1980,7 +1989,8 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp goto out; } - l = path_l(iter->path); + struct btree_path *path = btree_iter_path(trans, iter); + l = path_l(path); if (unlikely(!l->b)) { /* No btree nodes at requested level: */ @@ -1989,7 +1999,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp goto out; } - btree_path_set_should_be_locked(iter->path); + btree_path_set_should_be_locked(path); k = btree_path_level_peek_all(trans->c, l, &iter->k); @@ -2067,9 +2077,9 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX)); if (iter->update_path) { - bch2_path_put_nokeep(trans, iter->update_path->idx, + bch2_path_put_nokeep(trans, iter->update_path, iter->flags & BTREE_ITER_INTENT); - iter->update_path = NULL; + iter->update_path = 0; } bch2_btree_iter_verify_entry_exit(iter); @@ -2095,10 +2105,10 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e goto end; if (iter->update_path && - !bkey_eq(iter->update_path->pos, k.k->p)) { - bch2_path_put_nokeep(trans, iter->update_path->idx, + !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { + bch2_path_put_nokeep(trans, iter->update_path, iter->flags & BTREE_ITER_INTENT); - iter->update_path = NULL; + iter->update_path = 0; } if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && @@ -2118,14 +2128,14 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e * advance, same as on exit for iter->path, but only up * to snapshot */ - __btree_path_get(iter->path, iter->flags & BTREE_ITER_INTENT); + __btree_path_get(trans->paths + iter->path, iter->flags & BTREE_ITER_INTENT); iter->update_path = iter->path; - iter->update_path = trans->paths + bch2_btree_path_set_pos(trans, - iter->update_path->idx, pos, + iter->update_path = bch2_btree_path_set_pos(trans, + iter->update_path, pos, iter->flags & BTREE_ITER_INTENT, _THIS_IP_); - ret = bch2_btree_path_traverse(trans, iter->update_path->idx, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); if (unlikely(ret)) { k = bkey_s_c_err(ret); goto out_no_locked; @@ -2170,18 +2180,18 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e iter->pos = iter_pos; - iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, k.k->p, + iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - btree_path_set_should_be_locked(iter->path); + btree_path_set_should_be_locked(btree_iter_path(trans, iter)); out_no_locked: if (iter->update_path) { - ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_); + ret = bch2_btree_path_relock(trans, trans->paths + iter->update_path, _THIS_IP_); if (unlikely(ret)) k = bkey_s_c_err(ret); else - btree_path_set_should_be_locked(iter->update_path); + btree_path_set_should_be_locked(trans->paths + iter->update_path); } if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) @@ -2234,7 +2244,8 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) const struct bch_val *saved_v; int ret; - EBUG_ON(iter->path->cached || iter->path->level); + EBUG_ON(btree_iter_path(trans, iter)->cached || + btree_iter_path(trans, iter)->level); EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES); if (iter->flags & BTREE_ITER_WITH_JOURNAL) @@ -2247,11 +2258,11 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) search_key.snapshot = U32_MAX; while (1) { - iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, search_key, + iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); @@ -2259,14 +2270,14 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) goto out_no_locked; } - k = btree_path_level_peek(trans, iter->path, - &iter->path->l[0], &iter->k); + struct btree_path *path = btree_iter_path(trans, iter); + + k = btree_path_level_peek(trans, path, &path->l[0], &iter->k); if (!k.k || ((iter->flags & BTREE_ITER_IS_EXTENTS) ? bpos_ge(bkey_start_pos(k.k), search_key) : bpos_gt(k.k->p, search_key))) - k = btree_path_level_prev(trans, iter->path, - &iter->path->l[0], &iter->k); + k = btree_path_level_prev(trans, path, &path->l[0], &iter->k); if (likely(k.k)) { if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) { @@ -2279,9 +2290,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) * that candidate */ if (saved_path && !bkey_eq(k.k->p, saved_k.p)) { - bch2_path_put_nokeep(trans, iter->path->idx, + bch2_path_put_nokeep(trans, iter->path, iter->flags & BTREE_ITER_INTENT); - iter->path = saved_path; + iter->path = saved_path->idx; saved_path = NULL; iter->k = saved_k; k.v = saved_v; @@ -2294,8 +2305,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) if (saved_path) bch2_path_put_nokeep(trans, saved_path->idx, iter->flags & BTREE_ITER_INTENT); - saved_path = trans->paths + btree_path_clone(trans, iter->path->idx, + saved_path = trans->paths + btree_path_clone(trans, iter->path, iter->flags & BTREE_ITER_INTENT); + path = btree_iter_path(trans, iter); saved_k = *k.k; saved_v = k.v; } @@ -2312,10 +2324,11 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) continue; } + btree_path_set_should_be_locked(path); break; - } else if (likely(!bpos_eq(iter->path->l[0].b->data->min_key, POS_MIN))) { + } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { /* Advance to previous leaf node: */ - search_key = bpos_predecessor(iter->path->l[0].b->data->min_key); + search_key = bpos_predecessor(path->l[0].b->data->min_key); } else { /* Start of btree: */ bch2_btree_iter_set_pos(iter, POS_MIN); @@ -2332,8 +2345,6 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) iter->pos.snapshot = iter->snapshot; - - btree_path_set_should_be_locked(iter->path); out_no_locked: if (saved_path) bch2_path_put_nokeep(trans, saved_path->idx, iter->flags & BTREE_ITER_INTENT); @@ -2368,7 +2379,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE)); + EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE)); /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_IS_EXTENTS) && @@ -2380,11 +2391,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } search_key = btree_iter_search_key(iter); - iter->path = trans->paths + bch2_btree_path_set_pos(trans, iter->path->idx, search_key, + iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, iter->flags & BTREE_ITER_INTENT, btree_iter_ip_allocated(iter)); - ret = bch2_btree_path_traverse(trans, iter->path->idx, iter->flags); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { k = bkey_s_c_err(ret); goto out_no_locked; @@ -2413,7 +2424,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) goto out_no_locked; } - k = bch2_btree_path_peek_slot(iter->path, &iter->k); + k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k); if (unlikely(!k.k)) goto out_no_locked; } else { @@ -2423,7 +2434,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (iter->flags & BTREE_ITER_IS_EXTENTS) end.offset = U64_MAX; - EBUG_ON(iter->path->level); + EBUG_ON(btree_iter_path(trans, iter)->level); if (iter->flags & BTREE_ITER_INTENT) { struct btree_iter iter2; @@ -2469,7 +2480,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } } out: - btree_path_set_should_be_locked(iter->path); + btree_path_set_should_be_locked(btree_iter_path(trans, iter)); out_no_locked: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); @@ -2614,21 +2625,22 @@ static inline void btree_path_list_remove(struct btree_trans *trans, } static inline void btree_path_list_add(struct btree_trans *trans, - struct btree_path *pos, - struct btree_path *path) + btree_path_idx_t pos, + btree_path_idx_t path_idx) { + struct btree_path *path = trans->paths + path_idx; unsigned i; - path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted; + path->sorted_idx = pos ? trans->paths[pos].sorted_idx + 1 : trans->nr_sorted; #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1, trans->sorted + path->sorted_idx, DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); trans->nr_sorted++; - trans->sorted[path->sorted_idx] = path->idx; + trans->sorted[path->sorted_idx] = path_idx; #else - array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx); + array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx); #endif for (i = path->sorted_idx; i < trans->nr_sorted; i++) @@ -2640,17 +2652,18 @@ static inline void btree_path_list_add(struct btree_trans *trans, void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) { if (iter->update_path) - bch2_path_put_nokeep(trans, iter->update_path->idx, + bch2_path_put_nokeep(trans, iter->update_path, iter->flags & BTREE_ITER_INTENT); if (iter->path) - bch2_path_put(trans, iter->path->idx, + bch2_path_put(trans, iter->path, iter->flags & BTREE_ITER_INTENT); if (iter->key_cache_path) - bch2_path_put(trans, iter->key_cache_path->idx, + bch2_path_put(trans, iter->key_cache_path, iter->flags & BTREE_ITER_INTENT); - iter->path = NULL; - iter->update_path = NULL; - iter->key_cache_path = NULL; + iter->path = 0; + iter->update_path = 0; + iter->key_cache_path = 0; + iter->trans = NULL; } void bch2_trans_iter_init_outlined(struct btree_trans *trans, @@ -2681,19 +2694,22 @@ void bch2_trans_node_iter_init(struct btree_trans *trans, iter->min_depth = depth; - BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH)); - BUG_ON(iter->path->level != depth); - BUG_ON(iter->min_depth != depth); + struct btree_path *path = btree_iter_path(trans, iter); + BUG_ON(path->locks_want < min(locks_want, BTREE_MAX_DEPTH)); + BUG_ON(path->level != depth); + BUG_ON(iter->min_depth != depth); } void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) { + struct btree_trans *trans = src->trans; + *dst = *src; if (src->path) - __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT); + __btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT); if (src->update_path) - __btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT); - dst->key_cache_path = NULL; + __btree_path_get(trans->paths + src->update_path, src->flags & BTREE_ITER_INTENT); + dst->key_cache_path = 0; } void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 77520333d013..e6ec971d66d3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -203,7 +203,7 @@ static inline int __must_check bch2_btree_path_traverse(struct btree_trans *tran } btree_path_idx_t bch2_path_get(struct btree_trans *, enum btree_id, struct bpos, - unsigned, unsigned, unsigned, unsigned long); + unsigned, unsigned, unsigned, unsigned long); struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *); /* @@ -359,10 +359,12 @@ static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpo static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) { + struct btree_trans *trans = iter->trans; + if (unlikely(iter->update_path)) - bch2_path_put(iter->trans, iter->update_path->idx, + bch2_path_put(trans, iter->update_path, iter->flags & BTREE_ITER_INTENT); - iter->update_path = NULL; + iter->update_path = 0; if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) new_pos.snapshot = iter->snapshot; @@ -431,8 +433,8 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans, unsigned long ip) { iter->trans = trans; - iter->update_path = NULL; - iter->key_cache_path = NULL; + iter->update_path = 0; + iter->key_cache_path = 0; iter->btree_id = btree_id; iter->min_depth = 0; iter->flags = flags; @@ -443,7 +445,7 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans, #ifdef CONFIG_BCACHEFS_DEBUG iter->ip_allocated = ip; #endif - iter->path = trans->paths + bch2_path_get(trans, btree_id, iter->pos, + iter->path = bch2_path_get(trans, btree_id, iter->pos, locks_want, depth, flags, ip); } @@ -471,8 +473,10 @@ void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *); static inline void set_btree_iter_dontneed(struct btree_iter *iter) { - if (!iter->trans->restarted) - iter->path->preserve = false; + struct btree_trans *trans = iter->trans; + + if (!trans->restarted) + btree_iter_path(trans, iter)->preserve = false; } void *__bch2_trans_kmalloc(struct btree_trans *, size_t); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index a4e9e7ffad32..4fb708e91a8f 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -630,7 +630,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, if (ret) goto out; - ck = (void *) c_iter.path->l[0].b; + ck = (void *) btree_iter_path(trans, &c_iter)->l[0].b; if (!ck) goto out; @@ -680,7 +680,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, bch2_journal_pin_drop(j, &ck->journal); - BUG_ON(!btree_node_locked(c_iter.path, 0)); + struct btree_path *path = btree_iter_path(trans, &c_iter); + BUG_ON(!btree_node_locked(path, 0)); if (!evict) { if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { @@ -691,17 +692,17 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, struct btree_path *path2; evict: trans_for_each_path(trans, path2) - if (path2 != c_iter.path) + if (path2 != path) __bch2_btree_path_unlock(trans, path2); - bch2_btree_node_lock_write_nofail(trans, c_iter.path, &ck->c); + bch2_btree_node_lock_write_nofail(trans, path, &ck->c); if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { clear_bit(BKEY_CACHED_DIRTY, &ck->flags); atomic_long_dec(&c->btree_key_cache.nr_dirty); } - mark_btree_node_locked_noreset(c_iter.path, 0, BTREE_NODE_UNLOCKED); + mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); bkey_cached_evict(&c->btree_key_cache, ck); bkey_cached_free_fast(&c->btree_key_cache, ck); } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 19595a599c55..4b96437c087e 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -225,8 +225,8 @@ enum btree_path_uptodate { typedef u16 btree_path_idx_t; struct btree_path { - u8 idx; - u8 sorted_idx; + btree_path_idx_t idx; + btree_path_idx_t sorted_idx; u8 ref; u8 intent_ref; @@ -282,9 +282,9 @@ static inline unsigned long btree_path_ip_allocated(struct btree_path *path) */ struct btree_iter { struct btree_trans *trans; - struct btree_path *path; - struct btree_path *update_path; - struct btree_path *key_cache_path; + btree_path_idx_t path; + btree_path_idx_t update_path; + btree_path_idx_t key_cache_path; enum btree_id btree_id:8; u8 min_depth; @@ -410,7 +410,7 @@ struct btree_trans { * extent: */ unsigned extra_journal_res; - u8 nr_max_paths; + btree_path_idx_t nr_max_paths; u16 journal_entries_u64s; u16 journal_entries_size; @@ -437,6 +437,18 @@ struct btree_trans { struct replicas_delta_list *fs_usage_deltas; }; +static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter) +{ + return trans->paths + iter->path; +} + +static inline struct btree_path *btree_iter_key_cache_path(struct btree_trans *trans, struct btree_iter *iter) +{ + return iter->key_cache_path + ? trans->paths + iter->key_cache_path + : NULL; +} + #define BCH_BTREE_WRITE_TYPES() \ x(initial, 0) \ x(init_next_bset, 1) \ diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 3ea5f12a4fd4..41f0009fb625 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -266,7 +266,7 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans, bch2_cut_front(new.k->p, update); - ret = bch2_trans_update_by_path(trans, iter->path, update, + ret = bch2_trans_update_by_path(trans, btree_iter_path(trans, iter), update, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| flags, _RET_IP_); if (ret) @@ -462,37 +462,37 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, struct btree_iter *iter, struct btree_path *path) { - if (!iter->key_cache_path || - !iter->key_cache_path->should_be_locked || - !bpos_eq(iter->key_cache_path->pos, iter->pos)) { + struct btree_path *key_cache_path = btree_iter_key_cache_path(trans, iter); + + if (!key_cache_path || + !key_cache_path->should_be_locked || + !bpos_eq(key_cache_path->pos, iter->pos)) { struct bkey_cached *ck; int ret; if (!iter->key_cache_path) - iter->key_cache_path = trans->paths + + iter->key_cache_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0, BTREE_ITER_INTENT| BTREE_ITER_CACHED, _THIS_IP_); - iter->key_cache_path = trans->paths + - bch2_btree_path_set_pos(trans, iter->key_cache_path->idx, path->pos, - + iter->key_cache_path = + bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos, iter->flags & BTREE_ITER_INTENT, _THIS_IP_); - ret = bch2_btree_path_traverse(trans, iter->key_cache_path->idx, - BTREE_ITER_CACHED); + ret = bch2_btree_path_traverse(trans, iter->key_cache_path, BTREE_ITER_CACHED); if (unlikely(ret)) return ret; - ck = (void *) iter->key_cache_path->l[0].b; + ck = (void *) trans->paths[iter->key_cache_path].l[0].b; if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_); return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); } - btree_path_set_should_be_locked(iter->key_cache_path); + btree_path_set_should_be_locked(trans->paths + iter->key_cache_path); } return 0; @@ -501,7 +501,7 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i *k, enum btree_update_flags flags) { - struct btree_path *path = iter->update_path ?: iter->path; + struct btree_path *path = trans->paths + (iter->update_path ?: iter->path); int ret; if (iter->flags & BTREE_ITER_IS_EXTENTS) @@ -529,7 +529,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter if (ret) return ret; - path = iter->key_cache_path; + path = trans->paths + iter->key_cache_path; } return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 51314831ccea..eb93c326db06 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1951,9 +1951,9 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, flags |= BCH_TRANS_COMMIT_no_enospc; - parent = btree_node_parent(iter->path, b); - as = bch2_btree_update_start(trans, iter->path, b->c.level, - false, flags); + struct btree_path *path = btree_iter_path(trans, iter); + parent = btree_node_parent(path, b); + as = bch2_btree_update_start(trans, path, b->c.level, false, flags); ret = PTR_ERR_OR_ZERO(as); if (ret) goto out; @@ -1975,20 +1975,20 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); - ret = bch2_btree_insert_node(as, trans, iter->path, parent, - &as->parent_keys, flags); + ret = bch2_btree_insert_node(as, trans, btree_iter_path(trans, iter), + parent, &as->parent_keys, flags); if (ret) goto err; } else { - bch2_btree_set_root(as, trans, iter->path, n); + bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n); } bch2_btree_update_get_open_buckets(as, n); bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); - bch2_btree_node_free_inmem(trans, iter->path, b); + bch2_btree_node_free_inmem(trans, btree_iter_path(trans, iter), b); - bch2_trans_node_add(trans, iter->path, n); + bch2_trans_node_add(trans, trans->paths + iter->path, n); six_unlock_intent(&n->c.lock); bch2_btree_update_done(as, trans); @@ -2165,18 +2165,19 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, BUG_ON(ret); } - parent = btree_node_parent(iter->path, b); + parent = btree_node_parent(btree_iter_path(trans, iter), b); if (parent) { bch2_trans_copy_iter(&iter2, iter); - iter2.path = trans->paths + bch2_btree_path_make_mut(trans, iter2.path->idx, + iter2.path = bch2_btree_path_make_mut(trans, iter2.path, iter2.flags & BTREE_ITER_INTENT, _THIS_IP_); - BUG_ON(iter2.path->level != b->c.level); - BUG_ON(!bpos_eq(iter2.path->pos, new_key->k.p)); + struct btree_path *path2 = btree_iter_path(trans, &iter2); + BUG_ON(path2->level != b->c.level); + BUG_ON(!bpos_eq(path2->pos, new_key->k.p)); - btree_path_set_level_up(trans, iter2.path); + btree_path_set_level_up(trans, path2); trans->paths_sorted = false; @@ -2203,7 +2204,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, if (ret) goto err; - bch2_btree_node_lock_write_nofail(trans, iter->path, &b->c); + bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c); if (new_hash) { mutex_lock(&c->btree_cache.lock); @@ -2218,7 +2219,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bkey_copy(&b->key, new_key); } - bch2_btree_node_unlock_write(trans, iter->path, b); + bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b); out: bch2_trans_iter_exit(trans, &iter2); return ret; @@ -2237,7 +2238,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite { struct bch_fs *c = trans->c; struct btree *new_hash = NULL; - struct btree_path *path = iter->path; + struct btree_path *path = btree_iter_path(trans, iter); struct closure cl; int ret = 0; @@ -2295,7 +2296,7 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, goto out; /* has node been freed? */ - if (iter.path->l[b->c.level].b != b) { + if (btree_iter_path(trans, &iter)->l[b->c.level].b != b) { /* node has been freed: */ BUG_ON(!btree_node_dying(b)); goto out; diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 23e34d1b66fe..16506032670f 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -106,7 +106,9 @@ static noinline int wb_flush_one_slowpath(struct btree_trans *trans, struct btree_iter *iter, struct btree_write_buffered_key *wb) { - bch2_btree_node_unlock_write(trans, iter->path, iter->path->l[0].b); + struct btree_path *path = btree_iter_path(trans, iter); + + bch2_btree_node_unlock_write(trans, path, path->l[0].b); trans->journal_res.seq = wb->journal_seq; @@ -139,10 +141,10 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite * We can't clone a path that has write locks: unshare it now, before * set_pos and traverse(): */ - if (iter->path->ref > 1) - iter->path = trans->paths + __bch2_btree_path_make_mut(trans, iter->path->idx, true, _THIS_IP_); + if (btree_iter_path(trans, iter)->ref > 1) + iter->path = __bch2_btree_path_make_mut(trans, iter->path, true, _THIS_IP_); - path = iter->path; + path = btree_iter_path(trans, iter); if (!*write_locked) { ret = bch2_btree_node_lock_write(trans, path, &path->l[0].b->c); @@ -300,7 +302,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) } if (write_locked) { - struct btree_path *path = iter.path; + struct btree_path *path = btree_iter_path(trans, &iter); if (path->btree_id != i->btree || bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) { @@ -316,7 +318,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) } bch2_btree_iter_set_pos(&iter, k->k.k.p); - iter.path->preserve = false; + btree_iter_path(trans, &iter)->preserve = false; do { if (race_fault()) { @@ -338,8 +340,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) break; } - if (write_locked) - bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b); + if (write_locked) { + struct btree_path *path = btree_iter_path(trans, &iter); + bch2_btree_node_unlock_write(trans, path, path->l[0].b); + } bch2_trans_iter_exit(trans, &iter); if (ret) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 76e43318fc5f..bbf6fce3a789 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -460,7 +460,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, for_each_btree_key(trans, iter, i->id, i->from, BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ({ - struct btree_path_level *l = &iter.path->l[0]; + struct btree_path_level *l = + &btree_iter_path(trans, &iter)->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index e778978d44fa..f89d1029366b 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1668,7 +1668,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto err; } - BUG_ON(!iter->path->should_be_locked); + BUG_ON(!btree_iter_path(trans, iter)->should_be_locked); i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout); ret = PTR_ERR_OR_ZERO(i); From 7f9821a7c10bca1903bfa75126db199a40ec3b62 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 16:10:24 -0500 Subject: [PATCH 155/226] bcachefs: btree_insert_entry -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 4 +- fs/bcachefs/btree_key_cache.c | 2 +- fs/bcachefs/btree_trans_commit.c | 67 ++++++++++++++++++-------------- fs/bcachefs/btree_types.h | 2 +- fs/bcachefs/btree_update.c | 27 ++++++------- fs/bcachefs/btree_update.h | 2 +- 6 files changed, 56 insertions(+), 48 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 909c215fbb28..e90282c85b3c 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -656,7 +656,7 @@ static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str i->btree_id == b->c.btree_id && bpos_cmp(i->k->k.p, b->data->min_key) >= 0 && bpos_cmp(i->k->k.p, b->data->max_key) <= 0) { - i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v; + i->old_v = bch2_btree_path_peek_slot(trans->paths + i->path, &i->old_k).v; if (unlikely(trans->journal_replay_not_finished)) { struct bkey_i *j_k = @@ -3008,7 +3008,7 @@ void bch2_trans_put(struct btree_trans *trans) s->max_mem = max(s->max_mem, trans->mem_max); trans_for_each_update(trans, i) - __btree_path_put(i->path, true); + __btree_path_put(trans->paths + i->path, true); trans->nr_updates = 0; check_btree_paths_leaked(trans); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 4fb708e91a8f..99dc0b1a7fc2 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -755,7 +755,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, struct btree_insert_entry *insert_entry) { struct bch_fs *c = trans->c; - struct bkey_cached *ck = (void *) insert_entry->path->l[0].b; + struct bkey_cached *ck = (void *) (trans->paths + insert_entry->path)->l[0].b; struct bkey_i *insert = insert_entry->k; bool kick_reclaim = false; diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 9fe22628a946..0e3603d367f9 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -23,7 +23,7 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert #ifdef CONFIG_BCACHEFS_DEBUG struct bch_fs *c = trans->c; struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot_exact(i->path, &u); + struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u); if (unlikely(trans->journal_replay_not_finished)) { struct bkey_i *j_k = @@ -41,23 +41,23 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert #endif } -static inline struct btree_path_level *insert_l(struct btree_insert_entry *i) +static inline struct btree_path_level *insert_l(struct btree_trans *trans, struct btree_insert_entry *i) { - return i->path->l + i->level; + return (trans->paths + i->path)->l + i->level; } static inline bool same_leaf_as_prev(struct btree_trans *trans, struct btree_insert_entry *i) { return i != trans->updates && - insert_l(&i[0])->b == insert_l(&i[-1])->b; + insert_l(trans, &i[0])->b == insert_l(trans, &i[-1])->b; } static inline bool same_leaf_as_next(struct btree_trans *trans, struct btree_insert_entry *i) { return i + 1 < trans->updates + trans->nr_updates && - insert_l(&i[0])->b == insert_l(&i[1])->b; + insert_l(trans, &i[0])->b == insert_l(trans, &i[1])->b; } inline void bch2_btree_node_prep_for_write(struct btree_trans *trans, @@ -84,7 +84,7 @@ static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btre if (same_leaf_as_prev(trans, i)) continue; - bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b); + bch2_btree_node_unlock_write(trans, trans->paths + i->path, insert_l(trans, i)->b); } trace_and_count(trans->c, trans_restart_would_deadlock_write, trans); @@ -99,11 +99,11 @@ static inline int bch2_trans_lock_write(struct btree_trans *trans) if (same_leaf_as_prev(trans, i)) continue; - if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c)) + if (bch2_btree_node_lock_write(trans, trans->paths + i->path, &insert_l(trans, i)->b->c)) return trans_lock_write_fail(trans, i); if (!i->cached) - bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b); + bch2_btree_node_prep_for_write(trans, trans->paths + i->path, insert_l(trans, i)->b); } trans->write_locked = true; @@ -115,8 +115,8 @@ static inline void bch2_trans_unlock_write(struct btree_trans *trans) if (likely(trans->write_locked)) { trans_for_each_update(trans, i) if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_unlock_write_inlined(trans, i->path, - insert_l(i)->b); + bch2_btree_node_unlock_write_inlined(trans, + trans->paths + i->path, insert_l(trans, i)->b); trans->write_locked = false; } } @@ -307,10 +307,12 @@ inline void bch2_btree_insert_key_leaf(struct btree_trans *trans, static inline void btree_insert_entry_checks(struct btree_trans *trans, struct btree_insert_entry *i) { - BUG_ON(!bpos_eq(i->k->k.p, i->path->pos)); - BUG_ON(i->cached != i->path->cached); - BUG_ON(i->level != i->path->level); - BUG_ON(i->btree_id != i->path->btree_id); + struct btree_path *path = trans->paths + i->path; + + BUG_ON(!bpos_eq(i->k->k.p, path->pos)); + BUG_ON(i->cached != path->cached); + BUG_ON(i->level != path->level); + BUG_ON(i->btree_id != path->btree_id); EBUG_ON(!i->level && btree_type_has_snapshots(i->btree_id) && !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && @@ -453,11 +455,9 @@ static int run_one_mem_trigger(struct btree_trans *trans, old, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); } else { - struct bkey _deleted = KEY(0, 0, 0); + struct bkey _deleted = POS_KEY((trans->paths + i->path)->pos); struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; - _deleted.p = i->path->pos; - ret = bch2_mark_key(trans, i->btree_id, i->level, deleted, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|flags) ?: @@ -600,7 +600,7 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) */ BUG_ON(i->cached || i->level); - if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) { + if (gc_visited(c, gc_pos_btree_node(insert_l(trans, i)->b))) { ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC); if (ret) break; @@ -640,8 +640,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, u64s += i->k->k.u64s; ret = !i->cached - ? btree_key_can_insert(trans, insert_l(i)->b, u64s) - : btree_key_can_insert_cached(trans, flags, i->path, u64s); + ? btree_key_can_insert(trans, insert_l(trans, i)->b, u64s) + : btree_key_can_insert_cached(trans, flags, trans->paths + i->path, u64s); if (ret) { *stopped_at = i; return ret; @@ -746,13 +746,15 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, } trans_for_each_update(trans, i) { + struct btree_path *path = trans->paths + i->path; + if (!i->cached) { - bch2_btree_insert_key_leaf(trans, i->path, i->k, trans->journal_res.seq); + bch2_btree_insert_key_leaf(trans, path, i->k, trans->journal_res.seq); } else if (!i->key_cache_already_flushed) bch2_btree_insert_key_cached(trans, flags, i); else { - bch2_btree_key_cache_drop(trans, i->path); - btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE); + bch2_btree_key_cache_drop(trans, path); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); } } @@ -821,7 +823,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags if (!same_leaf_as_next(trans, i)) { if (u64s_delta <= 0) { - ret = bch2_foreground_maybe_merge(trans, i->path, + ret = bch2_foreground_maybe_merge(trans, trans->paths + i->path, i->level, flags); if (unlikely(ret)) return ret; @@ -875,11 +877,14 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, struct bch_fs *c = trans->c; switch (ret) { - case -BCH_ERR_btree_insert_btree_node_full: - ret = bch2_btree_split_leaf(trans, i->path, flags); + case -BCH_ERR_btree_insert_btree_node_full: { + struct btree_path *path = trans->paths + i->path; + + ret = bch2_btree_split_leaf(trans, path, flags); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path); + trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, path); break; + } case -BCH_ERR_btree_insert_need_mark_replicas: ret = drop_locks_do(trans, bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas)); @@ -1015,13 +1020,15 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); trans_for_each_update(trans, i) { - EBUG_ON(!i->path->should_be_locked); + struct btree_path *path = trans->paths + i->path; - ret = bch2_btree_path_upgrade(trans, i->path, i->level + 1); + EBUG_ON(!path->should_be_locked); + + ret = bch2_btree_path_upgrade(trans, path, i->level + 1); if (unlikely(ret)) goto out; - EBUG_ON(!btree_node_intent_locked(i->path, i->level)); + EBUG_ON(!btree_node_intent_locked(path, i->level)); if (i->key_cache_already_flushed) continue; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 4b96437c087e..da7b836cbcf8 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -351,8 +351,8 @@ struct btree_insert_entry { * to the size of the key being overwritten in the btree: */ u8 old_btree_u64s; + btree_path_idx_t path; struct bkey_i *k; - struct btree_path *path; /* key being overwritten: */ struct bkey old_k; const struct bch_val *old_v; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 41f0009fb625..a5a99c3cb32e 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -24,7 +24,7 @@ static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, } static int __must_check -bch2_trans_update_by_path(struct btree_trans *, struct btree_path *, +bch2_trans_update_by_path(struct btree_trans *, btree_path_idx_t, struct bkey_i *, enum btree_update_flags, unsigned long ip); @@ -266,7 +266,7 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans, bch2_cut_front(new.k->p, update); - ret = bch2_trans_update_by_path(trans, btree_iter_path(trans, iter), update, + ret = bch2_trans_update_by_path(trans, iter->path, update, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| flags, _RET_IP_); if (ret) @@ -339,7 +339,6 @@ static int bch2_trans_update_extent(struct btree_trans *trans, } static noinline int flush_new_cached_update(struct btree_trans *trans, - struct btree_path *path, struct btree_insert_entry *i, enum btree_update_flags flags, unsigned long ip) @@ -348,7 +347,7 @@ static noinline int flush_new_cached_update(struct btree_trans *trans, int ret; btree_path_idx_t path_idx = - bch2_path_get(trans, path->btree_id, path->pos, 1, 0, + bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0, BTREE_ITER_INTENT, _THIS_IP_); ret = bch2_btree_path_traverse(trans, path_idx, 0); if (ret) @@ -370,14 +369,14 @@ static noinline int flush_new_cached_update(struct btree_trans *trans, i->flags |= BTREE_TRIGGER_NORUN; btree_path_set_should_be_locked(btree_path); - ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip); + ret = bch2_trans_update_by_path(trans, path_idx, i->k, flags, ip); out: bch2_path_put(trans, path_idx, true); return ret; } static int __must_check -bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, +bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, struct bkey_i *k, enum btree_update_flags flags, unsigned long ip) { @@ -385,6 +384,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, struct btree_insert_entry *i, n; int cmp; + struct btree_path *path = trans->paths + path_idx; EBUG_ON(!path->should_be_locked); EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX); EBUG_ON(!bpos_eq(k->k.p, path->pos)); @@ -395,7 +395,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, .btree_id = path->btree_id, .level = path->level, .cached = path->cached, - .path = path, + .path = path_idx, .k = k, .ip_allocated = ip, }; @@ -419,7 +419,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, if (!cmp && i < trans->updates + trans->nr_updates) { EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run); - bch2_path_put(trans, i->path->idx, true); + bch2_path_put(trans, i->path, true); i->flags = n.flags; i->cached = n.cached; i->k = n.k; @@ -443,7 +443,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, } } - __btree_path_get(i->path, true); + __btree_path_get(trans->paths + i->path, true); /* * If a key is present in the key cache, it must also exist in the @@ -453,7 +453,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, * work: */ if (path->cached && bkey_deleted(&i->old_k)) - return flush_new_cached_update(trans, path, i, flags, ip); + return flush_new_cached_update(trans, i, flags, ip); return 0; } @@ -501,7 +501,7 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i *k, enum btree_update_flags flags) { - struct btree_path *path = trans->paths + (iter->update_path ?: iter->path); + btree_path_idx_t path_idx = iter->update_path ?: iter->path; int ret; if (iter->flags & BTREE_ITER_IS_EXTENTS) @@ -521,6 +521,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter /* * Ensure that updates to cached btrees go to the key cache: */ + struct btree_path *path = trans->paths + path_idx; if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) && !path->cached && !path->level && @@ -529,10 +530,10 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter if (ret) return ret; - path = trans->paths + iter->key_cache_path; + path_idx = iter->key_cache_path; } - return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_); + return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_); } int bch2_btree_insert_clone_trans(struct btree_trans *trans, diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 5c1ef9c84878..fbb83e1aa2b2 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -194,7 +194,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, static inline void bch2_trans_reset_updates(struct btree_trans *trans) { trans_for_each_update(trans, i) - bch2_path_put(trans, i->path->idx, true); + bch2_path_put(trans, i->path, true); trans->extra_journal_res = 0; trans->nr_updates = 0; From 1f75ba4e65c54ff0517d7cd073afceb0751d1a32 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 16:35:45 -0500 Subject: [PATCH 156/226] bcachefs: struct trans_for_each_path_inorder_iter reducing our usage of path->idx Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 23 ++++++++++++----------- fs/bcachefs/btree_iter.h | 15 +++++++++++---- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index e90282c85b3c..0f904a79d4e5 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -332,12 +332,12 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, struct bpos pos, bool key_cache) { struct btree_path *path; - unsigned idx; + struct trans_for_each_path_inorder_iter iter; struct printbuf buf = PRINTBUF; btree_trans_sort_paths(trans); - trans_for_each_path_inorder(trans, path, idx) { + trans_for_each_path_inorder(trans, path, iter) { int cmp = cmp_int(path->btree_id, id) ?: cmp_int(path->cached, key_cache); @@ -1435,13 +1435,13 @@ static noinline __cold void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans, bool nosort) { + struct trans_for_each_path_inorder_iter iter; struct btree_path *path; - unsigned idx; if (!nosort) btree_trans_sort_paths(trans); - trans_for_each_path_inorder(trans, path, idx) + trans_for_each_path_inorder(trans, path, iter) bch2_btree_path_to_text(out, path); } @@ -1555,14 +1555,15 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, struct btree_path *path; bool cached = flags & BTREE_ITER_CACHED; bool intent = flags & BTREE_ITER_INTENT; - btree_path_idx_t i, path_pos = 0; + struct trans_for_each_path_inorder_iter iter; + btree_path_idx_t path_pos = 0; bch2_trans_verify_not_in_restart(trans); bch2_trans_verify_locks(trans); btree_trans_sort_paths(trans); - trans_for_each_path_inorder(trans, path, i) { + trans_for_each_path_inorder(trans, path, iter) { if (__btree_path_cmp(path, btree_id, cached, @@ -1570,7 +1571,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, level) > 0) break; - path_pos = path->idx; + path_pos = iter.path_idx; } if (path_pos && @@ -1592,7 +1593,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, path->level = level; path->locks_want = locks_want; path->nodes_locked = 0; - for (i = 0; i < ARRAY_SIZE(path->l); i++) + for (unsigned i = 0; i < ARRAY_SIZE(path->l); i++) path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init); #ifdef TRACK_PATH_ALLOCATED path->ip_allocated = ip; @@ -2545,12 +2546,12 @@ static void btree_trans_verify_sorted_refs(struct btree_trans *trans) static void btree_trans_verify_sorted(struct btree_trans *trans) { struct btree_path *path, *prev = NULL; - unsigned i; + struct trans_for_each_path_inorder_iter iter; if (!bch2_debug_check_iterators) return; - trans_for_each_path_inorder(trans, path, i) { + trans_for_each_path_inorder(trans, path, iter) { if (prev && btree_path_cmp(prev, path) > 0) { __bch2_dump_trans_paths_updates(trans, true); panic("trans paths out of order!\n"); @@ -3085,7 +3086,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) continue; prt_printf(out, " path %u %c l=%u %s:", - path->idx, + idx, path->cached ? 'c' : 'b', path->level, bch2_btree_id_str(path->btree_id)); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index e6ec971d66d3..a4d1d26c706d 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -125,10 +125,17 @@ static inline struct btree_path *prev_btree_path(struct btree_trans *trans, stru : NULL; } -#define trans_for_each_path_inorder(_trans, _path, _i) \ - for (_i = 0; \ - ((_path) = (_trans)->paths + trans->sorted[_i]), (_i) < (_trans)->nr_sorted;\ - _i++) +struct trans_for_each_path_inorder_iter { + btree_path_idx_t sorted_idx; + btree_path_idx_t path_idx; +}; + +#define trans_for_each_path_inorder(_trans, _path, _iter) \ + for (_iter = (struct trans_for_each_path_inorder_iter) { 0 }; \ + (_iter.path_idx = trans->sorted[_iter.sorted_idx], \ + _path = (_trans)->paths + _iter.path_idx, \ + _iter.sorted_idx < (_trans)->nr_sorted); \ + _iter.sorted_idx++) #define trans_for_each_path_inorder_reverse(_trans, _path, _i) \ for (_i = trans->nr_sorted - 1; \ From 311e446a414885aed71bc8106bdc4874774343c4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 23:29:06 -0500 Subject: [PATCH 157/226] bcachefs: bch2_btree_path_to_text() -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 12 ++++++------ fs/bcachefs/btree_iter.h | 7 ++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 0f904a79d4e5..95c0a50ae03f 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1413,11 +1413,12 @@ void bch2_dump_trans_updates(struct btree_trans *trans) printbuf_exit(&buf); } -noinline __cold -void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path) +static void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx) { + struct btree_path *path = trans->paths + path_idx; + prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ", - path->idx, path->ref, path->intent_ref, + path_idx, path->ref, path->intent_ref, path->preserve ? 'P' : ' ', path->should_be_locked ? 'S' : ' ', bch2_btree_id_str(path->btree_id), @@ -1436,13 +1437,12 @@ void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans, bool nosort) { struct trans_for_each_path_inorder_iter iter; - struct btree_path *path; if (!nosort) btree_trans_sort_paths(trans); - trans_for_each_path_inorder(trans, path, iter) - bch2_btree_path_to_text(out, path); + trans_for_each_path_idx_inorder(trans, iter) + bch2_btree_path_to_text(out, trans, iter.path_idx); } noinline __cold diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index a4d1d26c706d..173c91e5f6b0 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -125,6 +125,12 @@ static inline struct btree_path *prev_btree_path(struct btree_trans *trans, stru : NULL; } +#define trans_for_each_path_idx_inorder(_trans, _iter) \ + for (_iter = (struct trans_for_each_path_inorder_iter) { 0 }; \ + (_iter.path_idx = trans->sorted[_iter.sorted_idx], \ + _iter.sorted_idx < (_trans)->nr_sorted); \ + _iter.sorted_idx++) + struct trans_for_each_path_inorder_iter { btree_path_idx_t sorted_idx; btree_path_idx_t path_idx; @@ -835,7 +841,6 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, }) void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); -void bch2_btree_path_to_text(struct printbuf *, struct btree_path *); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); void bch2_dump_trans_updates(struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *); From 4c5289e6323ca9d0d46b3663ace2fb44bb2594b7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 17:54:02 -0500 Subject: [PATCH 158/226] bcachefs: kill trans_for_each_path_from() dead code Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 173c91e5f6b0..b70aacafac20 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -74,14 +74,11 @@ __trans_next_path(struct btree_trans *trans, unsigned idx) return &trans->paths[idx]; } -#define trans_for_each_path_from(_trans, _path, _start) \ - for (_path = __trans_next_path((_trans), _start); \ +#define trans_for_each_path(_trans, _path) \ + for (_path = __trans_next_path((_trans), 1); \ (_path); \ _path = __trans_next_path((_trans), (_path)->idx + 1)) -#define trans_for_each_path(_trans, _path) \ - trans_for_each_path_from(_trans, _path, 1) - static inline struct btree_path * __trans_next_path_safe(struct btree_trans *trans, unsigned *idx) { From ccb7b08fbbb85e9b0bb2867497d98172a5737ad5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 23:37:45 -0500 Subject: [PATCH 159/226] bcachefs: trans_for_each_path() no longer uses path->idx path->idx is now a code smell: we should be using path_idx_t, since it's stable across btree path reallocation. This is also a bit faster, using the same loop counter vs. fetching path->idx from each path we iterate over. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 25 +++++++++++-------- fs/bcachefs/btree_iter.h | 36 +++++++++++++-------------- fs/bcachefs/btree_key_cache.c | 3 ++- fs/bcachefs/btree_locking.c | 38 ++++++++++++++++++----------- fs/bcachefs/btree_locking.h | 3 ++- fs/bcachefs/btree_update_interior.c | 8 +++--- 6 files changed, 64 insertions(+), 49 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 95c0a50ae03f..7723c03ec553 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -241,8 +241,9 @@ static void bch2_btree_path_verify(struct btree_trans *trans, void bch2_trans_verify_paths(struct btree_trans *trans) { struct btree_path *path; + unsigned iter; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, iter) bch2_btree_path_verify(trans, path); } @@ -962,7 +963,8 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) struct bch_fs *c = trans->c; struct btree_path *path; unsigned long trace_ip = _RET_IP_; - int i, ret = 0; + unsigned i; + int ret = 0; if (trans->in_traverse_all) return -BCH_ERR_transaction_restart_in_traverse_all; @@ -972,7 +974,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) trans->restarted = 0; trans->last_restarted_ip = 0; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) path->should_be_locked = false; btree_trans_sort_paths(trans); @@ -2530,9 +2532,9 @@ static void btree_trans_verify_sorted_refs(struct btree_trans *trans) BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX) - 1); - trans_for_each_path(trans, path) { + trans_for_each_path(trans, path, i) { BUG_ON(path->sorted_idx >= trans->nr_sorted); - BUG_ON(trans->sorted[path->sorted_idx] != path->idx); + BUG_ON(trans->sorted[path->sorted_idx] != i); } for (i = 0; i < trans->nr_sorted; i++) { @@ -2774,8 +2776,9 @@ void bch2_trans_srcu_unlock(struct btree_trans *trans) if (trans->srcu_held) { struct bch_fs *c = trans->c; struct btree_path *path; + unsigned i; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path->cached && !btree_node_locked(path, 0)) path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); @@ -2807,6 +2810,7 @@ static void bch2_trans_srcu_lock(struct btree_trans *trans) u32 bch2_trans_begin(struct btree_trans *trans) { struct btree_path *path; + unsigned i; u64 now; bch2_trans_reset_updates(trans); @@ -2815,7 +2819,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->mem_top = 0; trans->journal_entries = NULL; - trans_for_each_path(trans, path) { + trans_for_each_path(trans, path, i) { path->should_be_locked = false; /* @@ -2832,7 +2836,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) * iterators if we do that */ if (!path->ref && !path->preserve) - __bch2_path_free(trans, path->idx); + __bch2_path_free(trans, i); else path->preserve = false; } @@ -2972,14 +2976,15 @@ static void check_btree_paths_leaked(struct btree_trans *trans) #ifdef CONFIG_BCACHEFS_DEBUG struct bch_fs *c = trans->c; struct btree_path *path; + unsigned i; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path->ref) goto leaked; return; leaked: bch_err(c, "btree paths leaked from %s!", trans->fn); - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path->ref) printk(KERN_ERR " btree %s %pS\n", bch2_btree_id_str(path->btree_id), diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index b70aacafac20..a75d0e7d122a 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -63,22 +63,6 @@ static inline void btree_trans_sort_paths(struct btree_trans *trans) __bch2_btree_trans_sort_paths(trans); } -static inline struct btree_path * -__trans_next_path(struct btree_trans *trans, unsigned idx) -{ - idx = find_next_bit(trans->paths_allocated, BTREE_ITER_MAX, idx); - if (idx == BTREE_ITER_MAX) - return NULL; - EBUG_ON(idx > BTREE_ITER_MAX); - EBUG_ON(trans->paths[idx].idx != idx); - return &trans->paths[idx]; -} - -#define trans_for_each_path(_trans, _path) \ - for (_path = __trans_next_path((_trans), 1); \ - (_path); \ - _path = __trans_next_path((_trans), (_path)->idx + 1)) - static inline struct btree_path * __trans_next_path_safe(struct btree_trans *trans, unsigned *idx) { @@ -102,6 +86,19 @@ __trans_next_path_safe(struct btree_trans *trans, unsigned *idx) #define trans_for_each_path_safe(_trans, _path, _idx) \ trans_for_each_path_safe_from(_trans, _path, _idx, 1) +static inline struct btree_path * +__trans_next_path(struct btree_trans *trans, unsigned *idx) +{ + struct btree_path *path = __trans_next_path_safe(trans, idx); + EBUG_ON(path && path->idx != *idx); + return path; +} + +#define trans_for_each_path(_trans, _path, _iter) \ + for (_iter = 1; \ + (_path = __trans_next_path((_trans), &_iter)); \ + _iter++) + static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path) { unsigned idx = path ? path->sorted_idx + 1 : 0; @@ -156,10 +153,11 @@ static inline struct btree_path * __trans_next_path_with_node(struct btree_trans *trans, struct btree *b, unsigned idx) { - struct btree_path *path = __trans_next_path(trans, idx); + struct btree_path *path = __trans_next_path(trans, &idx); - while (path && !__path_has_node(path, b)) - path = __trans_next_path(trans, path->idx + 1); + while ((path = __trans_next_path(trans, &idx)) && + !__path_has_node(path, b)) + idx++; return path; } diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 99dc0b1a7fc2..74e52fd28abe 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -690,8 +690,9 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, } } else { struct btree_path *path2; + unsigned i; evict: - trans_for_each_path(trans, path2) + trans_for_each_path(trans, path2, i) if (path2 != path) __bch2_btree_path_unlock(trans, path2); diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 24a91cc38538..e38b2b9570a8 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -32,13 +32,14 @@ struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans, { struct btree_path *path; struct six_lock_count ret; + unsigned i; memset(&ret, 0, sizeof(ret)); if (IS_ERR_OR_NULL(b)) return ret; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path != skip && &path->l[level].b->c == b) { int t = btree_node_locked_type(path, level); @@ -417,7 +418,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, struct btree_bkey_cached_common *b) { struct btree_path *linked; - unsigned i; + unsigned i, iter; int ret; /* @@ -431,7 +432,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, * already taken are no longer needed: */ - trans_for_each_path(trans, linked) { + trans_for_each_path(trans, linked, iter) { if (!linked->nodes_locked) continue; @@ -643,8 +644,6 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, unsigned new_locks_want, struct get_locks_fail *f) { - struct btree_path *linked; - if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f)) return true; @@ -667,8 +666,11 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, * before interior nodes - now that's handled by * bch2_btree_path_traverse_all(). */ - if (!path->cached && !trans->in_traverse_all) - trans_for_each_path(trans, linked) + if (!path->cached && !trans->in_traverse_all) { + struct btree_path *linked; + unsigned i; + + trans_for_each_path(trans, linked, i) if (linked != path && linked->cached == path->cached && linked->btree_id == path->btree_id && @@ -676,6 +678,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, linked->locks_want = new_locks_want; btree_path_get_locks(trans, linked, true, NULL); } + } return false; } @@ -716,22 +719,24 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, void bch2_trans_downgrade(struct btree_trans *trans) { struct btree_path *path; + unsigned i; if (trans->restarted) return; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) bch2_btree_path_downgrade(trans, path); } int bch2_trans_relock(struct btree_trans *trans) { struct btree_path *path; + unsigned i; if (unlikely(trans->restarted)) return -((int) trans->restarted); - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path->should_be_locked && !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path); @@ -743,11 +748,12 @@ int bch2_trans_relock(struct btree_trans *trans) int bch2_trans_relock_notrace(struct btree_trans *trans) { struct btree_path *path; + unsigned i; if (unlikely(trans->restarted)) return -((int) trans->restarted); - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path->should_be_locked && !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); @@ -758,16 +764,18 @@ int bch2_trans_relock_notrace(struct btree_trans *trans) void bch2_trans_unlock_noassert(struct btree_trans *trans) { struct btree_path *path; + unsigned i; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) __bch2_btree_path_unlock(trans, path); } void bch2_trans_unlock(struct btree_trans *trans) { struct btree_path *path; + unsigned i; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) __bch2_btree_path_unlock(trans, path); } @@ -780,8 +788,9 @@ void bch2_trans_unlock_long(struct btree_trans *trans) bool bch2_trans_locked(struct btree_trans *trans) { struct btree_path *path; + unsigned i; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path->nodes_locked) return true; return false; @@ -827,8 +836,9 @@ void bch2_btree_path_verify_locks(struct btree_path *path) void bch2_trans_verify_locks(struct btree_trans *trans) { struct btree_path *path; + unsigned i; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) bch2_btree_path_verify_locks(path); } diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index a49f1dd1d223..e9056fc3337d 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -242,8 +242,9 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans, enum btree_node_locked_type want) { struct btree_path *path; + unsigned i; - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (&path->l[level].b->c == b && btree_node_locked_type(path, level) >= want) { six_lock_increment(&b->lock, (enum six_lock_type) want); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index eb93c326db06..83cb363837f8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -190,7 +190,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, struct btree *b) { struct bch_fs *c = trans->c; - unsigned level = b->c.level; + unsigned i, level = b->c.level; bch2_btree_node_lock_write_nofail(trans, path, &b->c); bch2_btree_node_hash_remove(&c->btree_cache, b); @@ -198,7 +198,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, six_unlock_write(&b->c.lock); mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED); - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path->l[level].b == b) { btree_node_unlock(trans, path, level); path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); @@ -212,7 +212,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, struct bch_fs *c = as->c; struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL]; struct btree_path *path; - unsigned level = b->c.level; + unsigned i, level = b->c.level; BUG_ON(!list_empty(&b->write_blocked)); BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as)); @@ -235,7 +235,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, six_unlock_intent(&b->c.lock); - trans_for_each_path(trans, path) + trans_for_each_path(trans, path, i) if (path->l[level].b == b) { btree_node_unlock(trans, path, level); path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); From b0b67378225d66a3e1900c188b27586f48c6b119 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 23:57:50 -0500 Subject: [PATCH 160/226] bcachefs: trans_for_each_path_with_node() no longer uses path->idx Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 9 ++++++--- fs/bcachefs/btree_iter.h | 17 ++++++++--------- fs/bcachefs/btree_locking.h | 3 ++- fs/bcachefs/btree_update_interior.c | 3 ++- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 7723c03ec553..f69b84711cc2 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -418,8 +418,9 @@ void bch2_btree_path_fix_key_modified(struct btree_trans *trans, struct bkey_packed *where) { struct btree_path *path; + unsigned i; - trans_for_each_path_with_node(trans, b, path) { + trans_for_each_path_with_node(trans, b, path, i) { __bch2_btree_path_fix_key_modified(path, b, where); bch2_btree_path_verify_level(trans, path, b->c.level); } @@ -526,6 +527,7 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, { struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where); struct btree_path *linked; + unsigned i; if (node_iter != &path->l[b->c.level].iter) { __bch2_btree_node_iter_fix(path, b, node_iter, t, @@ -535,7 +537,7 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, bch2_btree_node_iter_verify(node_iter, b); } - trans_for_each_path_with_node(trans, b, linked) { + trans_for_each_path_with_node(trans, b, linked, i) { __bch2_btree_node_iter_fix(linked, b, &linked->l[b->c.level].iter, t, where, clobber_u64s, new_u64s); @@ -714,8 +716,9 @@ void bch2_trans_node_add(struct btree_trans *trans, void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b) { struct btree_path *path; + unsigned i; - trans_for_each_path_with_node(trans, b, path) + trans_for_each_path_with_node(trans, b, path, i) __btree_path_level_init(path, b->c.level); bch2_trans_revalidate_updates_in_node(trans, b); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index a75d0e7d122a..eb9aa3930ca2 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -151,22 +151,21 @@ static inline bool __path_has_node(const struct btree_path *path, static inline struct btree_path * __trans_next_path_with_node(struct btree_trans *trans, struct btree *b, - unsigned idx) + unsigned *idx) { - struct btree_path *path = __trans_next_path(trans, &idx); + struct btree_path *path; - while ((path = __trans_next_path(trans, &idx)) && + while ((path = __trans_next_path(trans, idx)) && !__path_has_node(path, b)) - idx++; + (*idx)++; return path; } -#define trans_for_each_path_with_node(_trans, _b, _path) \ - for (_path = __trans_next_path_with_node((_trans), (_b), 1); \ - (_path); \ - _path = __trans_next_path_with_node((_trans), (_b), \ - (_path)->idx + 1)) +#define trans_for_each_path_with_node(_trans, _b, _path, _iter) \ + for (_iter = 1; \ + (_path = __trans_next_path_with_node((_trans), (_b), &_iter));\ + _iter++) btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *, btree_path_idx_t, bool, unsigned long); diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index e9056fc3337d..fc92781e97ba 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -175,6 +175,7 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat struct btree *b) { struct btree_path *linked; + unsigned i; EBUG_ON(path->l[b->c.level].b != b); EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock)); @@ -182,7 +183,7 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); - trans_for_each_path_with_node(trans, b, linked) + trans_for_each_path_with_node(trans, b, linked, i) linked->l[b->c.level].lock_seq++; six_unlock_write(&b->c.lock); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 83cb363837f8..0649c219df33 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1646,13 +1646,14 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct keylist *keys) { struct btree_path *linked; + unsigned i; __bch2_btree_insert_keys_interior(as, trans, path, b, path->l[b->c.level].iter, keys); btree_update_updated_node(as, b); - trans_for_each_path_with_node(trans, b, linked) + trans_for_each_path_with_node(trans, b, linked, i) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); bch2_trans_verify_paths(trans); From 566eabd36faca007fb14d05e387b7aad4a7af219 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 00:02:07 -0500 Subject: [PATCH 161/226] bcachefs: bch2_path_get() no longer uses path->idx Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index f69b84711cc2..c997c59214af 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1561,7 +1561,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, bool cached = flags & BTREE_ITER_CACHED; bool intent = flags & BTREE_ITER_INTENT; struct trans_for_each_path_inorder_iter iter; - btree_path_idx_t path_pos = 0; + btree_path_idx_t path_pos = 0, path_idx; bch2_trans_verify_not_in_restart(trans); bch2_trans_verify_locks(trans); @@ -1584,10 +1584,11 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, trans->paths[path_pos].btree_id == btree_id && trans->paths[path_pos].level == level) { __btree_path_get(trans->paths + path_pos, intent); - path = trans->paths + bch2_btree_path_set_pos(trans, - path_pos, pos, intent, ip); + path_idx = bch2_btree_path_set_pos(trans, path_pos, pos, intent, ip); + path = trans->paths + path_idx; } else { - path = trans->paths + btree_path_alloc(trans, path_pos); + path_idx = btree_path_alloc(trans, path_pos); + path = trans->paths + path_idx; __btree_path_get(path, intent); path->pos = pos; @@ -1624,7 +1625,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, if (locks_want > path->locks_want) bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL); - return path->idx; + return path_idx; } struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u) From 542e6396740aed017905e3ad02dbf07a4bf7369d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 00:03:44 -0500 Subject: [PATCH 162/226] bcachefs: bch2_btree_iter_peek_prev() no longer uses path->idx Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index c997c59214af..30242e95e617 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2245,10 +2245,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; struct bpos search_key = iter->pos; - struct btree_path *saved_path = NULL; struct bkey_s_c k; struct bkey saved_k; const struct bch_val *saved_v; + btree_path_idx_t saved_path = 0; int ret; EBUG_ON(btree_iter_path(trans, iter)->cached || @@ -2299,8 +2299,8 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) if (saved_path && !bkey_eq(k.k->p, saved_k.p)) { bch2_path_put_nokeep(trans, iter->path, iter->flags & BTREE_ITER_INTENT); - iter->path = saved_path->idx; - saved_path = NULL; + iter->path = saved_path; + saved_path = 0; iter->k = saved_k; k.v = saved_v; goto got_key; @@ -2310,9 +2310,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) iter->snapshot, k.k->p.snapshot)) { if (saved_path) - bch2_path_put_nokeep(trans, saved_path->idx, + bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_INTENT); - saved_path = trans->paths + btree_path_clone(trans, iter->path, + saved_path = btree_path_clone(trans, iter->path, iter->flags & BTREE_ITER_INTENT); path = btree_iter_path(trans, iter); saved_k = *k.k; @@ -2354,7 +2354,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) iter->pos.snapshot = iter->snapshot; out_no_locked: if (saved_path) - bch2_path_put_nokeep(trans, saved_path->idx, iter->flags & BTREE_ITER_INTENT); + bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_INTENT); bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); From d7e14035a4b43440a5ba741759501d86e96005d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 00:17:17 -0500 Subject: [PATCH 163/226] bcachefs: get_unlocked_mut_path() -> btree_path_idx_t Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 85 ++++++++++++++--------------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 0649c219df33..737ccbe1aef9 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -29,10 +29,10 @@ static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, struct keylist *, unsigned); static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); -static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans, - enum btree_id btree_id, - unsigned level, - struct bpos pos) +static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans, + enum btree_id btree_id, + unsigned level, + struct bpos pos) { btree_path_idx_t path_idx = bch2_path_get(trans, btree_id, pos, level + 1, level, BTREE_ITER_NOPRESERVE| @@ -42,7 +42,7 @@ static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans, struct btree_path *path = trans->paths + path_idx; bch2_btree_path_downgrade(trans, path); __bch2_btree_path_unlock(trans, path); - return path; + return path_idx; } /* Debug code: */ @@ -647,10 +647,11 @@ static void btree_update_nodes_written(struct btree_update *as) "%s(): error %s", __func__, bch2_err_str(ret)); err: if (as->b) { - struct btree_path *path; b = as->b; - path = get_unlocked_mut_path(trans, as->btree_id, b->c.level, b->key.k.p); + btree_path_idx_t path_idx = get_unlocked_mut_path(trans, + as->btree_id, b->c.level, b->key.k.p); + struct btree_path *path = trans->paths + path_idx; /* * @b is the node we did the final insert into: * @@ -720,7 +721,7 @@ static void btree_update_nodes_written(struct btree_update *as) btree_node_write_if_need(c, b, SIX_LOCK_intent); btree_node_unlock(trans, path, b->c.level); - bch2_path_put(trans, path->idx, true); + bch2_path_put(trans, path_idx, true); } bch2_journal_pin_drop(&c->journal, &as->journal); @@ -1476,7 +1477,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, struct bch_fs *c = as->c; struct btree *parent = btree_node_parent(path, b); struct btree *n1, *n2 = NULL, *n3 = NULL; - struct btree_path *path1 = NULL, *path2 = NULL; + btree_path_idx_t path1 = 0, path2 = 0; u64 start_time = local_clock(); int ret = 0; @@ -1511,13 +1512,13 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p); six_lock_increment(&n1->c.lock, SIX_LOCK_intent); - mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED); - bch2_btree_path_level_init(trans, path1, n1); + mark_btree_node_locked(trans, trans->paths + path1, n1->c.level, BTREE_NODE_INTENT_LOCKED); + bch2_btree_path_level_init(trans, trans->paths + path1, n1); path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p); six_lock_increment(&n2->c.lock, SIX_LOCK_intent); - mark_btree_node_locked(trans, path2, n2->c.level, BTREE_NODE_INTENT_LOCKED); - bch2_btree_path_level_init(trans, path2, n2); + mark_btree_node_locked(trans, trans->paths + path2, n2->c.level, BTREE_NODE_INTENT_LOCKED); + bch2_btree_path_level_init(trans, trans->paths + path2, n2); /* * Note that on recursive parent_keys == keys, so we @@ -1534,11 +1535,11 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, bch2_btree_update_add_new_node(as, n3); six_unlock_write(&n3->c.lock); - path2->locks_want++; - BUG_ON(btree_node_locked(path2, n3->c.level)); + trans->paths[path2].locks_want++; + BUG_ON(btree_node_locked(trans->paths + path2, n3->c.level)); six_lock_increment(&n3->c.lock, SIX_LOCK_intent); - mark_btree_node_locked(trans, path2, n3->c.level, BTREE_NODE_INTENT_LOCKED); - bch2_btree_path_level_init(trans, path2, n3); + mark_btree_node_locked(trans, trans->paths + path2, n3->c.level, BTREE_NODE_INTENT_LOCKED); + bch2_btree_path_level_init(trans, trans->paths + path2, n3); n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; @@ -1561,8 +1562,8 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p); six_lock_increment(&n1->c.lock, SIX_LOCK_intent); - mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED); - bch2_btree_path_level_init(trans, path1, n1); + mark_btree_node_locked(trans, trans->paths + path1, n1->c.level, BTREE_NODE_INTENT_LOCKED); + bch2_btree_path_level_init(trans, trans->paths + path1, n1); if (parent) bch2_keylist_add(&as->parent_keys, &n1->key); @@ -1604,8 +1605,8 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, if (n3) bch2_trans_node_add(trans, path, n3); if (n2) - bch2_trans_node_add(trans, path2, n2); - bch2_trans_node_add(trans, path1, n1); + bch2_trans_node_add(trans, trans->paths + path2, n2); + bch2_trans_node_add(trans, trans->paths + path1, n1); if (n3) six_unlock_intent(&n3->c.lock); @@ -1614,12 +1615,12 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, six_unlock_intent(&n1->c.lock); out: if (path2) { - __bch2_btree_path_unlock(trans, path2); - bch2_path_put(trans, path2->idx, true); + __bch2_btree_path_unlock(trans, trans->paths + path2); + bch2_path_put(trans, path2, true); } if (path1) { - __bch2_btree_path_unlock(trans, path1); - bch2_path_put(trans, path1->idx, true); + __bch2_btree_path_unlock(trans, trans->paths + path1); + bch2_path_put(trans, path1, true); } bch2_trans_verify_locks(trans); @@ -1770,7 +1771,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, enum btree_node_sibling sib) { struct bch_fs *c = trans->c; - struct btree_path *sib_path = NULL, *new_path = NULL; struct btree_update *as; struct bkey_format_state new_s; struct bkey_format new_f; @@ -1778,6 +1778,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, struct btree *b, *m, *n, *prev, *next, *parent; struct bpos sib_pos; size_t sib_u64s; + btree_path_idx_t sib_path = 0, new_path = 0; u64 start_time = local_clock(); int ret = 0; @@ -1796,20 +1797,18 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, ? bpos_predecessor(b->data->min_key) : bpos_successor(b->data->max_key); - btree_path_idx_t sib_path_idx = - bch2_path_get(trans, path->btree_id, sib_pos, - U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_); - ret = bch2_btree_path_traverse(trans, sib_path_idx, false); + sib_path = bch2_path_get(trans, path->btree_id, sib_pos, + U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_); + ret = bch2_btree_path_traverse(trans, sib_path, false); if (ret) goto err; - sib_path = trans->paths + sib_path_idx; - btree_path_set_should_be_locked(sib_path); + btree_path_set_should_be_locked(trans->paths + sib_path); - m = sib_path->l[level].b; + m = trans->paths[sib_path].l[level].b; if (btree_node_parent(path, b) != - btree_node_parent(sib_path, m)) { + btree_node_parent(trans->paths + sib_path, m)) { b->sib_u64s[sib] = U16_MAX; goto out; } @@ -1895,8 +1894,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p); six_lock_increment(&n->c.lock, SIX_LOCK_intent); - mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED); - bch2_btree_path_level_init(trans, new_path, n); + mark_btree_node_locked(trans, trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED); + bch2_btree_path_level_init(trans, trans->paths + new_path, n); bkey_init(&delete.k); delete.k.p = prev->key.k.p; @@ -1915,7 +1914,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); bch2_btree_node_free_inmem(trans, path, b); - bch2_btree_node_free_inmem(trans, sib_path, m); + bch2_btree_node_free_inmem(trans, trans->paths + sib_path, m); bch2_trans_node_add(trans, path, n); @@ -1929,8 +1928,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, out: err: if (new_path) - bch2_path_put(trans, new_path->idx, true); - bch2_path_put(trans, sib_path_idx, true); + bch2_path_put(trans, new_path, true); + bch2_path_put(trans, sib_path, true); bch2_trans_verify_locks(trans); return ret; err_free_update: @@ -1945,9 +1944,9 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; - struct btree_path *new_path = NULL; struct btree *n, *parent; struct btree_update *as; + btree_path_idx_t new_path = 0; int ret; flags |= BCH_TRANS_COMMIT_no_enospc; @@ -1969,8 +1968,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p); six_lock_increment(&n->c.lock, SIX_LOCK_intent); - mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED); - bch2_btree_path_level_init(trans, new_path, n); + mark_btree_node_locked(trans, trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED); + bch2_btree_path_level_init(trans, trans->paths + new_path, n); trace_and_count(c, btree_node_rewrite, trans, b); @@ -1995,7 +1994,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, bch2_btree_update_done(as, trans); out: if (new_path) - bch2_path_put(trans, new_path->idx, true); + bch2_path_put(trans, new_path, true); bch2_trans_downgrade(trans); return ret; err: From 398c98347d464f7675216c5ea974a82bcdb2bef9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 00:23:33 -0500 Subject: [PATCH 164/226] bcachefs: kill btree_path.idx Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 7 +------ fs/bcachefs/btree_iter.h | 28 ++++++---------------------- fs/bcachefs/btree_locking.c | 2 +- fs/bcachefs/btree_locking.h | 1 - fs/bcachefs/btree_types.h | 1 - 5 files changed, 8 insertions(+), 31 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 30242e95e617..cfe81db883cc 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1324,8 +1324,6 @@ void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool in { struct btree_path *path = trans->paths + path_idx, *dup; - EBUG_ON(path->idx != path_idx); - if (!__btree_path_put(path, intent)) return; @@ -1352,8 +1350,6 @@ void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool in static void bch2_path_put_nokeep(struct btree_trans *trans, btree_path_idx_t path, bool intent) { - EBUG_ON(trans->paths[path].idx != path); - if (!__btree_path_put(trans->paths + path, intent)) return; @@ -1542,7 +1538,6 @@ static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, __set_bit(idx, trans->paths_allocated); struct btree_path *path = &trans->paths[idx]; - path->idx = idx; path->ref = 0; path->intent_ref = 0; path->nodes_locked = 0; @@ -3090,7 +3085,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn); - trans_for_each_path_safe(trans, path, idx) { + trans_for_each_path(trans, path, idx) { if (!path->nodes_locked) continue; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index eb9aa3930ca2..5ee118d3eaa5 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -64,40 +64,24 @@ static inline void btree_trans_sort_paths(struct btree_trans *trans) } static inline struct btree_path * -__trans_next_path_safe(struct btree_trans *trans, unsigned *idx) +__trans_next_path(struct btree_trans *trans, unsigned *idx) { *idx = find_next_bit(trans->paths_allocated, BTREE_ITER_MAX, *idx); - if (*idx == BTREE_ITER_MAX) - return NULL; - EBUG_ON(*idx > BTREE_ITER_MAX); - return &trans->paths[*idx]; + return *idx < BTREE_ITER_MAX ? &trans->paths[*idx] : NULL; } /* * This version is intended to be safe for use on a btree_trans that is owned by * another thread, for bch2_btree_trans_to_text(); */ -#define trans_for_each_path_safe_from(_trans, _path, _idx, _start) \ +#define trans_for_each_path_from(_trans, _path, _idx, _start) \ for (_idx = _start; \ - (_path = __trans_next_path_safe((_trans), &_idx)); \ + (_path = __trans_next_path((_trans), &_idx)); \ _idx++) -#define trans_for_each_path_safe(_trans, _path, _idx) \ - trans_for_each_path_safe_from(_trans, _path, _idx, 1) - -static inline struct btree_path * -__trans_next_path(struct btree_trans *trans, unsigned *idx) -{ - struct btree_path *path = __trans_next_path_safe(trans, idx); - EBUG_ON(path && path->idx != *idx); - return path; -} - -#define trans_for_each_path(_trans, _path, _iter) \ - for (_iter = 1; \ - (_path = __trans_next_path((_trans), &_iter)); \ - _iter++) +#define trans_for_each_path(_trans, _path, _idx) \ + trans_for_each_path_from(_trans, _path, _idx, 1) static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path) { diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index e38b2b9570a8..e4a436bfaeee 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -302,7 +302,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) top = &g.g[g.nr - 1]; - trans_for_each_path_safe_from(top->trans, path, path_idx, top->path_idx) { + trans_for_each_path_from(top->trans, path, path_idx, top->path_idx) { if (!path->nodes_locked) continue; diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index fc92781e97ba..64810ea544c9 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -265,7 +265,6 @@ static inline int btree_node_lock(struct btree_trans *trans, int ret = 0; EBUG_ON(level >= BTREE_MAX_DEPTH); - EBUG_ON(!test_bit(path->idx, trans->paths_allocated)); if (likely(six_trylock_type(&b->lock, type)) || btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) || diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index da7b836cbcf8..54156a05ebf5 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -225,7 +225,6 @@ enum btree_path_uptodate { typedef u16 btree_path_idx_t; struct btree_path { - btree_path_idx_t idx; btree_path_idx_t sorted_idx; u8 ref; u8 intent_ref; From 6474b706108bac9e531a71ddeb8150f8fa17163c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 02:31:12 -0500 Subject: [PATCH 165/226] bcachefs: Clean up btree_trans Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 12 ++++--- fs/bcachefs/btree_trans_commit.c | 4 +-- fs/bcachefs/btree_types.h | 59 ++++++++++++++++---------------- fs/bcachefs/btree_update.c | 4 +-- fs/bcachefs/btree_update.h | 2 +- fs/bcachefs/fsck.c | 2 +- 6 files changed, 42 insertions(+), 41 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index cfe81db883cc..ced13bbc52f7 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1493,7 +1493,7 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans) printbuf_exit(&buf); - trans->nr_max_paths = nr; + trans->nr_paths_max = nr; } noinline __cold @@ -1526,13 +1526,11 @@ static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, if (unlikely(idx == BTREE_ITER_MAX)) btree_path_overflow(trans); - BUG_ON(idx > BTREE_ITER_MAX); - /* * Do this before marking the new path as allocated, since it won't be * initialized yet: */ - if (unlikely(idx > trans->nr_max_paths)) + if (unlikely(idx > trans->nr_paths_max)) bch2_trans_update_max_paths(trans); __set_bit(idx, trans->paths_allocated); @@ -2918,6 +2916,10 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) atomic_inc_not_zero(&c->journal_keys.ref); closure_init_stack(&trans->ref); + trans->paths_allocated = trans->_paths_allocated; + trans->sorted = trans->_sorted; + trans->paths = trans->_paths; + trans->paths_allocated[0] = 1; s = btree_trans_stats(trans); @@ -2935,7 +2937,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) } if (s) { - trans->nr_max_paths = s->nr_max_paths; + trans->nr_paths_max = s->nr_max_paths; trans->journal_entries_size = s->journal_entries_size; } diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 0e3603d367f9..7e6ba3061e7c 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -1044,9 +1044,9 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) trans->journal_u64s += jset_u64s(i->old_k.u64s); } - if (trans->extra_journal_res) { + if (trans->extra_disk_res) { ret = bch2_disk_reservation_add(c, trans->disk_res, - trans->extra_journal_res, + trans->extra_disk_res, (flags & BCH_TRANS_COMMIT_no_enospc) ? BCH_DISK_RESERVATION_NOFAIL : 0); if (ret) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 54156a05ebf5..c1baece14120 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -374,21 +374,22 @@ struct btree_trans_commit_hook { struct btree_trans { struct bch_fs *c; - const char *fn; - struct closure ref; - struct list_head list; - u64 last_begin_time; - u8 lock_may_not_fail; - u8 lock_must_abort; - struct btree_bkey_cached_common *locking; - struct six_lock_waiter locking_wait; + unsigned long *paths_allocated; + u8 *sorted; + struct btree_path *paths; - int srcu_idx; + void *mem; + unsigned mem_top; + unsigned mem_max; + unsigned mem_bytes; + btree_path_idx_t nr_sorted; + btree_path_idx_t nr_paths_max; u8 fn_idx; - u8 nr_sorted; u8 nr_updates; + u8 lock_must_abort; + bool lock_may_not_fail:1; bool srcu_held:1; bool used_mempool:1; bool in_traverse_all:1; @@ -400,40 +401,38 @@ struct btree_trans { bool write_locked:1; enum bch_errcode restarted:16; u32 restart_count; + + u64 last_begin_time; unsigned long last_begin_ip; unsigned long last_restarted_ip; unsigned long srcu_lock_time; - /* - * For when bch2_trans_update notices we'll be splitting a compressed - * extent: - */ - unsigned extra_journal_res; - btree_path_idx_t nr_max_paths; - u16 journal_entries_u64s; - u16 journal_entries_size; - - unsigned long paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; - - unsigned mem_top; - unsigned mem_max; - unsigned mem_bytes; - void *mem; - - u8 sorted[BTREE_ITER_MAX + 8]; - struct btree_path paths[BTREE_ITER_MAX]; - struct btree_insert_entry updates[BTREE_ITER_MAX]; + const char *fn; + struct closure ref; + struct list_head list; + struct btree_bkey_cached_common *locking; + struct six_lock_waiter locking_wait; + int srcu_idx; /* update path: */ - struct btree_trans_commit_hook *hooks; + u16 journal_entries_u64s; + u16 journal_entries_size; struct jset_entry *journal_entries; + + struct btree_insert_entry updates[BTREE_ITER_MAX]; + struct btree_trans_commit_hook *hooks; struct journal_entry_pin *journal_pin; struct journal_res journal_res; u64 *journal_seq; struct disk_reservation *disk_res; unsigned journal_u64s; + unsigned extra_disk_res; /* XXX kill */ struct replicas_delta_list *fs_usage_deltas; + + unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; + u8 _sorted[BTREE_ITER_MAX + 8]; + struct btree_path _paths[BTREE_ITER_MAX]; }; static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter) diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index a5a99c3cb32e..37d4039dd0fe 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -200,7 +200,7 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *trans, */ if (nr_splits > 1 && (compressed_sectors = bch2_bkey_sectors_compressed(old))) - trans->extra_journal_res += compressed_sectors * (nr_splits - 1); + trans->extra_disk_res += compressed_sectors * (nr_splits - 1); if (front_split) { update = bch2_bkey_make_mut_noupdate(trans, old); @@ -386,7 +386,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, struct btree_path *path = trans->paths + path_idx; EBUG_ON(!path->should_be_locked); - EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX); + EBUG_ON(trans->nr_updates >= ARRAY_SIZE(trans->updates)); EBUG_ON(!bpos_eq(k->k.p, path->pos)); n = (struct btree_insert_entry) { diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index fbb83e1aa2b2..b9382b7b288b 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -196,10 +196,10 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans) trans_for_each_update(trans, i) bch2_path_put(trans, i->path, true); - trans->extra_journal_res = 0; trans->nr_updates = 0; trans->journal_entries_u64s = 0; trans->hooks = NULL; + trans->extra_disk_res = 0; if (trans->fs_usage_deltas) { trans->fs_usage_deltas->used = 0; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f89d1029366b..796e1a8447d6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1186,7 +1186,7 @@ static int overlapping_extents_found(struct btree_trans *trans, swap(k1, k2); } - trans->extra_journal_res += bch2_bkey_sectors_compressed(k2); + trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); ret = bch2_trans_update_extent_overwrite(trans, old_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE, From fea153a84557c982542527143950dbef434731c2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 12 Dec 2023 20:08:29 -0500 Subject: [PATCH 166/226] bcachefs: rcu protect trans->paths Upcoming patches are going to be changing trans->paths to a reallocatable buffer. We need to guard against use after free when it's used by other threads; this introduces RCU protection to those paths and changes them to check for trans->paths == NULL Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 22 ++++++++++++++++++++-- fs/bcachefs/btree_iter.h | 16 ++++++++++++++++ fs/bcachefs/btree_locking.c | 31 ++++++++++++++++++++++++------- fs/bcachefs/btree_types.h | 10 ++++++++-- 4 files changed, 68 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index ced13bbc52f7..818676b97436 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2920,6 +2920,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->sorted = trans->_sorted; trans->paths = trans->_paths; + *trans_paths_nr(trans->paths) = BTREE_ITER_MAX; + trans->paths_allocated[0] = 1; s = btree_trans_stats(trans); @@ -3074,12 +3076,14 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) { - struct btree_path *path; struct btree_bkey_cached_common *b; static char lock_types[] = { 'r', 'i', 'w' }; struct task_struct *task = READ_ONCE(trans->locking_wait.task); unsigned l, idx; + /* before rcu_read_lock(): */ + bch2_printbuf_make_room(out, 4096); + if (!out->nr_tabstops) { printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 32); @@ -3087,7 +3091,18 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn); - trans_for_each_path(trans, path, idx) { + /* trans->paths is rcu protected vs. freeing */ + rcu_read_lock(); + out->atomic++; + + struct btree_path *paths = rcu_dereference(trans->paths); + if (!paths) + goto out; + + unsigned long *paths_allocated = trans_paths_allocated(paths); + + trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths), idx, 1) { + struct btree_path *path = paths + idx; if (!path->nodes_locked) continue; @@ -3120,6 +3135,9 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) bch2_btree_bkey_cached_common_to_text(out, b); prt_newline(out); } +out: + --out->atomic; + rcu_read_unlock(); } void bch2_fs_btree_iter_exit(struct bch_fs *c) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 5ee118d3eaa5..0fab4952f9f7 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -63,6 +63,22 @@ static inline void btree_trans_sort_paths(struct btree_trans *trans) __bch2_btree_trans_sort_paths(trans); } +static inline unsigned long *trans_paths_nr(struct btree_path *paths) +{ + return &container_of(paths, struct btree_trans_paths, paths[0])->nr_paths; +} + +static inline unsigned long *trans_paths_allocated(struct btree_path *paths) +{ + unsigned long *v = trans_paths_nr(paths); + return v - BITS_TO_LONGS(*v); +} + +#define trans_for_each_path_idx_from(_paths_allocated, _nr, _idx, _start)\ + for (_idx = _start; \ + (_idx = find_next_bit(_paths_allocated, _nr, _idx)) < _nr; \ + _idx++) + static inline struct btree_path * __trans_next_path(struct btree_trans *trans, unsigned *idx) { diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index e4a436bfaeee..d68e1211029d 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -281,9 +281,8 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) struct lock_graph g; struct trans_waiting_for_lock *top; struct btree_bkey_cached_common *b; - struct btree_path *path; - unsigned path_idx; - int ret; + btree_path_idx_t path_idx; + int ret = 0; g.nr = 0; @@ -296,13 +295,26 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) } lock_graph_down(&g, trans); + + /* trans->paths is rcu protected vs. freeing */ + rcu_read_lock(); + if (cycle) + cycle->atomic++; next: if (!g.nr) - return 0; + goto out; top = &g.g[g.nr - 1]; - trans_for_each_path_from(top->trans, path, path_idx, top->path_idx) { + struct btree_path *paths = rcu_dereference(top->trans->paths); + if (!paths) + goto up; + + unsigned long *paths_allocated = trans_paths_allocated(paths); + + trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths), + path_idx, top->path_idx) { + struct btree_path *path = paths + path_idx; if (!path->nodes_locked) continue; @@ -368,18 +380,23 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) ret = lock_graph_descend(&g, trans, cycle); if (ret) - return ret; + goto out; goto next; } raw_spin_unlock(&b->lock.wait_lock); } } - +up: if (g.nr > 1 && cycle) print_chain(cycle, &g); lock_graph_up(&g); goto next; +out: + if (cycle) + --cycle->atomic; + rcu_read_unlock(); + return ret; } int bch2_six_check_for_deadlock(struct six_lock *lock, void *p) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index c1baece14120..28bac4ce20e1 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -372,12 +372,17 @@ struct btree_trans_commit_hook { #define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000 +struct btree_trans_paths { + unsigned long nr_paths; + struct btree_path paths[]; +}; + struct btree_trans { struct bch_fs *c; unsigned long *paths_allocated; - u8 *sorted; struct btree_path *paths; + u8 *sorted; void *mem; unsigned mem_top; @@ -431,8 +436,9 @@ struct btree_trans { struct replicas_delta_list *fs_usage_deltas; unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; - u8 _sorted[BTREE_ITER_MAX + 8]; + struct btree_trans_paths trans_paths; struct btree_path _paths[BTREE_ITER_MAX]; + u8 _sorted[BTREE_ITER_MAX + 8]; }; static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter) From 31403dca5bb1e55ea0ea6ad1264b81fa8c9a3768 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 11:11:22 -0500 Subject: [PATCH 167/226] bcachefs: optimize __bch2_trans_get(), kill DEBUG_TRANSACTIONS - Some tweaks to greatly reduce locking overhead for the list of btree transactions, so that it can always be enabled: leave btree_trans objects on the list when they're on the percpu single item freelist, and only check for duplicates in the same process when CONFIG_BCACHEFS_DEBUG is enabled - don't zero out the full btree_trans() unless we allocated it from the mempool Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 8 -- fs/bcachefs/btree_iter.c | 166 ++++++++++++++++++------------------ fs/bcachefs/btree_locking.c | 3 +- fs/bcachefs/btree_types.h | 11 ++- fs/bcachefs/debug.c | 18 ++-- 5 files changed, 100 insertions(+), 106 deletions(-) diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index 7d3ac9556c98..5cdfef3b551a 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -50,14 +50,6 @@ config BCACHEFS_POSIX_ACL depends on BCACHEFS_FS select FS_POSIX_ACL -config BCACHEFS_DEBUG_TRANSACTIONS - bool "bcachefs runtime info" - depends on BCACHEFS_FS - help - This makes the list of running btree transactions available in debugfs. - - This is a highly useful debugging feature but does add a small amount of overhead. - config BCACHEFS_DEBUG bool "bcachefs debugging" depends on BCACHEFS_FS diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 818676b97436..c35a262f6081 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2714,6 +2714,7 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) { + struct bch_fs *c = trans->c; unsigned new_top = trans->mem_top + size; unsigned old_bytes = trans->mem_bytes; unsigned new_bytes = roundup_pow_of_two(new_top); @@ -2721,17 +2722,19 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) void *new_mem; void *p; - trans->mem_max = max(trans->mem_max, new_top); - WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); + struct btree_transaction_stats *s = btree_trans_stats(trans); + if (s) + s->max_mem = max(s->max_mem, new_bytes); + new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); if (unlikely(!new_mem)) { bch2_trans_unlock(trans); new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { - new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); + new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); new_bytes = BTREE_TRANS_MEM_MAX; kfree(trans->mem); } @@ -2751,7 +2754,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) trans->mem_bytes = new_bytes; if (old_bytes) { - trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); + trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); } @@ -2860,25 +2863,6 @@ u32 bch2_trans_begin(struct btree_trans *trans) return trans->restart_count; } -static struct btree_trans *bch2_trans_alloc(struct bch_fs *c) -{ - struct btree_trans *trans; - - if (IS_ENABLED(__KERNEL__)) { - trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL); - if (trans) - return trans; - } - - trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS); - /* - * paths need to be zeroed, bch2_check_for_deadlock looks at - * paths in other threads - */ - memset(&trans->paths, 0, sizeof(trans->paths)); - return trans; -} - const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR]; unsigned bch2_trans_get_fn_idx(const char *fn) @@ -2900,22 +2884,55 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) __acquires(&c->btree_trans_barrier) { struct btree_trans *trans; - struct btree_transaction_stats *s; - trans = bch2_trans_alloc(c); + if (IS_ENABLED(__KERNEL__)) { + trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL); + if (trans) { + memset(trans, 0, offsetof(struct btree_trans, updates)); + goto got_trans; + } + } + trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS); memset(trans, 0, sizeof(*trans)); + closure_init_stack(&trans->ref); + + seqmutex_lock(&c->btree_trans_lock); + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { + struct btree_trans *pos; + pid_t pid = current->pid; + + trans->locking_wait.task = current; + + list_for_each_entry(pos, &c->btree_trans_list, list) { + struct task_struct *pos_task = READ_ONCE(pos->locking_wait.task); + /* + * We'd much prefer to be stricter here and completely + * disallow multiple btree_trans in the same thread - + * but the data move path calls bch2_write when we + * already have a btree_trans initialized. + */ + BUG_ON(pos_task && + pid == pos_task->pid && + bch2_trans_locked(pos)); + + if (pos_task && pid < pos_task->pid) { + list_add_tail(&trans->list, &pos->list); + goto list_add_done; + } + } + } + list_add_tail(&trans->list, &c->btree_trans_list); +list_add_done: + seqmutex_unlock(&c->btree_trans_lock); +got_trans: trans->c = c; - trans->fn = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns) - ? bch2_btree_transaction_fns[fn_idx] : NULL; trans->last_begin_time = local_clock(); trans->fn_idx = fn_idx; trans->locking_wait.task = current; trans->journal_replay_not_finished = unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) && atomic_inc_not_zero(&c->journal_keys.ref); - closure_init_stack(&trans->ref); - trans->paths_allocated = trans->_paths_allocated; trans->sorted = trans->_sorted; trans->paths = trans->_paths; @@ -2924,21 +2941,19 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->paths_allocated[0] = 1; - s = btree_trans_stats(trans); - if (s && s->max_mem) { - unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem); + if (fn_idx < BCH_TRANSACTIONS_NR) { + trans->fn = bch2_btree_transaction_fns[fn_idx]; - trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL); + struct btree_transaction_stats *s = &c->btree_transaction_stats[fn_idx]; - if (!unlikely(trans->mem)) { - trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); - trans->mem_bytes = BTREE_TRANS_MEM_MAX; - } else { - trans->mem_bytes = expected_mem_bytes; + if (s->max_mem) { + unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem); + + trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL); + if (likely(trans->mem)) + trans->mem_bytes = expected_mem_bytes; } - } - if (s) { trans->nr_paths_max = s->nr_max_paths; trans->journal_entries_size = s->journal_entries_size; } @@ -2946,31 +2961,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; trans->srcu_held = true; - - if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { - struct btree_trans *pos; - - seqmutex_lock(&c->btree_trans_lock); - list_for_each_entry(pos, &c->btree_trans_list, list) { - /* - * We'd much prefer to be stricter here and completely - * disallow multiple btree_trans in the same thread - - * but the data move path calls bch2_write when we - * already have a btree_trans initialized. - */ - BUG_ON(trans->locking_wait.task->pid == pos->locking_wait.task->pid && - bch2_trans_locked(pos)); - - if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) { - list_add_tail(&trans->list, &pos->list); - goto list_add_done; - } - } - list_add_tail(&trans->list, &c->btree_trans_list); -list_add_done: - seqmutex_unlock(&c->btree_trans_lock); - } - return trans; } @@ -3001,24 +2991,13 @@ void bch2_trans_put(struct btree_trans *trans) __releases(&c->btree_trans_barrier) { struct bch_fs *c = trans->c; - struct btree_transaction_stats *s = btree_trans_stats(trans); bch2_trans_unlock(trans); - if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { - seqmutex_lock(&c->btree_trans_lock); - list_del(&trans->list); - seqmutex_unlock(&c->btree_trans_lock); - } - - closure_sync(&trans->ref); - - if (s) - s->max_mem = max(s->max_mem, trans->mem_max); - trans_for_each_update(trans, i) __btree_path_put(trans->paths + i->path, true); - trans->nr_updates = 0; + trans->nr_updates = 0; + trans->locking_wait.task = NULL; check_btree_paths_leaked(trans); @@ -3047,8 +3026,16 @@ void bch2_trans_put(struct btree_trans *trans) /* Userspace doesn't have a real percpu implementation: */ if (IS_ENABLED(__KERNEL__)) trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans); - if (trans) + + if (trans) { + closure_sync(&trans->ref); + + seqmutex_lock(&c->btree_trans_lock); + list_del(&trans->list); + seqmutex_unlock(&c->btree_trans_lock); + mempool_free(trans, &c->btree_trans_pool); + } } static void __maybe_unused @@ -3146,15 +3133,26 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) struct btree_trans *trans; int cpu; + if (c->btree_trans_bufs) + for_each_possible_cpu(cpu) { + struct btree_trans *trans = + per_cpu_ptr(c->btree_trans_bufs, cpu)->trans; + + if (trans) { + closure_sync(&trans->ref); + + seqmutex_lock(&c->btree_trans_lock); + list_del(&trans->list); + seqmutex_unlock(&c->btree_trans_lock); + } + kfree(trans); + } + free_percpu(c->btree_trans_bufs); + trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list); if (trans) panic("%s leaked btree_trans\n", trans->fn); - if (c->btree_trans_bufs) - for_each_possible_cpu(cpu) - kfree(per_cpu_ptr(c->btree_trans_bufs, cpu)->trans); - free_percpu(c->btree_trans_bufs); - for (s = c->btree_transaction_stats; s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats); s++) { diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index d68e1211029d..1ed8327a9fa2 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -95,9 +95,10 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g) struct trans_waiting_for_lock *i; for (i = g->g; i != g->g + g->nr; i++) { + struct task_struct *task = i->trans->locking_wait.task; if (i != g->g) prt_str(out, "<- "); - prt_printf(out, "%u ", i->trans->locking_wait.task->pid); + prt_printf(out, "%u ", task ?task->pid : 0); } prt_newline(out); } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 28bac4ce20e1..3baf688177c4 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -386,7 +386,6 @@ struct btree_trans { void *mem; unsigned mem_top; - unsigned mem_max; unsigned mem_bytes; btree_path_idx_t nr_sorted; @@ -413,8 +412,6 @@ struct btree_trans { unsigned long srcu_lock_time; const char *fn; - struct closure ref; - struct list_head list; struct btree_bkey_cached_common *locking; struct six_lock_waiter locking_wait; int srcu_idx; @@ -424,7 +421,6 @@ struct btree_trans { u16 journal_entries_size; struct jset_entry *journal_entries; - struct btree_insert_entry updates[BTREE_ITER_MAX]; struct btree_trans_commit_hook *hooks; struct journal_entry_pin *journal_pin; @@ -435,6 +431,13 @@ struct btree_trans { unsigned extra_disk_res; /* XXX kill */ struct replicas_delta_list *fs_usage_deltas; + /* Entries before this are zeroed out on every bch2_trans_get() call */ + + struct btree_insert_entry updates[BTREE_ITER_MAX]; + + struct list_head list; + struct closure ref; + unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; struct btree_trans_paths trans_paths; struct btree_path _paths[BTREE_ITER_MAX]; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index bbf6fce3a789..c0b4d9057f29 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -592,7 +592,6 @@ static const struct file_operations cached_btree_nodes_ops = { .read = bch2_cached_btree_nodes_read, }; -#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { @@ -608,7 +607,9 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, restart: seqmutex_lock(&c->btree_trans_lock); list_for_each_entry(trans, &c->btree_trans_list, list) { - if (trans->locking_wait.task->pid <= i->iter) + struct task_struct *task = READ_ONCE(trans->locking_wait.task); + + if (!task || task->pid <= i->iter) continue; closure_get(&trans->ref); @@ -626,11 +627,11 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, prt_printf(&i->buf, "backtrace:"); prt_newline(&i->buf); printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task); + bch2_prt_task_backtrace(&i->buf, task); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); - i->iter = trans->locking_wait.task->pid; + i->iter = task->pid; closure_put(&trans->ref); @@ -654,7 +655,6 @@ static const struct file_operations btree_transactions_ops = { .release = bch2_dump_release, .read = bch2_btree_transactions_read, }; -#endif /* CONFIG_BCACHEFS_DEBUG_TRANSACTIONS */ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) @@ -811,7 +811,9 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, restart: seqmutex_lock(&c->btree_trans_lock); list_for_each_entry(trans, &c->btree_trans_list, list) { - if (trans->locking_wait.task->pid <= i->iter) + struct task_struct *task = READ_ONCE(trans->locking_wait.task); + + if (!task || task->pid <= i->iter) continue; closure_get(&trans->ref); @@ -826,7 +828,7 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, bch2_check_for_deadlock(trans, &i->buf); - i->iter = trans->locking_wait.task->pid; + i->iter = task->pid; closure_put(&trans->ref); @@ -873,10 +875,8 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir, c->btree_debug, &cached_btree_nodes_ops); -#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir, c->btree_debug, &btree_transactions_ops); -#endif debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, c->btree_debug, &journal_pins_ops); From 5cc6daf74979ca951ce2550847995b3b18398af4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 12 Dec 2023 20:30:44 -0500 Subject: [PATCH 168/226] bcachefs: trans->updates will also be resizable the reflink triggers are also bumping up against the maximum number of paths in a transaction - and generating proportional numbers of updates. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 3 ++- fs/bcachefs/btree_types.h | 4 ++-- fs/bcachefs/btree_update.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index c35a262f6081..3c45d9e195d0 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2888,7 +2888,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) if (IS_ENABLED(__KERNEL__)) { trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL); if (trans) { - memset(trans, 0, offsetof(struct btree_trans, updates)); + memset(trans, 0, offsetof(struct btree_trans, list)); goto got_trans; } } @@ -2936,6 +2936,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->paths_allocated = trans->_paths_allocated; trans->sorted = trans->_sorted; trans->paths = trans->_paths; + trans->updates = trans->_updates; *trans_paths_nr(trans->paths) = BTREE_ITER_MAX; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 3baf688177c4..81f54a2c4e81 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -383,6 +383,7 @@ struct btree_trans { unsigned long *paths_allocated; struct btree_path *paths; u8 *sorted; + struct btree_insert_entry *updates; void *mem; unsigned mem_top; @@ -433,8 +434,6 @@ struct btree_trans { /* Entries before this are zeroed out on every bch2_trans_get() call */ - struct btree_insert_entry updates[BTREE_ITER_MAX]; - struct list_head list; struct closure ref; @@ -442,6 +441,7 @@ struct btree_trans { struct btree_trans_paths trans_paths; struct btree_path _paths[BTREE_ITER_MAX]; u8 _sorted[BTREE_ITER_MAX + 8]; + struct btree_insert_entry _updates[BTREE_ITER_MAX]; }; static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter) diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 37d4039dd0fe..140a44aeb321 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -386,7 +386,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, struct btree_path *path = trans->paths + path_idx; EBUG_ON(!path->should_be_locked); - EBUG_ON(trans->nr_updates >= ARRAY_SIZE(trans->updates)); + EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX); EBUG_ON(!bpos_eq(k->k.p, path->pos)); n = (struct btree_insert_entry) { From 2c3b0fc3bd0a5db0d10260b08a7139fdb7a4d3a8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 17:10:31 -0500 Subject: [PATCH 169/226] bcachefs: trans->nr_paths Start to plumb through dynamically growable btree_paths; this patch replaces most BTREE_ITER_MAX references with trans->nr_paths. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 9 +++++---- fs/bcachefs/btree_iter.h | 22 +++++++++++++++++++--- fs/bcachefs/btree_types.h | 1 + fs/bcachefs/btree_update.c | 2 +- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 3c45d9e195d0..12a357f357cd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1475,7 +1475,7 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans) { struct btree_transaction_stats *s = btree_trans_stats(trans); struct printbuf buf = PRINTBUF; - size_t nr = bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX); + size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths); if (!s) return; @@ -1521,9 +1521,9 @@ static noinline void btree_path_overflow(struct btree_trans *trans) static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, btree_path_idx_t pos) { - btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, BTREE_ITER_MAX); + btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, trans->nr_paths); - if (unlikely(idx == BTREE_ITER_MAX)) + if (unlikely(idx == trans->nr_paths)) btree_path_overflow(trans); /* @@ -2527,7 +2527,7 @@ static void btree_trans_verify_sorted_refs(struct btree_trans *trans) struct btree_path *path; unsigned i; - BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX) - 1); + BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, trans->nr_paths) - 1); trans_for_each_path(trans, path, i) { BUG_ON(path->sorted_idx >= trans->nr_sorted); @@ -2933,6 +2933,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->journal_replay_not_finished = unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) && atomic_inc_not_zero(&c->journal_keys.ref); + trans->nr_paths = ARRAY_SIZE(trans->_paths); trans->paths_allocated = trans->_paths_allocated; trans->sorted = trans->_sorted; trans->paths = trans->_paths; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 0fab4952f9f7..573c44d80cc3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -82,9 +82,25 @@ static inline unsigned long *trans_paths_allocated(struct btree_path *paths) static inline struct btree_path * __trans_next_path(struct btree_trans *trans, unsigned *idx) { - *idx = find_next_bit(trans->paths_allocated, BTREE_ITER_MAX, *idx); + unsigned long *w = trans->paths_allocated + *idx / BITS_PER_LONG; + /* + * Open coded find_next_bit(), because + * - this is fast path, we can't afford the function call + * - and we know that nr_paths is a multiple of BITS_PER_LONG, + */ + while (*idx < trans->nr_paths) { + unsigned long v = *w >> (*idx & (BITS_PER_LONG - 1)); + if (v) { + *idx += __ffs(v); + return trans->paths + *idx; + } - return *idx < BTREE_ITER_MAX ? &trans->paths[*idx] : NULL; + *idx += BITS_PER_LONG; + *idx &= ~(BITS_PER_LONG - 1); + w++; + } + + return NULL; } /* @@ -626,7 +642,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *); static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX) > BTREE_ITER_MAX - 8) + if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_MAX - 8) return __bch2_btree_trans_too_many_iters(trans); return 0; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 81f54a2c4e81..e4ebfc25df8e 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -390,6 +390,7 @@ struct btree_trans { unsigned mem_bytes; btree_path_idx_t nr_sorted; + btree_path_idx_t nr_paths; btree_path_idx_t nr_paths_max; u8 fn_idx; u8 nr_updates; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 140a44aeb321..83aa0fb8ba93 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -386,7 +386,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx, struct btree_path *path = trans->paths + path_idx; EBUG_ON(!path->should_be_locked); - EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX); + EBUG_ON(trans->nr_updates >= trans->nr_paths); EBUG_ON(!bpos_eq(k->k.p, path->pos)); n = (struct btree_insert_entry) { From ff70ad2c8dfdcc24f98b645481116d4c2ea20e37 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 15 Dec 2023 15:21:40 -0500 Subject: [PATCH 170/226] bcachefs: Fix interior update path btree_path uses Since the btree_paths array is now about to become growable, we have to be careful not to refer to paths by pointer across contexts where they may be reallocated. This fixes the remaining btree_interior_update() paths - split and merge. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 12 ++--- fs/bcachefs/btree_update_interior.c | 71 ++++++++++++++++------------- fs/bcachefs/btree_update_interior.h | 11 +++-- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 7e6ba3061e7c..2ec9ff06f1a6 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -823,7 +823,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags if (!same_leaf_as_next(trans, i)) { if (u64s_delta <= 0) { - ret = bch2_foreground_maybe_merge(trans, trans->paths + i->path, + ret = bch2_foreground_maybe_merge(trans, i->path, i->level, flags); if (unlikely(ret)) return ret; @@ -877,14 +877,12 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, struct bch_fs *c = trans->c; switch (ret) { - case -BCH_ERR_btree_insert_btree_node_full: { - struct btree_path *path = trans->paths + i->path; - - ret = bch2_btree_split_leaf(trans, path, flags); + case -BCH_ERR_btree_insert_btree_node_full: + ret = bch2_btree_split_leaf(trans, i->path, flags); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, path); + trace_and_count(c, trans_restart_btree_node_split, trans, + trace_ip, trans->paths + i->path); break; - } case -BCH_ERR_btree_insert_need_mark_replicas: ret = drop_locks_do(trans, bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas)); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 737ccbe1aef9..2a93eb92d112 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -25,7 +25,7 @@ #include static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, - struct btree_path *, struct btree *, + btree_path_idx_t, struct btree *, struct keylist *, unsigned); static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); @@ -1454,10 +1454,12 @@ static void __btree_split_node(struct btree_update *as, */ static void btree_split_insert_keys(struct btree_update *as, struct btree_trans *trans, - struct btree_path *path, + btree_path_idx_t path_idx, struct btree *b, struct keylist *keys) { + struct btree_path *path = trans->paths + path_idx; + if (!bch2_keylist_empty(keys) && bpos_le(bch2_keylist_front(keys)->k.p, b->data->max_key)) { struct btree_node_iter node_iter; @@ -1471,18 +1473,18 @@ static void btree_split_insert_keys(struct btree_update *as, } static int btree_split(struct btree_update *as, struct btree_trans *trans, - struct btree_path *path, struct btree *b, + btree_path_idx_t path, struct btree *b, struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; - struct btree *parent = btree_node_parent(path, b); + struct btree *parent = btree_node_parent(trans->paths + path, b); struct btree *n1, *n2 = NULL, *n3 = NULL; btree_path_idx_t path1 = 0, path2 = 0; u64 start_time = local_clock(); int ret = 0; BUG_ON(!parent && (b != btree_node_root(c, b))); - BUG_ON(parent && !btree_node_intent_locked(path, b->c.level + 1)); + BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1)); bch2_btree_interior_update_will_free_node(as, b); @@ -1510,12 +1512,12 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, six_unlock_write(&n2->c.lock); six_unlock_write(&n1->c.lock); - path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p); + path1 = get_unlocked_mut_path(trans, as->btree_id, n1->c.level, n1->key.k.p); six_lock_increment(&n1->c.lock, SIX_LOCK_intent); mark_btree_node_locked(trans, trans->paths + path1, n1->c.level, BTREE_NODE_INTENT_LOCKED); bch2_btree_path_level_init(trans, trans->paths + path1, n1); - path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p); + path2 = get_unlocked_mut_path(trans, as->btree_id, n2->c.level, n2->key.k.p); six_lock_increment(&n2->c.lock, SIX_LOCK_intent); mark_btree_node_locked(trans, trans->paths + path2, n2->c.level, BTREE_NODE_INTENT_LOCKED); bch2_btree_path_level_init(trans, trans->paths + path2, n2); @@ -1560,7 +1562,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, bch2_btree_update_add_new_node(as, n1); six_unlock_write(&n1->c.lock); - path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p); + path1 = get_unlocked_mut_path(trans, as->btree_id, n1->c.level, n1->key.k.p); six_lock_increment(&n1->c.lock, SIX_LOCK_intent); mark_btree_node_locked(trans, trans->paths + path1, n1->c.level, BTREE_NODE_INTENT_LOCKED); bch2_btree_path_level_init(trans, trans->paths + path1, n1); @@ -1577,10 +1579,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, if (ret) goto err; } else if (n3) { - bch2_btree_set_root(as, trans, path, n3); + bch2_btree_set_root(as, trans, trans->paths + path, n3); } else { /* Root filled up but didn't need to be split */ - bch2_btree_set_root(as, trans, path, n1); + bch2_btree_set_root(as, trans, trans->paths + path, n1); } if (n3) { @@ -1600,10 +1602,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, * node after another thread has locked and updated the new node, thus * seeing stale data: */ - bch2_btree_node_free_inmem(trans, path, b); + bch2_btree_node_free_inmem(trans, trans->paths + path, b); if (n3) - bch2_trans_node_add(trans, path, n3); + bch2_trans_node_add(trans, trans->paths + path, n3); if (n2) bch2_trans_node_add(trans, trans->paths + path2, n2); bch2_trans_node_add(trans, trans->paths + path1, n1); @@ -1665,7 +1667,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, * * @as: btree_update object * @trans: btree_trans object - * @path: path that points to current node + * @path_idx: path that points to current node * @b: node to insert keys into * @keys: list of keys to insert * @flags: transaction commit flags @@ -1677,10 +1679,11 @@ bch2_btree_insert_keys_interior(struct btree_update *as, * for leaf nodes -- inserts into interior nodes have to be atomic. */ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans, - struct btree_path *path, struct btree *b, + btree_path_idx_t path_idx, struct btree *b, struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; + struct btree_path *path = trans->paths + path_idx; int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); int old_live_u64s = b->nr.live_u64s; int live_u64s_added, u64s_added; @@ -1733,19 +1736,22 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race); } - return btree_split(as, trans, path, b, keys, flags); + return btree_split(as, trans, path_idx, b, keys, flags); } int bch2_btree_split_leaf(struct btree_trans *trans, - struct btree_path *path, + btree_path_idx_t path, unsigned flags) { - struct btree *b = path_l(path)->b; + /* btree_split & merge may both cause paths array to be reallocated */ + + struct btree *b = path_l(trans->paths + path)->b; struct btree_update *as; unsigned l; int ret = 0; - as = bch2_btree_update_start(trans, path, path->level, + as = bch2_btree_update_start(trans, trans->paths + path, + trans->paths[path].level, true, flags); if (IS_ERR(as)) return PTR_ERR(as); @@ -1758,14 +1764,16 @@ int bch2_btree_split_leaf(struct btree_trans *trans, bch2_btree_update_done(as, trans); - for (l = path->level + 1; btree_node_intent_locked(path, l) && !ret; l++) + for (l = trans->paths[path].level + 1; + btree_node_intent_locked(&trans->paths[path], l) && !ret; + l++) ret = bch2_foreground_maybe_merge(trans, path, l, flags); return ret; } int __bch2_foreground_maybe_merge(struct btree_trans *trans, - struct btree_path *path, + btree_path_idx_t path, unsigned level, unsigned flags, enum btree_node_sibling sib) @@ -1778,14 +1786,15 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, struct btree *b, *m, *n, *prev, *next, *parent; struct bpos sib_pos; size_t sib_u64s; + enum btree_id btree = trans->paths[path].btree_id; btree_path_idx_t sib_path = 0, new_path = 0; u64 start_time = local_clock(); int ret = 0; - BUG_ON(!path->should_be_locked); - BUG_ON(!btree_node_locked(path, level)); + BUG_ON(!trans->paths[path].should_be_locked); + BUG_ON(!btree_node_locked(&trans->paths[path], level)); - b = path->l[level].b; + b = trans->paths[path].l[level].b; if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) || (sib == btree_next_sib && bpos_eq(b->data->max_key, SPOS_MAX))) { @@ -1797,7 +1806,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, ? bpos_predecessor(b->data->min_key) : bpos_successor(b->data->max_key); - sib_path = bch2_path_get(trans, path->btree_id, sib_pos, + sib_path = bch2_path_get(trans, btree, sib_pos, U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_); ret = bch2_btree_path_traverse(trans, sib_path, false); if (ret) @@ -1807,7 +1816,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, m = trans->paths[sib_path].l[level].b; - if (btree_node_parent(path, b) != + if (btree_node_parent(trans->paths + path, b) != btree_node_parent(trans->paths + sib_path, m)) { b->sib_u64s[sib] = U16_MAX; goto out; @@ -1861,8 +1870,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) goto out; - parent = btree_node_parent(path, b); - as = bch2_btree_update_start(trans, path, level, false, + parent = btree_node_parent(trans->paths + path, b); + as = bch2_btree_update_start(trans, trans->paths + path, level, false, BCH_TRANS_COMMIT_no_enospc|flags); ret = PTR_ERR_OR_ZERO(as); if (ret) @@ -1892,7 +1901,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_btree_update_add_new_node(as, n); six_unlock_write(&n->c.lock); - new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p); + new_path = get_unlocked_mut_path(trans, btree, n->c.level, n->key.k.p); six_lock_increment(&n->c.lock, SIX_LOCK_intent); mark_btree_node_locked(trans, trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED); bch2_btree_path_level_init(trans, trans->paths + new_path, n); @@ -1913,10 +1922,10 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_btree_update_get_open_buckets(as, n); bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); - bch2_btree_node_free_inmem(trans, path, b); + bch2_btree_node_free_inmem(trans, trans->paths + path, b); bch2_btree_node_free_inmem(trans, trans->paths + sib_path, m); - bch2_trans_node_add(trans, path, n); + bch2_trans_node_add(trans, trans->paths + path, n); bch2_trans_verify_paths(trans); @@ -1975,7 +1984,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); - ret = bch2_btree_insert_node(as, trans, btree_iter_path(trans, iter), + ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys, flags); if (ret) goto err; diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index a6668992a272..adfc62083844 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -117,16 +117,17 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, struct btree *, struct bkey_format); -int bch2_btree_split_leaf(struct btree_trans *, struct btree_path *, unsigned); +int bch2_btree_split_leaf(struct btree_trans *, btree_path_idx_t, unsigned); -int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_path *, +int __bch2_foreground_maybe_merge(struct btree_trans *, btree_path_idx_t, unsigned, unsigned, enum btree_node_sibling); static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans, - struct btree_path *path, + btree_path_idx_t path_idx, unsigned level, unsigned flags, enum btree_node_sibling sib) { + struct btree_path *path = trans->paths + path_idx; struct btree *b; EBUG_ON(!btree_node_locked(path, level)); @@ -135,11 +136,11 @@ static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans, if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold) return 0; - return __bch2_foreground_maybe_merge(trans, path, level, flags, sib); + return __bch2_foreground_maybe_merge(trans, path_idx, level, flags, sib); } static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, - struct btree_path *path, + btree_path_idx_t path, unsigned level, unsigned flags) { From 0c99e17d3bd3b60ee7461cb8e87ff6badf228422 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 19:26:30 -0500 Subject: [PATCH 171/226] bcachefs: growable btree_paths XXX: we're allocating memory with btree locks held - bad We need to plumb through an error path so we can do allocate_dropping_locks() - but we're merging this now because it fixes a transaction path overflow caused by indirect extent fragmentation, and the resize path is rare. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 77 ++++++++++++++++++++++++++++++------- fs/bcachefs/btree_iter.h | 2 +- fs/bcachefs/btree_types.h | 13 ++++--- fs/bcachefs/extent_update.c | 2 +- 4 files changed, 72 insertions(+), 22 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 12a357f357cd..596fbb9a611f 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1209,7 +1209,6 @@ static btree_path_idx_t btree_path_clone(struct btree_trans *trans, btree_path_i bool intent) { btree_path_idx_t new = btree_path_alloc(trans, src); - btree_path_copy(trans, trans->paths + new, trans->paths + src); __btree_path_get(trans->paths + new, intent); return new; @@ -1515,7 +1514,47 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *trans) static noinline void btree_path_overflow(struct btree_trans *trans) { bch2_dump_trans_paths_updates(trans); - panic("trans path overflow\n"); + bch_err(trans->c, "trans path overflow"); +} + +static noinline void btree_paths_realloc(struct btree_trans *trans) +{ + unsigned nr = trans->nr_paths * 2; + + void *p = kzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) + + sizeof(struct btree_trans_paths) + + nr * sizeof(struct btree_path) + + nr * sizeof(btree_path_idx_t) + 8 + + nr * sizeof(struct btree_insert_entry), GFP_KERNEL|__GFP_NOFAIL); + + unsigned long *paths_allocated = p; + memcpy(paths_allocated, trans->paths_allocated, BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long)); + p += BITS_TO_LONGS(nr) * sizeof(unsigned long); + + p += sizeof(struct btree_trans_paths); + struct btree_path *paths = p; + *trans_paths_nr(paths) = nr; + memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct btree_path)); + p += nr * sizeof(struct btree_path); + + btree_path_idx_t *sorted = p; + memcpy(sorted, trans->sorted, trans->nr_sorted * sizeof(btree_path_idx_t)); + p += nr * sizeof(btree_path_idx_t) + 8; + + struct btree_insert_entry *updates = p; + memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct btree_insert_entry)); + + unsigned long *old = trans->paths_allocated; + + rcu_assign_pointer(trans->paths_allocated, paths_allocated); + rcu_assign_pointer(trans->paths, paths); + rcu_assign_pointer(trans->sorted, sorted); + rcu_assign_pointer(trans->updates, updates); + + trans->nr_paths = nr; + + if (old != trans->_paths_allocated) + kfree_rcu_mightsleep(old); } static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, @@ -1523,8 +1562,14 @@ static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, { btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, trans->nr_paths); - if (unlikely(idx == trans->nr_paths)) - btree_path_overflow(trans); + if (unlikely(idx == trans->nr_paths)) { + if (trans->nr_paths == BTREE_ITER_MAX) { + btree_path_overflow(trans); + return 0; + } + + btree_paths_realloc(trans); + } /* * Do this before marking the new path as allocated, since it won't be @@ -2607,21 +2652,18 @@ void __bch2_btree_trans_sort_paths(struct btree_trans *trans) static inline void btree_path_list_remove(struct btree_trans *trans, struct btree_path *path) { - unsigned i; - EBUG_ON(path->sorted_idx >= trans->nr_sorted); #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS trans->nr_sorted--; memmove_u64s_down_small(trans->sorted + path->sorted_idx, trans->sorted + path->sorted_idx + 1, - DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); + DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, + sizeof(u64) / sizeof(btree_path_idx_t))); #else array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx); #endif - for (i = path->sorted_idx; i < trans->nr_sorted; i++) + for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++) trans->paths[trans->sorted[i]].sorted_idx = i; - - path->sorted_idx = U8_MAX; } static inline void btree_path_list_add(struct btree_trans *trans, @@ -2629,21 +2671,21 @@ static inline void btree_path_list_add(struct btree_trans *trans, btree_path_idx_t path_idx) { struct btree_path *path = trans->paths + path_idx; - unsigned i; path->sorted_idx = pos ? trans->paths[pos].sorted_idx + 1 : trans->nr_sorted; #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1, trans->sorted + path->sorted_idx, - DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); + DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, + sizeof(u64) / sizeof(btree_path_idx_t))); trans->nr_sorted++; trans->sorted[path->sorted_idx] = path_idx; #else array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx); #endif - for (i = path->sorted_idx; i < trans->nr_sorted; i++) + for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++) trans->paths[trans->sorted[i]].sorted_idx = i; btree_trans_verify_sorted_refs(trans); @@ -2939,7 +2981,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->paths = trans->_paths; trans->updates = trans->_updates; - *trans_paths_nr(trans->paths) = BTREE_ITER_MAX; + *trans_paths_nr(trans->paths) = BTREE_ITER_INITIAL; trans->paths_allocated[0] = 1; @@ -3020,6 +3062,13 @@ void bch2_trans_put(struct btree_trans *trans) if (unlikely(trans->journal_replay_not_finished)) bch2_journal_keys_put(c); + unsigned long *paths_allocated = trans->paths_allocated; + trans->paths_allocated = NULL; + trans->paths = NULL; + + if (paths_allocated != trans->_paths_allocated) + kfree_rcu_mightsleep(paths_allocated); + if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) mempool_free(trans->mem, &c->btree_trans_mem_pool); else diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 573c44d80cc3..da2b74fa63fc 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -642,7 +642,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *); static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_MAX - 8) + if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8) return __bch2_btree_trans_too_many_iters(trans); return 0; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index e4ebfc25df8e..d530307046f4 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -358,7 +358,8 @@ struct btree_insert_entry { unsigned long ip_allocated; }; -#define BTREE_ITER_MAX 64 +#define BTREE_ITER_INITIAL 64 +#define BTREE_ITER_MAX (1U << 10) struct btree_trans_commit_hook; typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *); @@ -382,7 +383,7 @@ struct btree_trans { unsigned long *paths_allocated; struct btree_path *paths; - u8 *sorted; + btree_path_idx_t *sorted; struct btree_insert_entry *updates; void *mem; @@ -438,11 +439,11 @@ struct btree_trans { struct list_head list; struct closure ref; - unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; + unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)]; struct btree_trans_paths trans_paths; - struct btree_path _paths[BTREE_ITER_MAX]; - u8 _sorted[BTREE_ITER_MAX + 8]; - struct btree_insert_entry _updates[BTREE_ITER_MAX]; + struct btree_path _paths[BTREE_ITER_INITIAL]; + btree_path_idx_t _sorted[BTREE_ITER_INITIAL + 4]; + struct btree_insert_entry _updates[BTREE_ITER_INITIAL]; }; static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter) diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 21af6fb8cecf..b9033bb4f11c 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -100,7 +100,7 @@ static int count_iters_for_insert(struct btree_trans *trans, return ret2 ?: ret; } -#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) +#define EXTENT_ITERS_MAX (BTREE_ITER_INITIAL / 3) int bch2_extent_atomic_end(struct btree_trans *trans, struct btree_iter *iter, From eb6863598a9dff88bdb4483cc09a3ec8e1150b48 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 00:57:37 -0500 Subject: [PATCH 172/226] bcachefs: bch2_btree_trans_peek_updates refactoring the BTREE_ITER_WITH_UPDATES code, prep for removing the flag and making it always-on Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 596fbb9a611f..1d2be541d5a2 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1887,6 +1887,23 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter) return ret; } +static noinline +void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c *k) +{ + struct btree_path *path = btree_iter_path(trans, iter); + struct bpos end = path_l(path)->b->key.k.p; + + trans_for_each_update(trans, i) + if (!i->key_cache_already_flushed && + i->btree_id == iter->btree_id && + bpos_ge(i->k->k.p, path->pos) && + bpos_le(i->k->k.p, k->k ? k->k->p : end)) { + iter->k = i->k->k; + *k = bkey_i_to_s_c(i->k); + } +} + static noinline struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter) { @@ -2012,7 +2029,6 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) { struct btree_trans *trans = iter->trans; - struct bkey_i *next_update; struct bkey_s_c k, k2; int ret; @@ -2062,14 +2078,9 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL)) k = btree_trans_peek_journal(trans, iter, k); - next_update = btree_trans_peek_updates(iter); - - if (next_update && - bpos_le(next_update->k.p, - k.k ? k.k->p : l->b->key.k.p)) { - iter->k = next_update->k; - k = bkey_i_to_s_c(next_update); - } + if (unlikely((iter->flags & BTREE_ITER_WITH_UPDATES) && + trans->nr_updates)) + bch2_btree_trans_peek_updates(trans, iter, &k); if (k.k && bkey_deleted(k.k)) { /* From 359e89add5b89fb467f08391ab9d80ba6e775295 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 00:57:37 -0500 Subject: [PATCH 173/226] bcachefs: bch2_btree_trans_peek_prev_updates bch2_btree_iter_peek_prev() now supports BTREE_ITER_WITH_UPDATES Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1d2be541d5a2..a5a0c8e77930 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1887,6 +1887,22 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter) return ret; } +static noinline +void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c *k) +{ + struct bpos end = path_l(btree_iter_path(trans, iter))->b->data->min_key; + + trans_for_each_update(trans, i) + if (!i->key_cache_already_flushed && + i->btree_id == iter->btree_id && + bpos_le(i->k->k.p, iter->pos) && + bpos_ge(i->k->k.p, k->k ? k->k->p : end)) { + iter->k = i->k->k; + *k = bkey_i_to_s_c(i->k); + } +} + static noinline void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c *k) @@ -2302,7 +2318,6 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) EBUG_ON(btree_iter_path(trans, iter)->cached || btree_iter_path(trans, iter)->level); - EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES); if (iter->flags & BTREE_ITER_WITH_JOURNAL) return bkey_s_c_err(-EIO); @@ -2335,6 +2350,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) : bpos_gt(k.k->p, search_key))) k = btree_path_level_prev(trans, path, &path->l[0], &iter->k); + if (unlikely((iter->flags & BTREE_ITER_WITH_UPDATES) && + trans->nr_updates)) + bch2_btree_trans_peek_prev_updates(trans, iter, &k); + if (likely(k.k)) { if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) { if (k.k->p.snapshot == iter->snapshot) From c558c577cbea95d8a8efc161cad93bab344a61d0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 00:57:37 -0500 Subject: [PATCH 174/226] bcachefs: bch2_btree_trans_peek_slot_updates refactoring the BTREE_ITER_WITH_UPDATES code, prep for removing the flag and making it always-on Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 46 +++++++++++++--------------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index a5a0c8e77930..d9a9e84a2a28 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1921,32 +1921,16 @@ void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter } static noinline -struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter) +void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c *k) { - struct btree_trans *trans = iter->trans; - struct bkey_i *ret = NULL; - - trans_for_each_update(trans, i) { - if (i->btree_id < iter->btree_id) - continue; - if (i->btree_id > iter->btree_id) - break; - if (bpos_lt(i->k->k.p, btree_iter_path(trans, iter)->pos)) - continue; - if (i->key_cache_already_flushed) - continue; - if (!ret || bpos_lt(i->k->k.p, ret->k.p)) - ret = i->k; - } - - return ret; -} - -static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter) -{ - return iter->flags & BTREE_ITER_WITH_UPDATES - ? __bch2_btree_trans_peek_updates(iter) - : NULL; + trans_for_each_update(trans, i) + if (!i->key_cache_already_flushed && + i->btree_id == iter->btree_id && + bpos_eq(i->k->k.p, iter->pos)) { + iter->k = i->k->k; + *k = bkey_i_to_s_c(i->k); + } } static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, @@ -2478,13 +2462,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if ((iter->flags & BTREE_ITER_CACHED) || !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) { - struct bkey_i *next_update; + k = bkey_s_c_null; - if ((next_update = btree_trans_peek_updates(iter)) && - bpos_eq(next_update->k.p, iter->pos)) { - iter->k = next_update->k; - k = bkey_i_to_s_c(next_update); - goto out; + if (unlikely((iter->flags & BTREE_ITER_WITH_UPDATES) && + trans->nr_updates)) { + bch2_btree_trans_peek_slot_updates(trans, iter, &k); + if (k.k) + goto out; } if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) && From d296e7b18521d60e3c428945c19a66ecf012c002 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 15 Dec 2023 14:13:48 -0500 Subject: [PATCH 175/226] bcachefs: Fix reattach_inode() for snapshots reattach_inode() was broken w.r.t. snapshots - we'd lookup the subvolume to look up lost+found, but if we're in an interior node snapshot that didn't make any sense. Instead, this adds a dirent path for creating in a specific snapshot, skipping the subvolume; and we also make sure to create lost+found in the root snapshot, to avoid conflicts with lost+found being created in overlapping snapshots. Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 28 +++++++++++ fs/bcachefs/dirent.h | 4 ++ fs/bcachefs/fsck.c | 108 ++++++++++++++++++++++++------------------- 3 files changed, 93 insertions(+), 47 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 51fe04f45690..4ae1e9f002a0 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -198,6 +198,34 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, return dirent; } +int bch2_dirent_create_snapshot(struct btree_trans *trans, + u64 dir, u32 snapshot, + const struct bch_hash_info *hash_info, + u8 type, const struct qstr *name, u64 dst_inum, + u64 *dir_offset, + bch_str_hash_flags_t str_hash_flags) +{ + subvol_inum zero_inum = { 0 }; + struct bkey_i_dirent *dirent; + int ret; + + dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum); + ret = PTR_ERR_OR_ZERO(dirent); + if (ret) + return ret; + + dirent->k.p.inode = dir; + dirent->k.p.snapshot = snapshot; + + ret = bch2_hash_set_snapshot(trans, bch2_dirent_hash_desc, hash_info, + zero_inum, snapshot, + &dirent->k_i, str_hash_flags, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + *dir_offset = dirent->k.p.offset; + + return ret; +} + int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, const struct bch_hash_info *hash_info, u8 type, const struct qstr *name, u64 dst_inum, diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 10dc3ad7e80f..21ffeb78f02e 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -35,6 +35,10 @@ static inline unsigned dirent_val_u64s(unsigned len) int bch2_dirent_read_target(struct btree_trans *, subvol_inum, struct bkey_s_c_dirent, subvol_inum *); +int bch2_dirent_create_snapshot(struct btree_trans *, u64, u32, + const struct bch_hash_info *, u8, + const struct qstr *, u64, u64 *, + bch_str_hash_flags_t); int bch2_dirent_create(struct btree_trans *, subvol_inum, const struct bch_hash_info *, u8, const struct qstr *, u64, u64 *, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 796e1a8447d6..dc825b14f24e 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -59,21 +59,6 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, return ret ?: subdirs; } -static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, - u32 *subvol) -{ - struct bch_snapshot s; - int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, - POS(0, snapshot), 0, - snapshot, &s); - if (!ret) - *subvol = le32_to_cpu(s.subvol); - else if (bch2_err_matches(ret, ENOENT)) - bch_err(trans->c, "snapshot %u not found", snapshot); - return ret; - -} - static int __subvol_lookup(struct btree_trans *trans, u32 subvol, u32 *snapshot, u64 *inum) { @@ -227,35 +212,42 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) } /* Get lost+found, create if it doesn't exist: */ -static int lookup_lostfound(struct btree_trans *trans, u32 subvol, +static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, struct bch_inode_unpacked *lostfound) { struct bch_fs *c = trans->c; - struct bch_inode_unpacked root; - struct bch_hash_info root_hash_info; struct qstr lostfound_str = QSTR("lost+found"); - subvol_inum root_inum = { .subvol = subvol }; u64 inum = 0; unsigned d_type = 0; - u32 snapshot; int ret; - ret = __subvol_lookup(trans, subvol, &snapshot, &root_inum.inum); + struct bch_snapshot_tree st; + ret = bch2_snapshot_tree_lookup(trans, + bch2_snapshot_tree(c, snapshot), &st); if (ret) return ret; - ret = __lookup_inode(trans, root_inum.inum, &root, &snapshot); + subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; + u32 subvol_snapshot; + + ret = __subvol_lookup(trans, le32_to_cpu(st.master_subvol), + &subvol_snapshot, &root_inum.inum); + bch_err_msg(c, ret, "looking up root subvol"); if (ret) return ret; - root_hash_info = bch2_hash_info_init(c, &root); + struct bch_inode_unpacked root_inode; + struct bch_hash_info root_hash_info; + ret = __lookup_inode(trans, root_inum.inum, &root_inode, &snapshot); + if (ret) + return ret; + + root_hash_info = bch2_hash_info_init(c, &root_inode); ret = __lookup_dirent(trans, root_hash_info, root_inum, - &lostfound_str, &inum, &d_type); - if (bch2_err_matches(ret, ENOENT)) { - bch_notice(c, "creating lost+found"); + &lostfound_str, &inum, &d_type); + if (bch2_err_matches(ret, ENOENT)) goto create_lostfound; - } bch_err_fn(c, ret); if (ret) @@ -273,13 +265,43 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol, return __lookup_inode(trans, inum, lostfound, &snapshot); create_lostfound: - bch2_inode_init_early(c, lostfound); + /* + * XXX: we could have a nicer log message here if we had a nice way to + * walk backpointers to print a path + */ + bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); - ret = bch2_create_trans(trans, root_inum, &root, - lostfound, &lostfound_str, - 0, 0, S_IFDIR|0700, 0, NULL, NULL, - (subvol_inum) { }, 0); + u64 now = bch2_current_time(c); + struct btree_iter lostfound_iter = { NULL }; + u64 cpu = raw_smp_processor_id(); + + bch2_inode_init_early(c, lostfound); + bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); + lostfound->bi_dir = root_inode.bi_inum; + + root_inode.bi_nlink++; + + ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu); + if (ret) + goto err; + + bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); + ret = bch2_btree_iter_traverse(&lostfound_iter); + if (ret) + goto err; + + ret = bch2_dirent_create_snapshot(trans, + root_inode.bi_inum, snapshot, &root_hash_info, + mode_to_type(lostfound->bi_mode), + &lostfound_str, + lostfound->bi_inum, + &lostfound->bi_dir_offset, + BCH_HASH_SET_MUST_CREATE) ?: + bch2_inode_write_flags(trans, &lostfound_iter, lostfound, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); +err: bch_err_msg(c, ret, "creating lost+found"); + bch2_trans_iter_exit(trans, &lostfound_iter); return ret; } @@ -292,14 +314,9 @@ static int __reattach_inode(struct btree_trans *trans, char name_buf[20]; struct qstr name; u64 dir_offset = 0; - u32 subvol; int ret; - ret = __snapshot_lookup_subvol(trans, inode_snapshot, &subvol); - if (ret) - return ret; - - ret = lookup_lostfound(trans, subvol, &lostfound); + ret = lookup_lostfound(trans, inode_snapshot, &lostfound); if (ret) return ret; @@ -316,15 +333,12 @@ static int __reattach_inode(struct btree_trans *trans, snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); name = (struct qstr) QSTR(name_buf); - ret = bch2_dirent_create(trans, - (subvol_inum) { - .subvol = subvol, - .inum = lostfound.bi_inum, - }, - &dir_hash, - inode_d_type(inode), - &name, inode->bi_inum, &dir_offset, - BCH_HASH_SET_MUST_CREATE); + ret = bch2_dirent_create_snapshot(trans, + lostfound.bi_inum, inode_snapshot, + &dir_hash, + inode_d_type(inode), + &name, inode->bi_inum, &dir_offset, + BCH_HASH_SET_MUST_CREATE); if (ret) return ret; From c98d132ed1e37bb1c6b2ddbb50f9eae2283f0df6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 22:52:43 -0500 Subject: [PATCH 176/226] bcachefs: check_directory_structure() can now be run online Now that we have dynamically resizable btree paths, check_directory_structure() can check one path - inode up to the root - in a single transaction. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 106 +++++++++++++---------------------- fs/bcachefs/recovery_types.h | 2 +- 2 files changed, 41 insertions(+), 67 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index dc825b14f24e..de1617ec1b59 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -59,8 +59,8 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, return ret ?: subdirs; } -static int __subvol_lookup(struct btree_trans *trans, u32 subvol, - u32 *snapshot, u64 *inum) +static int subvol_lookup(struct btree_trans *trans, u32 subvol, + u32 *snapshot, u64 *inum) { struct bch_subvolume s; int ret; @@ -72,12 +72,6 @@ static int __subvol_lookup(struct btree_trans *trans, u32 subvol, return ret; } -static int subvol_lookup(struct btree_trans *trans, u32 subvol, - u32 *snapshot, u64 *inum) -{ - return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum)); -} - static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, struct bch_inode_unpacked *inode) { @@ -105,7 +99,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, return ret; } -static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, +static int lookup_inode(struct btree_trans *trans, u64 inode_nr, struct bch_inode_unpacked *inode, u32 *snapshot) { @@ -130,13 +124,6 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, return ret; } -static int lookup_inode(struct btree_trans *trans, u64 inode_nr, - struct bch_inode_unpacked *inode, - u32 *snapshot) -{ - return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot)); -} - static int __lookup_dirent(struct btree_trans *trans, struct bch_hash_info hash_info, subvol_inum dir, struct qstr *name, @@ -230,15 +217,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; u32 subvol_snapshot; - ret = __subvol_lookup(trans, le32_to_cpu(st.master_subvol), - &subvol_snapshot, &root_inum.inum); + ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol), + &subvol_snapshot, &root_inum.inum); bch_err_msg(c, ret, "looking up root subvol"); if (ret) return ret; struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; - ret = __lookup_inode(trans, root_inum.inum, &root_inode, &snapshot); + ret = lookup_inode(trans, root_inum.inum, &root_inode, &snapshot); + bch_err_msg(c, ret, "looking up root inode"); if (ret) return ret; @@ -262,7 +250,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, * The bch2_check_dirents pass has already run, dangling dirents * shouldn't exist here: */ - return __lookup_inode(trans, inum, lostfound, &snapshot); + return lookup_inode(trans, inum, lostfound, &snapshot); create_lostfound: /* @@ -305,7 +293,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, return ret; } -static int __reattach_inode(struct btree_trans *trans, +static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 inode_snapshot) { @@ -348,16 +336,6 @@ static int __reattach_inode(struct btree_trans *trans, return __write_inode(trans, inode, inode_snapshot); } -static int reattach_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 inode_snapshot) -{ - int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __reattach_inode(trans, inode, inode_snapshot)); - bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum); - return ret; -} - static int remove_backpointer(struct btree_trans *trans, struct bch_inode_unpacked *inode) { @@ -1735,7 +1713,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, u32 target_snapshot; u64 target_inum; - ret = __subvol_lookup(trans, target_subvol, + ret = subvol_lookup(trans, target_subvol, &target_snapshot, &target_inum); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -1747,7 +1725,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto err; } - ret = __lookup_inode(trans, target_inum, + ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -1907,7 +1885,7 @@ static int check_root_trans(struct btree_trans *trans) u64 inum; int ret; - ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); + ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); if (ret && !bch2_err_matches(ret, ENOENT)) return ret; @@ -1929,7 +1907,7 @@ static int check_root_trans(struct btree_trans *trans) goto err; } - ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); + ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); if (ret && !bch2_err_matches(ret, ENOENT)) return ret; @@ -2020,10 +1998,10 @@ static int check_path(struct btree_trans *trans, break; } - ret = lockrestart_do(trans, - PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter, - SPOS(inode->bi_dir, inode->bi_dir_offset, - parent_snapshot))).k)); + d = dirent_get_by_pos(trans, &dirent_iter, + SPOS(inode->bi_dir, inode->bi_dir_offset, + parent_snapshot)); + ret = bkey_err(d.s_c); if (ret && !bch2_err_matches(ret, ENOENT)) break; @@ -2060,7 +2038,8 @@ static int check_path(struct btree_trans *trans, ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot); if (ret) { /* Should have been caught in dirents pass */ - bch_err(c, "error looking up parent directory: %i", ret); + if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error looking up parent directory: %i", ret); break; } @@ -2072,19 +2051,19 @@ static int check_path(struct btree_trans *trans, pr_err("%llu:%u", i->inum, i->snapshot); pr_err("%llu:%u", inode->bi_inum, snapshot); - if (!fsck_err(c, dir_loop, - "directory structure loop")) + if (!fsck_err(c, dir_loop, "directory structure loop")) return 0; - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, - remove_backpointer(trans, inode)); - if (ret) { - bch_err(c, "error removing dirent: %i", ret); + ret = remove_backpointer(trans, inode); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err_msg(c, ret, "removing dirent"); + if (ret) break; - } ret = reattach_inode(trans, inode, snapshot); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err_msg(c, ret, "reattaching inode %llu", inode->bi_inum); + break; } } fsck_err: @@ -2099,31 +2078,26 @@ static int check_path(struct btree_trans *trans, */ int bch2_check_directory_structure(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - struct bkey_s_c k; struct bch_inode_unpacked u; pathbuf path = { 0, }; int ret; - for_each_btree_key_old(trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_INTENT| - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - if (!bkey_is_inode(k.k)) - continue; + ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + if (!bkey_is_inode(k.k)) + continue; - BUG_ON(bch2_inode_unpack(k, &u)); + BUG_ON(bch2_inode_unpack(k, &u)); - if (u.bi_flags & BCH_INODE_unlinked) - continue; + if (u.bi_flags & BCH_INODE_unlinked) + continue; - ret = check_path(trans, &path, &u, iter.pos.snapshot); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); + check_path(trans, &path, &u, iter.pos.snapshot); + }))); darray_exit(&path); bch_err_fn(c, ret); diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index 6dfc4f10948d..fa0c8efd2a1b 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -43,7 +43,7 @@ x(check_dirents, 27, PASS_FSCK) \ x(check_xattrs, 28, PASS_FSCK) \ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ - x(check_directory_structure, 30, PASS_FSCK) \ + x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ x(check_nlinks, 31, PASS_FSCK) \ x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \ x(fix_reflink_p, 33, 0) \ From 5e329145148d7e9fe5a07ecfc08682ef7334a4d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Dec 2023 00:16:32 -0500 Subject: [PATCH 177/226] bcachefs: Check journal entries for invalid keys in trans commit path Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 13 +++++++++++ fs/bcachefs/btree_trans_commit.c | 39 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index c134da7d8194..2c09b40a15b9 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -2172,6 +2172,19 @@ enum { BCH_JSET_ENTRY_NR }; +static inline bool jset_entry_is_key(struct jset_entry *e) +{ + switch (e->type) { + case BCH_JSET_ENTRY_btree_keys: + case BCH_JSET_ENTRY_btree_root: + case BCH_JSET_ENTRY_overwrite: + case BCH_JSET_ENTRY_write_buffer_keys: + return true; + } + + return false; +} + /* * Journal sequence numbers can be blacklisted: bsets record the max sequence * number of all the journal entries they contain updates for, so that on diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 2ec9ff06f1a6..6e872c817dce 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -12,6 +12,7 @@ #include "errcode.h" #include "error.h" #include "journal.h" +#include "journal_io.h" #include "journal_reclaim.h" #include "replicas.h" #include "snapshot.h" @@ -798,6 +799,27 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, return -EINVAL; } +static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans, + struct jset_entry *i) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "invalid bkey on insert from %s", trans->fn); + prt_newline(&buf); + printbuf_indent_add(&buf, 2); + + bch2_journal_entry_to_text(&buf, c, i); + prt_newline(&buf); + + bch2_print_string_as_lines(KERN_ERR, buf.buf); + + bch2_inconsistent_error(c); + bch2_dump_trans_updates(trans); + + return -EINVAL; +} + static int bch2_trans_commit_journal_pin_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { @@ -998,6 +1020,23 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) return ret; } + for (struct jset_entry *i = trans->journal_entries; + i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); + i = vstruct_next(i)) { + enum bkey_invalid_flags invalid_flags = 0; + + if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) + invalid_flags |= BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT; + + if (unlikely(bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags))) + ret = bch2_trans_commit_journal_entry_invalid(trans, i); + + if (ret) + return ret; + } + if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { ret = do_bch2_trans_commit_to_journal_replay(trans); goto out_reset; From 62719cf33c3ad62986130a19496cd864a0ed06c3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Dec 2023 17:50:29 -0500 Subject: [PATCH 178/226] bcachefs: Fix nochanges/read_only interaction nochanges means "we cannot issue writes at all"; it's possible to go into a pseudo read-write mode where we pin dirty metadata in memory, which is used for fsck in dry run mode and doing journal replay on a read only mount, but we do not want to allow an actual read-write mount in nochanges mode. But we do always want to allow early read-write, during recovery - this patch clarifies that. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++-- fs/bcachefs/opts.h | 2 +- fs/bcachefs/recovery.c | 4 +++- fs/bcachefs/super.c | 23 ++++++++++++----------- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 12de97bc93b9..da11757682c0 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1630,12 +1630,12 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) struct bch_opts opts = bch2_opts_empty(); int ret; - opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); - ret = bch2_parse_mount_opts(c, &opts, data); if (ret) goto err; + opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); + if (opts.read_only != c->opts.read_only) { down_write(&c->state_lock); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index cf69b92cbd03..42cad83efb48 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -389,7 +389,7 @@ enum fsck_err_opts { BCH2_NO_SB_OPT, BCH_SB_SECTOR, \ "offset", "Sector offset of superblock") \ x(read_only, u8, \ - OPT_FS, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, NULL) \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index a792f42ab5ff..d2914e911461 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1076,7 +1076,9 @@ int bch2_fs_recovery(struct bch_fs *c) bch2_journal_keys_put_initial(c); kfree(clean); - if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) { + if (!ret && + test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && + !c->opts.nochanges) { bch2_fs_read_write_early(c); bch2_delete_dead_snapshots_async(c); } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 592005b8e448..492dfd4b5fde 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -433,16 +433,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) if (test_bit(BCH_FS_rw, &c->flags)) return 0; - if (c->opts.norecovery) - return -BCH_ERR_erofs_norecovery; - - /* - * nochanges is used for fsck -n mode - we have to allow going rw - * during recovery for that to work: - */ - if (c->opts.nochanges && (!early || c->opts.read_only)) - return -BCH_ERR_erofs_nochanges; - bch_info(c, "going read-write"); ret = bch2_sb_members_v2_init(c); @@ -510,6 +500,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) int bch2_fs_read_write(struct bch_fs *c) { + if (c->opts.norecovery) + return -BCH_ERR_erofs_norecovery; + + if (c->opts.nochanges) + return -BCH_ERR_erofs_nochanges; + return __bch2_fs_read_write(c, false); } @@ -1033,7 +1029,7 @@ int bch2_fs_start(struct bch_fs *c) set_bit(BCH_FS_started, &c->flags); - if (c->opts.read_only || c->opts.nochanges) { + if (c->opts.read_only) { bch2_fs_read_only(c); } else { ret = !test_bit(BCH_FS_rw, &c->flags) @@ -1946,6 +1942,11 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, BUG_ON(darray_push(&sbs, sb)); } + if (opts.nochanges && !opts.read_only) { + ret = -BCH_ERR_erofs_nochanges; + goto err_print; + } + darray_for_each(sbs, sb) if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq)) best = sb; From 6b00de06f51c5388f1a7eddd4ad7df8e3b8863b5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 27 Jun 2023 21:02:27 -0400 Subject: [PATCH 179/226] bcachefs: bch_member->seq Add new fields for split brain detection: - bch_member->seq, which tracks the sequence number of the last superblock write that happened to each member device - bch_sb->write_time, which tracks the time of the last superblock write, to allow detection of when two members have diverged but had the same number of superblock writes. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 8 ++++++-- fs/bcachefs/sb-members.c | 5 +++++ fs/bcachefs/super-io.c | 11 +++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 2c09b40a15b9..eb2df422ae5a 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1303,6 +1303,7 @@ struct bch_member { __le64 errors[BCH_MEMBER_ERROR_NR]; __le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; __le64 errors_reset_time; + __le64 seq; }; #define BCH_MEMBER_V1_BYTES 56 @@ -1729,7 +1730,9 @@ struct bch_sb_field_downgrade { x(deleted_inodes, BCH_VERSION(1, 2), \ BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \ x(rebalance_work, BCH_VERSION(1, 3), \ - BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) + BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \ + x(member_seq, BCH_VERSION(1, 4), \ + 0) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1795,7 +1798,8 @@ struct bch_sb { __le32 time_base_hi; __le32 time_precision; - __le64 flags[8]; + __le64 flags[7]; + __le64 write_time; __le64 features[2]; __le64 compat[2]; diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 7c5db669a467..4c19a8096c1d 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -235,6 +235,11 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "(never)"); prt_newline(out); + prt_printf(out, "Last superblock write:"); + prt_tab(out); + prt_u64(out, le64_to_cpu(m.seq)); + prt_newline(out); + prt_printf(out, "State:"); prt_tab(out); prt_printf(out, "%s", diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 89d2d2a86eb0..aee3c634f4af 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -564,6 +564,7 @@ static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) dst->time_base_lo = src->time_base_lo; dst->time_base_hi = src->time_base_hi; dst->time_precision = src->time_precision; + dst->write_time = src->write_time; memcpy(dst->flags, src->flags, sizeof(dst->flags)); memcpy(dst->features, src->features, sizeof(dst->features)); @@ -942,6 +943,11 @@ int bch2_write_super(struct bch_fs *c) le64_add_cpu(&c->disk_sb.sb->seq, 1); + struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); + for_each_online_member(c, ca) + __bch2_members_v2_get_mut(mi, ca->dev_idx)->seq = c->disk_sb.sb->seq; + c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds()); + if (test_bit(BCH_FS_error, &c->flags)) SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); if (test_bit(BCH_FS_topology_error, &c->flags)) @@ -1304,6 +1310,11 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, "%llu", le64_to_cpu(sb->seq)); prt_newline(out); + prt_printf(out, "Time of last write:"); + prt_tab(out); + bch2_prt_datetime(out, le64_to_cpu(sb->write_time)); + prt_newline(out); + prt_printf(out, "Superblock size:"); prt_tab(out); prt_printf(out, "%zu", vstruct_bytes(sb)); From 0d529663f04be744d6af879889c5b16e46286ce1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 27 Jun 2023 21:02:27 -0400 Subject: [PATCH 180/226] bcachefs: Split brain detection Use the new bch_member->seq, sb->write_time fields to detect split brain and kick out devices when necessary. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/super.c | 75 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 9d53b7fd6e84..8c40c2067a04 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -170,6 +170,7 @@ x(EINVAL, device_size_too_small) \ x(EINVAL, device_not_a_member_of_filesystem) \ x(EINVAL, device_has_been_removed) \ + x(EINVAL, device_splitbrain) \ x(EINVAL, device_already_online) \ x(EINVAL, insufficient_devices_to_start) \ x(EINVAL, invalid) \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 492dfd4b5fde..0f3a924ca1f9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1063,20 +1063,65 @@ static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) return 0; } -static int bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) +static int bch2_dev_in_fs(struct bch_sb_handle *fs, + struct bch_sb_handle *sb) { - struct bch_sb *newest = - le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb; + if (fs == sb) + return 0; - if (!uuid_equal(&fs->uuid, &sb->uuid)) + if (!uuid_equal(&fs->sb->uuid, &sb->sb->uuid)) return -BCH_ERR_device_not_a_member_of_filesystem; - if (!bch2_dev_exists(newest, sb->dev_idx)) + if (!bch2_dev_exists(fs->sb, sb->sb->dev_idx)) return -BCH_ERR_device_has_been_removed; - if (fs->block_size != sb->block_size) + if (fs->sb->block_size != sb->sb->block_size) return -BCH_ERR_mismatched_block_size; + if (le16_to_cpu(fs->sb->version) < bcachefs_metadata_version_member_seq || + le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_member_seq) + return 0; + + if (fs->sb->seq == sb->sb->seq && + fs->sb->write_time != sb->sb->write_time) { + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "Split brain detected between %pg and %pg:", + sb->bdev, fs->bdev); + prt_newline(&buf); + prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq)); + prt_newline(&buf); + + prt_printf(&buf, "%pg ", fs->bdev); + bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));; + prt_newline(&buf); + + prt_printf(&buf, "%pg ", sb->bdev); + bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));; + prt_newline(&buf); + + prt_printf(&buf, "Not using older sb"); + + pr_err("%s", buf.buf); + printbuf_exit(&buf); + return -BCH_ERR_device_splitbrain; + } + + struct bch_member m = bch2_sb_member_get(fs->sb, sb->sb->dev_idx); + u64 seq_from_fs = le64_to_cpu(m.seq); + u64 seq_from_member = le64_to_cpu(sb->sb->seq); + + if (seq_from_fs && seq_from_fs < seq_from_member) { + pr_err("Split brain detected between %pg and %pg:\n" + "%pg believes seq of %pg to be %llu, but %pg has %llu\n" + "Not using %pg", + sb->bdev, fs->bdev, + fs->bdev, sb->bdev, seq_from_fs, + sb->bdev, seq_from_member, + sb->bdev); + return -BCH_ERR_device_splitbrain; + } + return 0; } @@ -1770,7 +1815,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) dev_idx = sb.sb->dev_idx; - ret = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); + ret = bch2_dev_in_fs(&c->disk_sb, &sb); bch_err_msg(c, ret, "bringing %s online", path); if (ret) goto err; @@ -1911,6 +1956,12 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) /* Filesystem open: */ +static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r) +{ + return cmp_int(le64_to_cpu(l->seq), le64_to_cpu(r->seq)) ?: + cmp_int(le64_to_cpu(l->write_time), le64_to_cpu(r->write_time)); +} + struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_opts opts) { @@ -1948,19 +1999,21 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } darray_for_each(sbs, sb) - if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq)) + if (!best || sb_cmp(sb->sb, best->sb) > 0) best = sb; darray_for_each_reverse(sbs, sb) { - if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) { - pr_info("%pg has been removed, skipping", sb->bdev); + ret = bch2_dev_in_fs(best, sb); + + if (ret == -BCH_ERR_device_has_been_removed || + ret == -BCH_ERR_device_splitbrain) { bch2_free_super(sb); darray_remove_item(&sbs, sb); best -= best > sb; + ret = 0; continue; } - ret = bch2_dev_in_fs(best->sb, sb->sb); if (ret) goto err_print; } From 83322e8ca8b687528765d7f4acf55ef3855004c4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Dec 2023 23:08:45 -0500 Subject: [PATCH 181/226] bcachefs: btree_trans always has stats reserve slot 0 for unknown (when we overflow), to avoid some branches Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 14 ++++---------- fs/bcachefs/btree_locking.h | 9 +++------ fs/bcachefs/btree_update.c | 4 +--- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index d9a9e84a2a28..fa29f00e9350 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1476,9 +1476,6 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans) struct printbuf buf = PRINTBUF; size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths); - if (!s) - return; - bch2_trans_paths_to_text(&buf, trans); if (!buf.allocation_failure) { @@ -2781,8 +2778,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); struct btree_transaction_stats *s = btree_trans_stats(trans); - if (s) - s->max_mem = max(s->max_mem, new_bytes); + s->max_mem = max(s->max_mem, new_bytes); new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); if (unlikely(!new_mem)) { @@ -2919,13 +2915,11 @@ u32 bch2_trans_begin(struct btree_trans *trans) return trans->restart_count; } -const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR]; +const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR] = { "(unknown)" }; unsigned bch2_trans_get_fn_idx(const char *fn) { - unsigned i; - - for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++) + for (unsigned i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++) if (!bch2_btree_transaction_fns[i] || bch2_btree_transaction_fns[i] == fn) { bch2_btree_transaction_fns[i] = fn; @@ -2933,7 +2927,7 @@ unsigned bch2_trans_get_fn_idx(const char *fn) } pr_warn_once("BCH_TRANSACTIONS_NR not big enough!"); - return i; + return 0; } struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 64810ea544c9..cc5500a957a1 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -122,12 +122,9 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans, struct btree_path *path, unsigned level) { #ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS - struct btree_transaction_stats *s = btree_trans_stats(trans); - - if (s) - __bch2_time_stats_update(&s->lock_hold_times, - path->l[level].lock_taken_time, - local_clock()); + __bch2_time_stats_update(&btree_trans_stats(trans)->lock_hold_times, + path->l[level].lock_taken_time, + local_clock()); #endif } diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 83aa0fb8ba93..c3ff365acce9 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -557,9 +557,7 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi if (new_top > trans->journal_entries_size) { trans->journal_entries_size = roundup_pow_of_two(new_top); - struct btree_transaction_stats *s = btree_trans_stats(trans); - if (s) - s->journal_entries_size = trans->journal_entries_size; + btree_trans_stats(trans)->journal_entries_size = trans->journal_entries_size; } struct jset_entry *n = From 89056f245bce2cb833726927783337c5dc59eac0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Dec 2023 22:43:33 -0500 Subject: [PATCH 182/226] bcachefs: track transaction durations Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/btree_iter.c | 9 ++++++++- fs/bcachefs/debug.c | 29 ++++++++++++++++++----------- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 53a2624176ab..840f605eff1f 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -647,6 +647,7 @@ struct btree_debug { #define BCH_TRANSACTIONS_NR 128 struct btree_transaction_stats { + struct bch2_time_stats duration; struct bch2_time_stats lock_hold_times; struct mutex lock; unsigned nr_max_paths; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index fa29f00e9350..7e5c797cfaf2 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2894,9 +2894,15 @@ u32 bch2_trans_begin(struct btree_trans *trans) } now = local_clock(); + + if (!IS_ENABLED(CONFIG_BCACHEFS_NO_LATENCY_ACCT) && + time_after64(now, trans->last_begin_time + 10)) + __bch2_time_stats_update(&btree_trans_stats(trans)->duration, + trans->last_begin_time, now); + if (!trans->restarted && (need_resched() || - now - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) { + time_after64(now, trans->last_begin_time + BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS))) { drop_locks_do(trans, (cond_resched(), 0)); now = local_clock(); } @@ -3232,6 +3238,7 @@ void bch2_fs_btree_iter_init_early(struct bch_fs *c) for (s = c->btree_transaction_stats; s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats); s++) { + bch2_time_stats_init(&s->duration); bch2_time_stats_init(&s->lock_hold_times); mutex_init(&s->lock); } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index c0b4d9057f29..de5bfc0d4684 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -693,7 +693,7 @@ static const struct file_operations journal_pins_ops = { .read = bch2_journal_pins_read, }; -static int lock_held_stats_open(struct inode *inode, struct file *file) +static int btree_transaction_stats_open(struct inode *inode, struct file *file) { struct bch_fs *c = inode->i_private; struct dump_iter *i; @@ -703,7 +703,7 @@ static int lock_held_stats_open(struct inode *inode, struct file *file) if (!i) return -ENOMEM; - i->iter = 0; + i->iter = 1; i->c = c; i->buf = PRINTBUF; file->private_data = i; @@ -711,7 +711,7 @@ static int lock_held_stats_open(struct inode *inode, struct file *file) return 0; } -static int lock_held_stats_release(struct inode *inode, struct file *file) +static int btree_transaction_stats_release(struct inode *inode, struct file *file) { struct dump_iter *i = file->private_data; @@ -721,8 +721,8 @@ static int lock_held_stats_release(struct inode *inode, struct file *file) return 0; } -static ssize_t lock_held_stats_read(struct file *file, char __user *buf, - size_t size, loff_t *ppos) +static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; @@ -755,6 +755,13 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf, prt_printf(&i->buf, "Max mem used: %u", s->max_mem); prt_newline(&i->buf); + prt_printf(&i->buf, "Transaction duration:"); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + bch2_time_stats_to_text(&i->buf, &s->duration); + printbuf_indent_sub(&i->buf, 2); + if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { prt_printf(&i->buf, "Lock hold times:"); prt_newline(&i->buf); @@ -786,11 +793,11 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf, return i->ret; } -static const struct file_operations lock_held_stats_op = { - .owner = THIS_MODULE, - .open = lock_held_stats_open, - .release = lock_held_stats_release, - .read = lock_held_stats_read, +static const struct file_operations btree_transaction_stats_op = { + .owner = THIS_MODULE, + .open = btree_transaction_stats_open, + .release = btree_transaction_stats_release, + .read = btree_transaction_stats_read, }; static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, @@ -882,7 +889,7 @@ void bch2_fs_debug_init(struct bch_fs *c) c->btree_debug, &journal_pins_ops); debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, - c, &lock_held_stats_op); + c, &btree_transaction_stats_op); debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir, c->btree_debug, &btree_deadlock_ops); From 371650143d173ccdddc07405d5ee55e390a86921 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Dec 2023 18:23:34 -0500 Subject: [PATCH 183/226] bcachefs: wb_key_cmp -> wb_key_ref_cmp Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_write_buffer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 16506032670f..d0551a34c8c5 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -17,14 +17,14 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *, static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *); -static inline bool __wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) +static inline bool __wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) { return (cmp_int(l->hi, r->hi) ?: cmp_int(l->mi, r->mi) ?: cmp_int(l->lo, r->lo)) >= 0; } -static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) +static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) { #ifdef CONFIG_X86_64 int cmp; @@ -39,10 +39,10 @@ static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_re : [l] "r" (l), [r] "r" (r) : "rax", "cc"); - EBUG_ON(cmp != __wb_key_cmp(l, r)); + EBUG_ON(cmp != __wb_key_ref_cmp(l, r)); return cmp; #else - return __wb_key_cmp(l, r); + return __wb_key_ref_cmp(l, r); #endif } @@ -87,12 +87,12 @@ static noinline void wb_sort(struct wb_key_ref *base, size_t num) * average, 3/4 worst-case.) */ for (b = a; c = 2*b + 1, (d = c + 1) < n;) - b = wb_key_cmp(base + c, base + d) ? c : d; + b = wb_key_ref_cmp(base + c, base + d) ? c : d; if (d == n) /* Special case last leaf with no sibling */ b = c; /* Now backtrack from "b" to the correct location for "a" */ - while (b != a && wb_key_cmp(base + a, base + b)) + while (b != a && wb_key_ref_cmp(base + a, base + b)) b = (b - 1) / 2; c = b; /* Where "a" belongs */ while (b != a) { /* Shift it into place */ From f412392f6ea3168c0b9730532bc6298c07c4e378 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Dec 2023 20:31:21 -0500 Subject: [PATCH 184/226] bcachefs: __journal_keys_sort() refactoring Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_journal_iter.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 4c084ce493a4..719a94a84950 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -467,9 +467,7 @@ static void __journal_keys_sort(struct journal_keys *keys) src = dst = keys->d; while (src < keys->d + keys->nr) { while (src + 1 < keys->d + keys->nr && - src[0].btree_id == src[1].btree_id && - src[0].level == src[1].level && - bpos_eq(src[0].k->k.p, src[1].k->k.p)) + !journal_key_cmp(src, src + 1)) src++; *dst++ = *src++; From 8feaebb0ae8882d688b4152c5d693e248d17e623 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Dec 2023 20:26:30 -0500 Subject: [PATCH 185/226] bcachefs: __bch2_journal_key_to_wb -> bch2_journal_key_to_wb_slowpath Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_write_buffer.c | 2 +- fs/bcachefs/btree_write_buffer.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index d0551a34c8c5..5c1169c78daf 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -484,7 +484,7 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work) bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); } -int __bch2_journal_key_to_wb(struct bch_fs *c, +int bch2_journal_key_to_wb_slowpath(struct bch_fs *c, struct journal_keys_to_wb *dst, enum btree_id btree, struct bkey_i *k) { diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index 1f645f529ed2..eebcd2b15249 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -29,7 +29,7 @@ struct journal_keys_to_wb { u64 seq; }; -int __bch2_journal_key_to_wb(struct bch_fs *, +int bch2_journal_key_to_wb_slowpath(struct bch_fs *, struct journal_keys_to_wb *, enum btree_id, struct bkey_i *); @@ -40,7 +40,7 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, EBUG_ON(!dst->seq); if (unlikely(!dst->room)) - return __bch2_journal_key_to_wb(c, dst, btree, k); + return bch2_journal_key_to_wb_slowpath(c, dst, btree, k); struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys); wb_k->journal_seq = dst->seq; From f60250de329ae6dbf8aeb49ebb13bf0b79e86a1d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 19:16:14 -0500 Subject: [PATCH 186/226] bcachefs: Fix printing of device durability BCH_MEMBER_DURABILITY() was not present initially; a value of 0 means use the default, nonzero means use v - 1. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 4c19a8096c1d..a44a238bf8b5 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -266,7 +266,7 @@ static void member_to_text(struct printbuf *out, prt_str(out, "Durability:"); prt_tab(out); - prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m)); + prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1); prt_newline(out); prt_printf(out, "Discard:"); From 96f37eabe7a5cb4746f369e959f935be464950be Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 31 Dec 2023 10:04:54 -0500 Subject: [PATCH 187/226] bcachefs: factor out thread_with_file, thread_with_stdio thread_with_stdio now knows how to handle input - fsck can now prompt to fix errors. Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/bcachefs.h | 20 +- fs/bcachefs/chardev.c | 225 +++----------------- fs/bcachefs/error.c | 86 +++++--- fs/bcachefs/opts.h | 4 +- fs/bcachefs/super.c | 19 +- fs/bcachefs/thread_with_file.c | 296 +++++++++++++++++++++++++++ fs/bcachefs/thread_with_file.h | 41 ++++ fs/bcachefs/thread_with_file_types.h | 16 ++ 9 files changed, 461 insertions(+), 247 deletions(-) create mode 100644 fs/bcachefs/thread_with_file.c create mode 100644 fs/bcachefs/thread_with_file.h create mode 100644 fs/bcachefs/thread_with_file_types.h diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index b81268418174..7423a3557c68 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -82,6 +82,7 @@ bcachefs-y := \ super-io.o \ sysfs.o \ tests.o \ + thread_with_file.o \ trace.o \ two_state_shared_lock.o \ util.o \ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 840f605eff1f..ffef6182a477 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -464,6 +464,7 @@ enum bch_time_stats { #include "replicas_types.h" #include "subvolume_types.h" #include "super_types.h" +#include "thread_with_file_types.h" /* Number of nodes btree coalesce will try to coalesce at once */ #define GC_MERGE_NODES 4U @@ -478,12 +479,6 @@ enum bch_time_stats { struct btree; -struct log_output { - spinlock_t lock; - wait_queue_head_t wait; - struct printbuf buf; -}; - enum gc_phase { GC_PHASE_NOT_RUNNING, GC_PHASE_START, @@ -739,8 +734,8 @@ struct bch_fs { struct super_block *vfs_sb; dev_t dev; char name[40]; - struct log_output *output; - struct task_struct *output_filter; + struct stdio_redirect *stdio; + struct task_struct *stdio_filter; /* ro/rw, add/remove/resize devices: */ struct rw_semaphore state_lock; @@ -1252,6 +1247,15 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) return dev < c->sb.nr_devices && c->devs[dev]; } +static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c) +{ + struct stdio_redirect *stdio = c->stdio; + + if (c->stdio_filter && c->stdio_filter != current) + stdio = NULL; + return stdio; +} + #define BKEY_PADDED_ONSTACK(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 22a52bc8406b..46db563e0497 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -11,16 +11,13 @@ #include "replicas.h" #include "super.h" #include "super-io.h" +#include "thread_with_file.h" -#include #include #include -#include #include #include -#include #include -#include #include #include #include @@ -31,65 +28,6 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long return copy_to_user(to, from, n) ? -EFAULT : 0; } -struct thread_with_file { - struct task_struct *task; - int ret; - bool done; -}; - -static void thread_with_file_exit(struct thread_with_file *thr) -{ - if (thr->task) { - kthread_stop(thr->task); - put_task_struct(thr->task); - } -} - -__printf(4, 0) -static int run_thread_with_file(struct thread_with_file *thr, - const struct file_operations *fops, - int (*fn)(void *), const char *fmt, ...) -{ - va_list args; - struct file *file = NULL; - int ret, fd = -1; - struct printbuf name = PRINTBUF; - unsigned fd_flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK; - - va_start(args, fmt); - prt_vprintf(&name, fmt, args); - va_end(args); - - thr->ret = 0; - thr->task = kthread_create(fn, thr, name.buf); - ret = PTR_ERR_OR_ZERO(thr->task); - if (ret) - goto err; - - ret = get_unused_fd_flags(fd_flags); - if (ret < 0) - goto err_stop_task; - fd = ret; - - file = anon_inode_getfile(name.buf, fops, thr, fd_flags); - ret = PTR_ERR_OR_ZERO(file); - if (ret) - goto err_put_fd; - - fd_install(fd, file); - get_task_struct(thr->task); - wake_up_process(thr->task); - printbuf_exit(&name); - return fd; -err_put_fd: - put_unused_fd(fd); -err_stop_task: - kthread_stop(thr->task); -err: - printbuf_exit(&name); - return ret; -} - /* returns with ref on ca->ref */ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, unsigned flags) @@ -200,132 +138,33 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg #endif struct fsck_thread { - struct thread_with_file thr; - struct printbuf buf; + struct thread_with_stdio thr; struct bch_fs *c; char **devs; size_t nr_devs; struct bch_opts opts; - - struct log_output output; - DARRAY(char) output2; }; -static void bch2_fsck_thread_free(struct fsck_thread *thr) +static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) { - thread_with_file_exit(&thr->thr); + struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); if (thr->devs) for (size_t i = 0; i < thr->nr_devs; i++) kfree(thr->devs[i]); - darray_exit(&thr->output2); - printbuf_exit(&thr->output.buf); kfree(thr->devs); kfree(thr); } -static int bch2_fsck_thread_release(struct inode *inode, struct file *file) -{ - struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); - - bch2_fsck_thread_free(thr); - return 0; -} - -static bool fsck_thread_ready(struct fsck_thread *thr) -{ - return thr->output.buf.pos || - thr->output2.nr || - thr->thr.done; -} - -static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); - size_t copied = 0, b; - int ret = 0; - - if ((file->f_flags & O_NONBLOCK) && - !fsck_thread_ready(thr)) - return -EAGAIN; - - ret = wait_event_interruptible(thr->output.wait, - fsck_thread_ready(thr)); - if (ret) - return ret; - - if (thr->thr.done) - return 0; - - while (len) { - ret = darray_make_room(&thr->output2, thr->output.buf.pos); - if (ret) - break; - - spin_lock_irq(&thr->output.lock); - b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos); - - memcpy(&darray_top(thr->output2), thr->output.buf.buf, b); - memmove(thr->output.buf.buf, - thr->output.buf.buf + b, - thr->output.buf.pos - b); - - thr->output2.nr += b; - thr->output.buf.pos -= b; - spin_unlock_irq(&thr->output.lock); - - b = min(len, thr->output2.nr); - if (!b) - break; - - b -= copy_to_user(buf, thr->output2.data, b); - if (!b) { - ret = -EFAULT; - break; - } - - copied += b; - buf += b; - len -= b; - - memmove(thr->output2.data, - thr->output2.data + b, - thr->output2.nr - b); - thr->output2.nr -= b; - } - - return copied ?: ret; -} - -static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait) -{ - struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); - - poll_wait(file, &thr->output.wait, wait); - - return fsck_thread_ready(thr) - ? EPOLLIN|EPOLLHUP - : 0; -} - -static const struct file_operations fsck_thread_ops = { - .release = bch2_fsck_thread_release, - .read = bch2_fsck_thread_read, - .poll = bch2_fsck_thread_poll, - .llseek = no_llseek, -}; - static int bch2_fsck_offline_thread_fn(void *arg) { struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr); struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts); - thr->thr.ret = PTR_ERR_OR_ZERO(c); - if (!thr->thr.ret) + thr->thr.thr.ret = PTR_ERR_OR_ZERO(c); + if (!thr->thr.thr.ret) bch2_fs_stop(c); - thr->thr.done = true; - wake_up(&thr->output.wait); + thread_with_stdio_done(&thr->thr); return 0; } @@ -354,11 +193,6 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a thr->opts = bch2_opts_empty(); thr->nr_devs = arg.nr_devs; - thr->output.buf = PRINTBUF; - thr->output.buf.atomic++; - spin_lock_init(&thr->output.lock); - init_waitqueue_head(&thr->output.wait); - darray_init(&thr->output2); if (copy_from_user(devs, &user_arg->devs[0], array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) { @@ -384,16 +218,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a goto err; } - opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output); + opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); - ret = run_thread_with_file(&thr->thr, - &fsck_thread_ops, - bch2_fsck_offline_thread_fn, - "bch-fsck"); + ret = bch2_run_thread_with_stdio(&thr->thr, + bch2_fsck_thread_exit, + bch2_fsck_offline_thread_fn); err: if (ret < 0) { if (thr) - bch2_fsck_thread_free(thr); + bch2_fsck_thread_exit(&thr->thr); pr_err("ret %s", bch2_err_str(ret)); } kfree(devs); @@ -592,7 +425,7 @@ static int bch2_data_job_release(struct inode *inode, struct file *file) { struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr); - thread_with_file_exit(&ctx->thr); + bch2_thread_with_file_exit(&ctx->thr); kfree(ctx); return 0; } @@ -642,10 +475,9 @@ static long bch2_ioctl_data(struct bch_fs *c, ctx->c = c; ctx->arg = arg; - ret = run_thread_with_file(&ctx->thr, - &bcachefs_data_ops, - bch2_data_thread, - "bch-data/%s", c->name); + ret = bch2_run_thread_with_file(&ctx->thr, + &bcachefs_data_ops, + bch2_data_thread); if (ret < 0) kfree(ctx); return ret; @@ -936,8 +768,8 @@ static int bch2_fsck_online_thread_fn(void *arg) struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr); struct bch_fs *c = thr->c; - c->output_filter = current; - c->output = &thr->output; + c->stdio_filter = current; + c->stdio = &thr->thr.stdio; /* * XXX: can we figure out a way to do this without mucking with c->opts? @@ -949,11 +781,10 @@ static int bch2_fsck_online_thread_fn(void *arg) c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; bch2_run_online_recovery_passes(c); - c->output = NULL; - c->output_filter = NULL; + c->stdio = NULL; + c->stdio_filter = NULL; - thr->thr.done = true; - wake_up(&thr->output.wait); + thread_with_stdio_done(&thr->thr); up(&c->online_fsck_mutex); bch2_ro_ref_put(c); @@ -988,11 +819,6 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c, thr->c = c; thr->opts = bch2_opts_empty(); - thr->output.buf = PRINTBUF; - thr->output.buf.atomic++; - spin_lock_init(&thr->output.lock); - init_waitqueue_head(&thr->output.wait); - darray_init(&thr->output2); if (arg.opts) { char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); @@ -1005,15 +831,14 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c, goto err; } - ret = run_thread_with_file(&thr->thr, - &fsck_thread_ops, - bch2_fsck_online_thread_fn, - "bch-fsck"); + ret = bch2_run_thread_with_stdio(&thr->thr, + bch2_fsck_thread_exit, + bch2_fsck_online_thread_fn); err: if (ret < 0) { bch_err_fn(c, ret); if (thr) - bch2_fsck_thread_free(thr); + bch2_fsck_thread_exit(&thr->thr); up(&c->online_fsck_mutex); bch2_ro_ref_put(c); } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index aa4f7f4925f6..8a8bcbcdff2a 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "error.h" #include "super.h" +#include "thread_with_file.h" #define FSCK_ERR_RATELIMIT_NR 10 @@ -69,29 +70,11 @@ enum ask_yn { YN_ALLYES, }; -#ifdef __KERNEL__ -#define bch2_fsck_ask_yn() YN_NO -#else - -#include "tools-util.h" - -enum ask_yn bch2_fsck_ask_yn(void) +static enum ask_yn parse_yn_response(char *buf) { - char *buf = NULL; - size_t buflen = 0; - bool ret; - - while (true) { - fputs(" (y,n, or Y,N for all errors of this type) ", stdout); - fflush(stdout); - - if (getline(&buf, &buflen, stdin) < 0) - die("error reading from standard input"); - - strim(buf); - if (strlen(buf) != 1) - continue; + buf = strim(buf); + if (strlen(buf) == 1) switch (buf[0]) { case 'n': return YN_NO; @@ -102,7 +85,51 @@ enum ask_yn bch2_fsck_ask_yn(void) case 'Y': return YN_ALLYES; } - } + return -1; +} + +#ifdef __KERNEL__ +static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) +{ + struct stdio_redirect *stdio = c->stdio; + + if (c->stdio_filter && c->stdio_filter != current) + stdio = NULL; + + if (!stdio) + return YN_NO; + + char buf[100]; + int ret; + + do { + bch2_print(c, " (y,n, or Y,N for all errors of this type) "); + + int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); + if (r < 0) + return YN_NO; + buf[r] = '\0'; + } while ((ret = parse_yn_response(buf)) < 0); + + return ret; +} +#else + +#include "tools-util.h" + +static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) +{ + char *buf = NULL; + size_t buflen = 0; + int ret; + + do { + fputs(" (y,n, or Y,N for all errors of this type) ", stdout); + fflush(stdout); + + if (getline(&buf, &buflen, stdin) < 0) + die("error reading from standard input"); + } while ((ret = parse_yn_response(buf)) < 0); free(buf); return ret; @@ -221,10 +248,13 @@ int bch2_fsck_err(struct bch_fs *c, int ask; prt_str(out, ": fix?"); - bch2_print_string_as_lines(KERN_ERR, out->buf); + if (bch2_fs_stdio_redirect(c)) + bch2_print(c, "%s", out->buf); + else + bch2_print_string_as_lines(KERN_ERR, out->buf); print = false; - ask = bch2_fsck_ask_yn(); + ask = bch2_fsck_ask_yn(c); if (ask >= YN_ALLNO && s) s->fix = ask == YN_ALLNO @@ -253,8 +283,12 @@ int bch2_fsck_err(struct bch_fs *c, !(flags & FSCK_CAN_IGNORE))) ret = -BCH_ERR_fsck_errors_not_fixed; - if (print) - bch2_print_string_as_lines(KERN_ERR, out->buf); + if (print) { + if (bch2_fs_stdio_redirect(c)) + bch2_print(c, "%s\n", out->buf); + else + bch2_print_string_as_lines(KERN_ERR, out->buf); + } if (!test_bit(BCH_FS_fsck_done, &c->flags) && (ret != -BCH_ERR_fsck_fix && diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 42cad83efb48..a6f5185ecd67 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -414,11 +414,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Allocate the buckets_nouse bitmap") \ - x(log_output, u64, \ + x(stdio, u64, \ 0, \ OPT_UINT(0, S64_MAX), \ BCH2_NO_SB_OPT, false, \ - NULL, "Pointer to a struct log_output") \ + NULL, "Pointer to a struct stdio_redirect") \ x(project, u8, \ OPT_INODE, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0f3a924ca1f9..64ff7da49860 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -88,14 +88,11 @@ const char * const bch2_fs_flag_strs[] = { void __bch2_print(struct bch_fs *c, const char *fmt, ...) { - struct log_output *output = c->output; + struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); + va_list args; - - if (c->output_filter && c->output_filter != current) - output = NULL; - va_start(args, fmt); - if (likely(!output)) { + if (likely(!stdio)) { vprintk(fmt, args); } else { unsigned long flags; @@ -103,11 +100,11 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...) if (fmt[0] == KERN_SOH[0]) fmt += 2; - spin_lock_irqsave(&output->lock, flags); - prt_vprintf(&output->buf, fmt, args); - spin_unlock_irqrestore(&output->lock, flags); + spin_lock_irqsave(&stdio->output_lock, flags); + prt_vprintf(&stdio->output_buf, fmt, args); + spin_unlock_irqrestore(&stdio->output_lock, flags); - wake_up(&output->wait); + wake_up(&stdio->output_wait); } va_end(args); } @@ -724,7 +721,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto out; } - c->output = (void *)(unsigned long) opts.log_output; + c->stdio = (void *)(unsigned long) opts.stdio; __module_get(THIS_MODULE); diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c new file mode 100644 index 000000000000..b24baeabf998 --- /dev/null +++ b/fs/bcachefs/thread_with_file.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "printbuf.h" +#include "thread_with_file.h" + +#include +#include +#include +#include +#include + +void bch2_thread_with_file_exit(struct thread_with_file *thr) +{ + if (thr->task) { + kthread_stop(thr->task); + put_task_struct(thr->task); + } +} + +int bch2_run_thread_with_file(struct thread_with_file *thr, + const struct file_operations *fops, + int (*fn)(void *)) +{ + struct file *file = NULL; + int ret, fd = -1; + unsigned fd_flags = O_CLOEXEC; + + if (fops->read && fops->write) + fd_flags |= O_RDWR; + else if (fops->read) + fd_flags |= O_RDONLY; + else if (fops->write) + fd_flags |= O_WRONLY; + + char name[TASK_COMM_LEN]; + get_task_comm(name, current); + + thr->ret = 0; + thr->task = kthread_create(fn, thr, "%s", name); + ret = PTR_ERR_OR_ZERO(thr->task); + if (ret) + return ret; + + ret = get_unused_fd_flags(fd_flags); + if (ret < 0) + goto err; + fd = ret; + + file = anon_inode_getfile(name, fops, thr, fd_flags); + ret = PTR_ERR_OR_ZERO(file); + if (ret) + goto err; + + fd_install(fd, file); + get_task_struct(thr->task); + wake_up_process(thr->task); + return fd; +err: + if (fd >= 0) + put_unused_fd(fd); + if (thr->task) + kthread_stop(thr->task); + return ret; +} + +static inline bool thread_with_stdio_has_output(struct thread_with_stdio *thr) +{ + return thr->stdio.output_buf.pos || + thr->output2.nr || + thr->thr.done; +} + +static ssize_t thread_with_stdio_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct thread_with_stdio *thr = + container_of(file->private_data, struct thread_with_stdio, thr); + size_t copied = 0, b; + int ret = 0; + + if ((file->f_flags & O_NONBLOCK) && + !thread_with_stdio_has_output(thr)) + return -EAGAIN; + + ret = wait_event_interruptible(thr->stdio.output_wait, + thread_with_stdio_has_output(thr)); + if (ret) + return ret; + + if (thr->thr.done) + return 0; + + while (len) { + ret = darray_make_room(&thr->output2, thr->stdio.output_buf.pos); + if (ret) + break; + + spin_lock_irq(&thr->stdio.output_lock); + b = min_t(size_t, darray_room(thr->output2), thr->stdio.output_buf.pos); + + memcpy(&darray_top(thr->output2), thr->stdio.output_buf.buf, b); + memmove(thr->stdio.output_buf.buf, + thr->stdio.output_buf.buf + b, + thr->stdio.output_buf.pos - b); + + thr->output2.nr += b; + thr->stdio.output_buf.pos -= b; + spin_unlock_irq(&thr->stdio.output_lock); + + b = min(len, thr->output2.nr); + if (!b) + break; + + b -= copy_to_user(buf, thr->output2.data, b); + if (!b) { + ret = -EFAULT; + break; + } + + copied += b; + buf += b; + len -= b; + + memmove(thr->output2.data, + thr->output2.data + b, + thr->output2.nr - b); + thr->output2.nr -= b; + } + + return copied ?: ret; +} + +static int thread_with_stdio_release(struct inode *inode, struct file *file) +{ + struct thread_with_stdio *thr = + container_of(file->private_data, struct thread_with_stdio, thr); + + bch2_thread_with_file_exit(&thr->thr); + printbuf_exit(&thr->stdio.input_buf); + printbuf_exit(&thr->stdio.output_buf); + darray_exit(&thr->output2); + thr->exit(thr); + return 0; +} + +#define WRITE_BUFFER 4096 + +static inline bool thread_with_stdio_has_input_space(struct thread_with_stdio *thr) +{ + return thr->stdio.input_buf.pos < WRITE_BUFFER || thr->thr.done; +} + +static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct thread_with_stdio *thr = + container_of(file->private_data, struct thread_with_stdio, thr); + struct printbuf *buf = &thr->stdio.input_buf; + size_t copied = 0; + ssize_t ret = 0; + + while (len) { + if (thr->thr.done) { + ret = -EPIPE; + break; + } + + size_t b = len - fault_in_readable(ubuf, len); + if (!b) { + ret = -EFAULT; + break; + } + + spin_lock(&thr->stdio.input_lock); + if (buf->pos < WRITE_BUFFER) + bch2_printbuf_make_room(buf, min(b, WRITE_BUFFER - buf->pos)); + b = min(len, printbuf_remaining_size(buf)); + + if (b && !copy_from_user_nofault(&buf->buf[buf->pos], ubuf, b)) { + ubuf += b; + len -= b; + copied += b; + buf->pos += b; + } + spin_unlock(&thr->stdio.input_lock); + + if (b) { + wake_up(&thr->stdio.input_wait); + } else { + if ((file->f_flags & O_NONBLOCK)) { + ret = -EAGAIN; + break; + } + + ret = wait_event_interruptible(thr->stdio.input_wait, + thread_with_stdio_has_input_space(thr)); + if (ret) + break; + } + } + + return copied ?: ret; +} + +static __poll_t thread_with_stdio_poll(struct file *file, struct poll_table_struct *wait) +{ + struct thread_with_stdio *thr = + container_of(file->private_data, struct thread_with_stdio, thr); + + poll_wait(file, &thr->stdio.output_wait, wait); + poll_wait(file, &thr->stdio.input_wait, wait); + + __poll_t mask = 0; + + if (thread_with_stdio_has_output(thr)) + mask |= EPOLLIN; + if (thread_with_stdio_has_input_space(thr)) + mask |= EPOLLOUT; + if (thr->thr.done) + mask |= EPOLLHUP|EPOLLERR; + return mask; +} + +static const struct file_operations thread_with_stdio_fops = { + .release = thread_with_stdio_release, + .read = thread_with_stdio_read, + .write = thread_with_stdio_write, + .poll = thread_with_stdio_poll, + .llseek = no_llseek, +}; + +int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, + void (*exit)(struct thread_with_stdio *), + int (*fn)(void *)) +{ + thr->stdio.input_buf = PRINTBUF; + thr->stdio.input_buf.atomic++; + spin_lock_init(&thr->stdio.input_lock); + init_waitqueue_head(&thr->stdio.input_wait); + + thr->stdio.output_buf = PRINTBUF; + thr->stdio.output_buf.atomic++; + spin_lock_init(&thr->stdio.output_lock); + init_waitqueue_head(&thr->stdio.output_wait); + + darray_init(&thr->output2); + thr->exit = exit; + + return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, fn); +} + +int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *buf, size_t len) +{ + wait_event(stdio->input_wait, + stdio->input_buf.pos || stdio->done); + + if (stdio->done) + return -1; + + spin_lock(&stdio->input_lock); + int ret = min(len, stdio->input_buf.pos); + stdio->input_buf.pos -= ret; + memcpy(buf, stdio->input_buf.buf, ret); + memmove(stdio->input_buf.buf, + stdio->input_buf.buf + ret, + stdio->input_buf.pos); + spin_unlock(&stdio->input_lock); + + wake_up(&stdio->input_wait); + return ret; +} + +int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *buf, size_t len) +{ + wait_event(stdio->input_wait, + stdio->input_buf.pos || stdio->done); + + if (stdio->done) + return -1; + + spin_lock(&stdio->input_lock); + int ret = min(len, stdio->input_buf.pos); + char *n = memchr(stdio->input_buf.buf, '\n', ret); + if (n) + ret = min(ret, n + 1 - stdio->input_buf.buf); + stdio->input_buf.pos -= ret; + memcpy(buf, stdio->input_buf.buf, ret); + memmove(stdio->input_buf.buf, + stdio->input_buf.buf + ret, + stdio->input_buf.pos); + spin_unlock(&stdio->input_lock); + + wake_up(&stdio->input_wait); + return ret; +} diff --git a/fs/bcachefs/thread_with_file.h b/fs/bcachefs/thread_with_file.h new file mode 100644 index 000000000000..05879c5048c8 --- /dev/null +++ b/fs/bcachefs/thread_with_file.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_THREAD_WITH_FILE_H +#define _BCACHEFS_THREAD_WITH_FILE_H + +#include "thread_with_file_types.h" + +struct task_struct; + +struct thread_with_file { + struct task_struct *task; + int ret; + bool done; +}; + +void bch2_thread_with_file_exit(struct thread_with_file *); +int bch2_run_thread_with_file(struct thread_with_file *, + const struct file_operations *, + int (*fn)(void *)); + +struct thread_with_stdio { + struct thread_with_file thr; + struct stdio_redirect stdio; + DARRAY(char) output2; + void (*exit)(struct thread_with_stdio *); +}; + +static inline void thread_with_stdio_done(struct thread_with_stdio *thr) +{ + thr->thr.done = true; + thr->stdio.done = true; + wake_up(&thr->stdio.input_wait); + wake_up(&thr->stdio.output_wait); +} + +int bch2_run_thread_with_stdio(struct thread_with_stdio *, + void (*exit)(struct thread_with_stdio *), + int (*fn)(void *)); +int bch2_stdio_redirect_read(struct stdio_redirect *, char *, size_t); +int bch2_stdio_redirect_readline(struct stdio_redirect *, char *, size_t); + +#endif /* _BCACHEFS_THREAD_WITH_FILE_H */ diff --git a/fs/bcachefs/thread_with_file_types.h b/fs/bcachefs/thread_with_file_types.h new file mode 100644 index 000000000000..90b5e645e98c --- /dev/null +++ b/fs/bcachefs/thread_with_file_types.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_THREAD_WITH_FILE_TYPES_H +#define _BCACHEFS_THREAD_WITH_FILE_TYPES_H + +struct stdio_redirect { + spinlock_t output_lock; + wait_queue_head_t output_wait; + struct printbuf output_buf; + + spinlock_t input_lock; + wait_queue_head_t input_wait; + struct printbuf input_buf; + bool done; +}; + +#endif /* _BCACHEFS_THREAD_WITH_FILE_TYPES_H */ From eff1f728bedc014c783752af5d2a88c46586f654 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 17:08:58 -0500 Subject: [PATCH 188/226] bcachefs: Upgrading uses bch_sb.recovery_passes_required Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d2914e911461..066aef6f3c74 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -575,7 +575,7 @@ u64 bch2_recovery_passes_from_stable(u64 v) return ret; } -static bool check_version_upgrade(struct bch_fs *c) +static u64 check_version_upgrade(struct bch_fs *c) { unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); unsigned latest_version = bcachefs_metadata_version_current; @@ -626,14 +626,12 @@ static bool check_version_upgrade(struct bch_fs *c) u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); if (recovery_passes) { - if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK) - prt_str(&buf, "fsck required"); - else { - prt_str(&buf, "running recovery passes: "); - prt_bitflags(&buf, bch2_recovery_passes, recovery_passes); - } + prt_str(&buf, " running recovery passes: "); + prt_bitflags(&buf, bch2_recovery_passes, recovery_passes); - c->recovery_passes_explicit |= recovery_passes; + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + ext->recovery_passes_required[0] |= + cpu_to_le64(bch2_recovery_passes_to_stable(recovery_passes)); c->opts.fix_errors = FSCK_FIX_yes; } From 717296c34c8d9d13d7aad4d710b0c3bdb285783b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Dec 2023 23:19:09 -0500 Subject: [PATCH 189/226] bcachefs: trans_mark now takes bkey_s Prep work for disk space accounting rewrite: we're going to want to use a single callback for both of our current triggers, so we need to change them to have the same type signature first. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 22 +++++++++++----------- fs/bcachefs/alloc_background.h | 2 +- fs/bcachefs/bkey_methods.h | 12 ++++++------ fs/bcachefs/btree_gc.c | 2 +- fs/bcachefs/btree_trans_commit.c | 4 ++-- fs/bcachefs/btree_update_interior.c | 4 ++-- fs/bcachefs/buckets.c | 18 +++++++++--------- fs/bcachefs/buckets.h | 8 ++++---- fs/bcachefs/inode.c | 8 ++++---- fs/bcachefs/inode.h | 2 +- fs/bcachefs/reflink.c | 28 ++++++++++++++-------------- fs/bcachefs/reflink.h | 14 +++++++------- 12 files changed, 62 insertions(+), 62 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 1a127b0a08b3..1446501258ab 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -751,7 +751,7 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans, int bch2_trans_mark_alloc(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { struct bch_fs *c = trans->c; @@ -764,10 +764,10 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, * Deletion only happens in the device removal path, with * BTREE_TRIGGER_NORUN: */ - BUG_ON(new->k.type != KEY_TYPE_alloc_v4); + BUG_ON(new.k->type != KEY_TYPE_alloc_v4); old_a = bch2_alloc_to_v4(old, &old_a_convert); - new_a = &bkey_i_to_alloc_v4(new)->v; + new_a = bkey_s_to_alloc_v4(new).v; new_a->data_type = alloc_data_type(*new_a, new_a->data_type); @@ -780,7 +780,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, if (data_type_is_empty(new_a->data_type) && BCH_ALLOC_V4_NEED_INC_GEN(new_a) && - !bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) { + !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) { new_a->gen++; SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); } @@ -789,7 +789,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, (new_a->data_type == BCH_DATA_free && alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) { ret = bch2_bucket_do_index(trans, old, old_a, false) ?: - bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true); + bch2_bucket_do_index(trans, new.s_c, new_a, true); if (ret) return ret; } @@ -802,27 +802,27 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, new_lru = alloc_lru_idx_read(*new_a); if (old_lru != new_lru) { - ret = bch2_lru_change(trans, new->k.p.inode, - bucket_to_u64(new->k.p), + ret = bch2_lru_change(trans, new.k->p.inode, + bucket_to_u64(new.k->p), old_lru, new_lru); if (ret) return ret; } new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, - bch_dev_bkey_exists(c, new->k.p.inode)); + bch_dev_bkey_exists(c, new.k->p.inode)); if (old_a->fragmentation_lru != new_a->fragmentation_lru) { ret = bch2_lru_change(trans, BCH_LRU_FRAGMENTATION_START, - bucket_to_u64(new->k.p), + bucket_to_u64(new.k->p), old_a->fragmentation_lru, new_a->fragmentation_lru); if (ret) return ret; } if (old_a->gen != new_a->gen) { - ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen); + ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen); if (ret) return ret; } @@ -834,7 +834,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && old_a->cached_sectors) { - ret = bch2_update_cached_sectors_list(trans, new->k.p.inode, + ret = bch2_update_cached_sectors_list(trans, new.k->p.inode, -((s64) old_a->cached_sectors)); if (ret) return ret; diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 96671f166dd8..7be590a4fa81 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -233,7 +233,7 @@ static inline bool bkey_is_alloc(const struct bkey *k) int bch2_alloc_read(struct bch_fs *); int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_do_discards(struct bch_fs *); diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 912adadfb4dd..fa68e28c4f82 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -29,7 +29,7 @@ struct bkey_ops { bool (*key_normalize)(struct bch_fs *, struct bkey_s); bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c); int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); void (*compat)(enum btree_id id, unsigned version, @@ -120,10 +120,10 @@ enum btree_update_flags { static inline int bch2_trans_mark_key(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { - const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new->k.type); + const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type); return ops->trans_trigger ? ops->trans_trigger(trans, btree_id, level, old, new, flags) @@ -139,18 +139,18 @@ static inline int bch2_trans_mark_old(struct btree_trans *trans, bkey_init(&deleted.k); deleted.k.p = old.k->p; - return bch2_trans_mark_key(trans, btree_id, level, old, &deleted, + return bch2_trans_mark_key(trans, btree_id, level, old, bkey_i_to_s(&deleted), BTREE_TRIGGER_OVERWRITE|flags); } static inline int bch2_trans_mark_new(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_i *new, unsigned flags) + struct bkey_s new, unsigned flags) { struct bkey_i deleted; bkey_init(&deleted.k); - deleted.k.p = new->k.p; + deleted.k.p = new.k->p; return bch2_trans_mark_key(trans, btree_id, level, bkey_i_to_s_c(&deleted), new, BTREE_TRIGGER_INSERT|flags); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 9f27cb3ea563..3143544ca24c 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1589,7 +1589,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, if (!r->refcount) new->k.type = KEY_TYPE_deleted; else - *bkey_refcount(new) = cpu_to_le64(r->refcount); + *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); } fsck_err: printbuf_exit(&buf); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 6e872c817dce..b62eba04e0be 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -494,7 +494,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ old_ops->trans_trigger == new_ops->trans_trigger) { i->overwrite_trigger_run = true; i->insert_trigger_run = true; - return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k, + return bch2_trans_mark_key(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k), BTREE_TRIGGER_INSERT| BTREE_TRIGGER_OVERWRITE| i->flags) ?: 1; @@ -503,7 +503,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1; } else if (!overwrite && !i->insert_trigger_run) { i->insert_trigger_run = true; - return bch2_trans_mark_new(trans, i->btree_id, i->level, i->k, i->flags) ?: 1; + return bch2_trans_mark_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), i->flags) ?: 1; } else { return 0; } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 2a93eb92d112..7447949abbb0 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -576,7 +576,7 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, for_each_keylist_key(&as->new_keys, k) { unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; - ret = bch2_trans_mark_new(trans, as->btree_id, level, k, 0); + ret = bch2_trans_mark_new(trans, as->btree_id, level, bkey_i_to_s(k), 0); if (ret) return ret; } @@ -2162,7 +2162,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, return ret; ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1, - new_key, 0); + bkey_i_to_s(new_key), 0); if (ret) return ret; } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index c0dac04253f7..e96e41891878 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1423,15 +1423,15 @@ static int __trans_mark_extent(struct btree_trans *trans, int bch2_trans_mark_extent(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { struct bch_fs *c = trans->c; - int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) - + int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - (int) bch2_bkey_needs_rebalance(c, old); if (mod) { - int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0); + int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0); if (ret) return ret; } @@ -1519,7 +1519,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, int bch2_trans_mark_stripe(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { const struct bch_stripe *old_s = NULL; @@ -1530,8 +1530,8 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, if (old.k->type == KEY_TYPE_stripe) old_s = bkey_s_c_to_stripe(old).v; - if (new->k.type == KEY_TYPE_stripe) - new_s = &bkey_i_to_stripe(new)->v; + if (new.k->type == KEY_TYPE_stripe) + new_s = bkey_s_to_stripe(new).v; /* * If the pointers aren't changing, we don't need to do anything: @@ -1552,7 +1552,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, if (new_s) { s64 sectors = le16_to_cpu(new_s->sectors); - bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new)); + bch2_bkey_to_replicas(&r.e, new.s_c); ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); if (ret) return ret; @@ -1576,7 +1576,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, if (new_s) { ret = bch2_trans_mark_stripe_bucket(trans, - bkey_i_to_s_c_stripe(new), i, false); + bkey_s_to_stripe(new).c, i, false); if (ret) break; } @@ -1620,7 +1620,7 @@ static int __trans_mark_reservation(struct btree_trans *trans, int bch2_trans_mark_reservation(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, - struct bkey_i *new, + struct bkey_s new, unsigned flags) { return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 379101d7e585..c7168b86bb2e 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -336,9 +336,9 @@ int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); -int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); +int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); +int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); #define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ ({ \ int ret = 0; \ @@ -351,7 +351,7 @@ int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, s }) #define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ - mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags) + mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new.s_c, _flags) void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index c39844b8e596..32e0736ba718 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -564,12 +564,12 @@ static inline bool bkey_is_deleted_inode(struct bkey_s_c k) int bch2_trans_mark_inode(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, - struct bkey_i *new, + struct bkey_s new, unsigned flags) { - int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); + s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); bool old_deleted = bkey_is_deleted_inode(old); - bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new)); + bool new_deleted = bkey_is_deleted_inode(new.s_c); if (nr) { int ret = bch2_replicas_deltas_realloc(trans, 0); @@ -582,7 +582,7 @@ int bch2_trans_mark_inode(struct btree_trans *trans, } if (old_deleted != new_deleted) { - int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted); + int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted); if (ret) return ret; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 88818a332b1e..7533a16a78cb 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -18,7 +18,7 @@ int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 9f9c8a244c80..aa2ff7ee801f 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -93,7 +93,7 @@ static int trans_mark_reflink_p_segment(struct btree_trans *trans, if (ret) goto err; - refcount = bkey_refcount(k); + refcount = bkey_refcount(bkey_i_to_s(k)); if (!refcount) { bch2_bkey_val_to_text(&buf, c, p.s_c); bch2_trans_inconsistent(trans, @@ -161,11 +161,11 @@ static int __trans_mark_reflink_p(struct btree_trans *trans, int bch2_trans_mark_reflink_p(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, - struct bkey_i *new, + struct bkey_s new, unsigned flags) { if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; + struct bch_reflink_p *v = bkey_s_to_reflink_p(new).v; v->front_pad = v->back_pad = 0; } @@ -305,29 +305,29 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r } #endif -static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags) +static inline void check_indirect_extent_deleting(struct bkey_s new, unsigned *flags) { if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) { - new->k.type = KEY_TYPE_deleted; - new->k.size = 0; - set_bkey_val_u64s(&new->k, 0); + new.k->type = KEY_TYPE_deleted; + new.k->size = 0; + set_bkey_val_u64s(new.k, 0); *flags &= ~BTREE_TRIGGER_INSERT; } } int bch2_trans_mark_reflink_v(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { check_indirect_extent_deleting(new, &flags); if (old.k->type == KEY_TYPE_reflink_v && - new->k.type == KEY_TYPE_reflink_v && - old.k->u64s == new->k.u64s && + new.k->type == KEY_TYPE_reflink_v && + old.k->u64s == new.k->u64s && !memcmp(bkey_s_c_to_reflink_v(old).v->start, - bkey_i_to_reflink_v(new)->v.start, - bkey_val_bytes(&new->k) - 8)) + bkey_s_to_reflink_v(new).v->start, + bkey_val_bytes(new.k) - 8)) return 0; return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); @@ -355,7 +355,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out, int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { check_indirect_extent_deleting(new, &flags); @@ -398,7 +398,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k)); - refcount = bkey_refcount(r_v); + refcount = bkey_refcount(bkey_i_to_s(r_v)); *refcount = 0; memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k)); diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index 6cc9c4a77265..fa2ec7f8314a 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -10,7 +10,7 @@ void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); @@ -28,7 +28,7 @@ int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ .key_invalid = bch2_reflink_v_invalid, \ @@ -45,7 +45,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trans_mark_indirect_inline_data(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ @@ -67,13 +67,13 @@ static inline const __le64 *bkey_refcount_c(struct bkey_s_c k) } } -static inline __le64 *bkey_refcount(struct bkey_i *k) +static inline __le64 *bkey_refcount(struct bkey_s k) { - switch (k->k.type) { + switch (k.k->type) { case KEY_TYPE_reflink_v: - return &bkey_i_to_reflink_v(k)->v.refcount; + return &bkey_s_to_reflink_v(k).v->refcount; case KEY_TYPE_indirect_inline_data: - return &bkey_i_to_indirect_inline_data(k)->v.refcount; + return &bkey_s_to_indirect_inline_data(k).v->refcount; default: return NULL; } From ad00bce07da8138c08f6585f153412b65c99b064 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Dec 2023 23:19:09 -0500 Subject: [PATCH 190/226] bcachefs: mark now takes bkey_s Prep work for disk space accounting rewrite: we're going to want to use a single callback for both of our current triggers, so we need to change them to have the same type signature first. Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey_methods.h | 4 ++-- fs/bcachefs/btree_gc.c | 10 +++++++++- fs/bcachefs/btree_trans_commit.c | 6 +++--- fs/bcachefs/buckets.c | 11 ++++++----- fs/bcachefs/buckets.h | 12 ++++++------ fs/bcachefs/inode.c | 7 ++----- fs/bcachefs/inode.h | 2 +- fs/bcachefs/reflink.c | 2 +- fs/bcachefs/reflink.h | 2 +- fs/bcachefs/snapshot.c | 14 +++++++++++--- fs/bcachefs/snapshot.h | 2 +- 11 files changed, 43 insertions(+), 29 deletions(-) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index fa68e28c4f82..da934d8f75fe 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -31,7 +31,7 @@ struct bkey_ops { int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); void (*compat)(enum btree_id id, unsigned version, unsigned big_endian, int write, struct bkey_s); @@ -80,7 +80,7 @@ bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); static inline int bch2_mark_key(struct btree_trans *trans, enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 3143544ca24c..88e069d7a7bb 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -41,6 +41,14 @@ #define DROP_THIS_NODE 10 #define DROP_PREV_NODE 11 +static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k) +{ + return (struct bkey_s) {{{ + (struct bkey *) k.k, + (struct bch_val *) k.v + }}}; +} + static bool should_restart_for_topology_repair(struct bch_fs *c) { return c->opts.fix_errors != FSCK_FIX_no && @@ -829,7 +837,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, } ret = commit_do(trans, NULL, NULL, 0, - bch2_mark_key(trans, btree_id, level, old, *k, flags)); + bch2_mark_key(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), flags)); fsck_err: err: bch_err_fn(c, ret); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index b62eba04e0be..5b5cc3f44a82 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -453,14 +453,14 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (old_ops->atomic_trigger == new_ops->atomic_trigger) { ret = bch2_mark_key(trans, i->btree_id, i->level, - old, bkey_i_to_s_c(new), + old, bkey_i_to_s(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); } else { struct bkey _deleted = POS_KEY((trans->paths + i->path)->pos); - struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; + struct bkey_s deleted = (struct bkey_s) { { { &_deleted, NULL } } }; ret = bch2_mark_key(trans, i->btree_id, i->level, - deleted, bkey_i_to_s_c(new), + deleted.s_c, bkey_i_to_s(new), BTREE_TRIGGER_INSERT|flags) ?: bch2_mark_key(trans, i->btree_id, i->level, old, deleted, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e96e41891878..7099a7db58a8 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -498,7 +498,7 @@ int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 int bch2_mark_alloc(struct btree_trans *trans, enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { bool gc = flags & BTREE_TRIGGER_GC; @@ -524,7 +524,7 @@ int bch2_mark_alloc(struct btree_trans *trans, ca = bch_dev_bkey_exists(c, new.k->p.inode); old_a = bch2_alloc_to_v4(old, &old_a_convert); - new_a = bch2_alloc_to_v4(new, &new_a_convert); + new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert); bucket_journal_seq = new_a->journal_seq; @@ -1008,7 +1008,7 @@ static int __mark_extent(struct btree_trans *trans, int bch2_mark_extent(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); @@ -1016,9 +1016,10 @@ int bch2_mark_extent(struct btree_trans *trans, int bch2_mark_stripe(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, + struct bkey_s_c old, struct bkey_s _new, unsigned flags) { + struct bkey_s_c new = _new.s_c; bool gc = flags & BTREE_TRIGGER_GC; u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; @@ -1153,7 +1154,7 @@ static int __mark_reservation(struct btree_trans *trans, int bch2_mark_reservation(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index c7168b86bb2e..de1a28dd0e07 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -328,13 +328,13 @@ int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, struct gc_pos, unsigned); int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); @@ -346,12 +346,12 @@ int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, s if (_old.k->type) \ ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \ if (!ret && _new.k->type) \ - ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \ + ret = _fn(_trans, _btree_id, _level, _new.s_c, _flags & ~BTREE_TRIGGER_OVERWRITE);\ ret; \ }) #define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ - mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new.s_c, _flags) + mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 32e0736ba718..e6978005bafd 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -592,7 +592,7 @@ int bch2_trans_mark_inode(struct btree_trans *trans, int bch2_mark_inode(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { struct bch_fs *c = trans->c; @@ -600,12 +600,9 @@ int bch2_mark_inode(struct btree_trans *trans, u64 journal_seq = trans->journal_res.seq; if (flags & BTREE_TRIGGER_INSERT) { - struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v; - BUG_ON(!journal_seq); - BUG_ON(new.k->type != KEY_TYPE_inode_v3); - v->bi_journal_seq = cpu_to_le64(journal_seq); + bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(journal_seq); } if (flags & BTREE_TRIGGER_GC) { diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 7533a16a78cb..f1838dccfb3c 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -20,7 +20,7 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index aa2ff7ee801f..5c1ae3be8e48 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -268,7 +268,7 @@ static int __mark_reflink_p(struct btree_trans *trans, int bch2_mark_reflink_p(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index fa2ec7f8314a..130b44625c85 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -12,7 +12,7 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ .key_invalid = bch2_reflink_p_invalid, \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 96df4052ff7b..b2306fabd1c9 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -276,7 +276,7 @@ static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id) mutex_unlock(&c->snapshot_table_lock); } -int bch2_mark_snapshot(struct btree_trans *trans, +static int __bch2_mark_snapshot(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c old, struct bkey_s_c new, unsigned flags) @@ -330,6 +330,14 @@ int bch2_mark_snapshot(struct btree_trans *trans, return ret; } +int bch2_mark_snapshot(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) +{ + return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags); +} + int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, struct bch_snapshot *s) { @@ -1055,7 +1063,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); - ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, + ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); if (ret) goto err; @@ -1664,7 +1672,7 @@ int bch2_snapshots_read(struct bch_fs *c) int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, - bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: + __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: bch2_snapshot_set_equiv(trans, k) ?: bch2_check_snapshot_needs_deletion(trans, k)) ?: for_each_btree_key(trans, iter, BTREE_ID_snapshots, diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 94f35b2cfbb3..bf5b200d189b 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -22,7 +22,7 @@ void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ .key_invalid = bch2_snapshot_invalid, \ From 089e311347ebe347a79545ca117a02811623c91e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Dec 2023 23:44:50 -0500 Subject: [PATCH 191/226] bcachefs: Kill BTREE_TRIGGER_NOATOMIC dead code Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey_methods.h | 2 -- fs/bcachefs/btree_gc.c | 5 +---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index da934d8f75fe..896020055097 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -102,7 +102,6 @@ enum btree_update_flags { __BTREE_TRIGGER_GC, __BTREE_TRIGGER_BUCKET_INVALIDATE, - __BTREE_TRIGGER_NOATOMIC, }; #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) @@ -116,7 +115,6 @@ enum btree_update_flags { #define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) -#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) static inline int bch2_trans_mark_key(struct btree_trans *trans, enum btree_id btree_id, unsigned level, diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 88e069d7a7bb..7858c4cccf3d 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -813,9 +813,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, struct bch_fs *c = trans->c; struct bkey deleted = KEY(0, 0, 0); struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; - unsigned flags = - BTREE_TRIGGER_GC| - (initial ? BTREE_TRIGGER_NOATOMIC : 0); int ret = 0; deleted.p = k->k->p; @@ -837,7 +834,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, } ret = commit_do(trans, NULL, NULL, 0, - bch2_mark_key(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), flags)); + bch2_mark_key(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); fsck_err: err: bch_err_fn(c, ret); From c95e9ec48682425267e682e5489ee9dc42313335 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Dec 2023 23:54:52 -0500 Subject: [PATCH 192/226] bcachefs: BTREE_TRIGGER_TRANSACTIONAL New flag so that triggers can distinguish whether we're running transactional or atomic triggers (or gc) - unifying the callbacks. Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey_methods.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 896020055097..d8f42ba1cd0f 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -95,11 +95,10 @@ enum btree_update_flags { __BTREE_UPDATE_NOJOURNAL, __BTREE_UPDATE_KEY_CACHE_RECLAIM, - __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ - + __BTREE_TRIGGER_NORUN, + __BTREE_TRIGGER_TRANSACTIONAL, __BTREE_TRIGGER_INSERT, __BTREE_TRIGGER_OVERWRITE, - __BTREE_TRIGGER_GC, __BTREE_TRIGGER_BUCKET_INVALIDATE, }; @@ -108,12 +107,31 @@ enum btree_update_flags { #define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL) #define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM) +/* Don't run triggers at all */ #define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN) +/* + * If set, we're running transactional triggers as part of a transaction commit: + * triggers may generate new updates + * + * If cleared, and either BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE are set, + * we're running atomic triggers during a transaction commit: we have our + * journal reservation, we're holding btree node write locks, and we know the + * transaction is going to commit (returning an error here is a fatal error, + * causing us to go emergency read-only) + */ +#define BTREE_TRIGGER_TRANSACTIONAL (1U << __BTREE_TRIGGER_TRANSACTIONAL) + +/* @new is entering the btree */ #define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) + +/* @old is leaving the btree */ #define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) +/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */ #define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) + +/* signal from bucket invalidate path to alloc trigger */ #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) static inline int bch2_trans_mark_key(struct btree_trans *trans, @@ -124,7 +142,7 @@ static inline int bch2_trans_mark_key(struct btree_trans *trans, const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type); return ops->trans_trigger - ? ops->trans_trigger(trans, btree_id, level, old, new, flags) + ? ops->trans_trigger(trans, btree_id, level, old, new, flags|BTREE_TRIGGER_TRANSACTIONAL) : 0; } From 282e7c37ebf5d76c4f38d6656354901919a2592f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 00:21:04 -0500 Subject: [PATCH 193/226] bcachefs: kill mem_trigger_run_overwrite_then_insert() now that type signatures are unified, redundant Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 4 ++-- fs/bcachefs/buckets.h | 5 +---- fs/bcachefs/reflink.c | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 7099a7db58a8..233590cff14e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1011,7 +1011,7 @@ int bch2_mark_extent(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, unsigned flags) { - return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); + return trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); } int bch2_mark_stripe(struct btree_trans *trans, @@ -1157,7 +1157,7 @@ int bch2_mark_reservation(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, unsigned flags) { - return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); + return trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); } void bch2_trans_fs_usage_revert(struct btree_trans *trans, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index de1a28dd0e07..c2ffa1bd57b6 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -339,7 +339,7 @@ int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned, int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); -#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ +#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ ({ \ int ret = 0; \ \ @@ -350,9 +350,6 @@ int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, s ret; \ }) -#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ - mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) - void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 5c1ae3be8e48..1499356f2d13 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -271,7 +271,7 @@ int bch2_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, unsigned flags) { - return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); + return trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); } /* indirect extents */ From 08bc95901037bce2fa31a7bb4b559f55a31ae88d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 00:05:54 -0500 Subject: [PATCH 194/226] bcachefs: unify inode trigger Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 67 ++++++++++++++++++--------------------------- fs/bcachefs/inode.h | 16 +++++------ 2 files changed, 33 insertions(+), 50 deletions(-) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index e6978005bafd..37dce96f48ac 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -561,61 +561,46 @@ static inline bool bkey_is_deleted_inode(struct bkey_s_c k) return bkey_inode_flags(k) & BCH_INODE_unlinked; } -int bch2_trans_mark_inode(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_s new, - unsigned flags) +int bch2_trigger_inode(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_s new, + unsigned flags) { s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); - bool old_deleted = bkey_is_deleted_inode(old); - bool new_deleted = bkey_is_deleted_inode(new.s_c); - if (nr) { - int ret = bch2_replicas_deltas_realloc(trans, 0); - struct replicas_delta_list *d = trans->fs_usage_deltas; + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + if (nr) { + int ret = bch2_replicas_deltas_realloc(trans, 0); + if (ret) + return ret; - if (ret) - return ret; + trans->fs_usage_deltas->nr_inodes += nr; + } - d->nr_inodes += nr; + bool old_deleted = bkey_is_deleted_inode(old); + bool new_deleted = bkey_is_deleted_inode(new.s_c); + if (old_deleted != new_deleted) { + int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted); + if (ret) + return ret; + } } - if (old_deleted != new_deleted) { - int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted); - if (ret) - return ret; - } + if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) { + BUG_ON(!trans->journal_res.seq); - return 0; -} - -int bch2_mark_inode(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bch_fs_usage *fs_usage; - u64 journal_seq = trans->journal_res.seq; - - if (flags & BTREE_TRIGGER_INSERT) { - BUG_ON(!journal_seq); - - bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(journal_seq); + bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq); } if (flags & BTREE_TRIGGER_GC) { + struct bch_fs *c = trans->c; + percpu_down_read(&c->mark_lock); - preempt_disable(); - - fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC); - fs_usage->nr_inodes += bkey_is_inode(new.k); - fs_usage->nr_inodes -= bkey_is_inode(old.k); - - preempt_enable(); + this_cpu_add(c->usage_gc->nr_inodes, nr); percpu_up_read(&c->mark_lock); } + return 0; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index f1838dccfb3c..fed552e0140a 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -17,32 +17,30 @@ int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, +int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); -int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trans_mark_inode, \ - .atomic_trigger = bch2_mark_inode, \ + .trans_trigger = bch2_trigger_inode, \ + .atomic_trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ .key_invalid = bch2_inode_v2_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trans_mark_inode, \ - .atomic_trigger = bch2_mark_inode, \ + .trans_trigger = bch2_trigger_inode, \ + .atomic_trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ .key_invalid = bch2_inode_v3_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trans_mark_inode, \ - .atomic_trigger = bch2_mark_inode, \ + .trans_trigger = bch2_trigger_inode, \ + .atomic_trigger = bch2_trigger_inode, \ .min_val_size = 48, \ }) From 7bc4d18af4131ce900d7d9d90e51135706818628 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 00:15:58 -0500 Subject: [PATCH 195/226] bcachefs: unify reflink_p trigger Signed-off-by: Kent Overstreet --- fs/bcachefs/reflink.c | 130 ++++++++++++++++++------------------------ fs/bcachefs/reflink.h | 10 ++-- 2 files changed, 58 insertions(+), 82 deletions(-) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 1499356f2d13..74e08a9d7b02 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -74,7 +74,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r return true; } -static int trans_mark_reflink_p_segment(struct btree_trans *trans, +static int trans_trigger_reflink_p_segment(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 *idx, unsigned flags) { @@ -141,47 +141,16 @@ static int trans_mark_reflink_p_segment(struct btree_trans *trans, return ret; } -static int __trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - u64 idx, end_idx; - int ret = 0; - - idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); - end_idx = le64_to_cpu(p.v->idx) + p.k->size + - le32_to_cpu(p.v->back_pad); - - while (idx < end_idx && !ret) - ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); - return ret; -} - -int bch2_trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_s new, - unsigned flags) -{ - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = bkey_s_to_reflink_p(new).v; - - v->front_pad = v->back_pad = 0; - } - - return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); -} - -static s64 __bch2_mark_reflink_p(struct btree_trans *trans, - struct bkey_s_c_reflink_p p, - u64 start, u64 end, - u64 *idx, unsigned flags, size_t r_idx) +static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, + u64 *idx, unsigned flags, size_t r_idx) { struct bch_fs *c = trans->c; struct reflink_gc *r; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; - u64 next_idx = end; + u64 start = le64_to_cpu(p.v->idx); + u64 end = le64_to_cpu(p.v->idx) + p.k->size; + u64 next_idx = end + le32_to_cpu(p.v->back_pad); s64 ret = 0; struct printbuf buf = PRINTBUF; @@ -205,20 +174,24 @@ static s64 __bch2_mark_reflink_p(struct btree_trans *trans, " missing range %llu-%llu", (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), *idx, next_idx)) { - struct bkey_i_error *new; - - new = bch2_trans_kmalloc(trans, sizeof(*new)); - ret = PTR_ERR_OR_ZERO(new); + struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, p.s_c); + ret = PTR_ERR_OR_ZERO(update); if (ret) goto err; - bkey_init(&new->k); - new->k.type = KEY_TYPE_error; - new->k.p = bkey_start_pos(p.k); - new->k.p.offset += *idx - start; - bch2_key_resize(&new->k, next_idx - *idx); - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, - BTREE_TRIGGER_NORUN); + if (next_idx <= start) { + bkey_i_to_reflink_p(update)->v.front_pad = cpu_to_le32(start - next_idx); + } else if (*idx >= end) { + bkey_i_to_reflink_p(update)->v.back_pad = cpu_to_le32(*idx - end); + } else { + bkey_error_init(update); + update->k.p = p.k->p; + update->k.p.offset = next_idx; + update->k.size = next_idx - *idx; + set_bkey_val_u64s(&update->k, 0); + } + + ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, update, BTREE_TRIGGER_NORUN); } *idx = next_idx; @@ -228,50 +201,55 @@ static s64 __bch2_mark_reflink_p(struct btree_trans *trans, return ret; } -static int __mark_reflink_p(struct btree_trans *trans, +static int __trigger_reflink_p(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - struct reflink_gc *ref; - size_t l, r, m; - u64 idx = le64_to_cpu(p.v->idx), start = idx; - u64 end = le64_to_cpu(p.v->idx) + p.k->size; int ret = 0; - BUG_ON(!(flags & BTREE_TRIGGER_GC)); + u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); + u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); - if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) { - idx -= le32_to_cpu(p.v->front_pad); - end += le32_to_cpu(p.v->back_pad); + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + while (idx < end && !ret) + ret = trans_trigger_reflink_p_segment(trans, p, &idx, flags); } - l = 0; - r = c->reflink_gc_nr; - while (l < r) { - m = l + (r - l) / 2; + if (flags & BTREE_TRIGGER_GC) { + size_t l = 0, r = c->reflink_gc_nr; - ref = genradix_ptr(&c->reflink_gc_table, m); - if (ref->offset <= idx) - l = m + 1; - else - r = m; + while (l < r) { + size_t m = l + (r - l) / 2; + struct reflink_gc *ref = genradix_ptr(&c->reflink_gc_table, m); + if (ref->offset <= idx) + l = m + 1; + else + r = m; + } + + while (idx < end && !ret) + ret = gc_trigger_reflink_p_segment(trans, p, &idx, flags, l++); } - while (idx < end && !ret) - ret = __bch2_mark_reflink_p(trans, p, start, end, - &idx, flags, l++); - return ret; } -int bch2_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) +int bch2_trigger_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_s new, + unsigned flags) { - return trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); + if ((flags & BTREE_TRIGGER_TRANSACTIONAL) && + (flags & BTREE_TRIGGER_INSERT)) { + struct bch_reflink_p *v = bkey_s_to_reflink_p(new).v; + + v->front_pad = v->back_pad = 0; + } + + return trigger_run_overwrite_then_insert(__trigger_reflink_p, trans, btree_id, level, old, new, flags); } /* indirect extents */ diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index 130b44625c85..fbb34f468f71 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -9,17 +9,15 @@ int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); -int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); -int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); +int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ .key_invalid = bch2_reflink_p_invalid, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ - .trans_trigger = bch2_trans_mark_reflink_p, \ - .atomic_trigger = bch2_mark_reflink_p, \ + .trans_trigger = bch2_trigger_reflink_p, \ + .atomic_trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) From 6cacd0c4141cbadc7246cf89a9a55220acb91908 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 00:50:21 -0500 Subject: [PATCH 196/226] bcachefs: unify reservation trigger Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 96 +++++++++++++++++-------------------------- fs/bcachefs/buckets.h | 3 +- fs/bcachefs/extents.h | 4 +- 3 files changed, 40 insertions(+), 63 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 233590cff14e..ee28eacb5a21 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1121,45 +1121,6 @@ int bch2_mark_stripe(struct btree_trans *trans, return 0; } -static int __mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bch_fs_usage *fs_usage; - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - s64 sectors = (s64) k.k->size; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - if (flags & BTREE_TRIGGER_OVERWRITE) - sectors = -sectors; - sectors *= replicas; - - percpu_down_read(&c->mark_lock); - preempt_disable(); - - fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC); - replicas = clamp_t(unsigned, replicas, 1, - ARRAY_SIZE(fs_usage->persistent_reserved)); - - fs_usage->reserved += sectors; - fs_usage->persistent_reserved[replicas - 1] += sectors; - - preempt_enable(); - percpu_up_read(&c->mark_lock); - - return 0; -} - -int bch2_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - return trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); -} - void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -1593,40 +1554,57 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, return ret; } -static int __trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +/* KEY_TYPE_reservation */ + +static int __trigger_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { + struct bch_fs *c = trans->c; unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - s64 sectors = (s64) k.k->size; - struct replicas_delta_list *d; - int ret; + s64 sectors = (s64) k.k->size * replicas; if (flags & BTREE_TRIGGER_OVERWRITE) sectors = -sectors; - sectors *= replicas; - ret = bch2_replicas_deltas_realloc(trans, 0); - if (ret) - return ret; + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + int ret = bch2_replicas_deltas_realloc(trans, 0); + if (ret) + return ret; - d = trans->fs_usage_deltas; - replicas = clamp_t(unsigned, replicas, 1, - ARRAY_SIZE(d->persistent_reserved)); + struct replicas_delta_list *d = trans->fs_usage_deltas; + replicas = min(replicas, ARRAY_SIZE(d->persistent_reserved)); + + d->persistent_reserved[replicas - 1] += sectors; + } + + if (flags & BTREE_TRIGGER_GC) { + percpu_down_read(&c->mark_lock); + preempt_disable(); + + struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc); + + replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved)); + fs_usage->reserved += sectors; + fs_usage->persistent_reserved[replicas - 1] += sectors; + + preempt_enable(); + percpu_up_read(&c->mark_lock); + } - d->persistent_reserved[replicas - 1] += sectors; return 0; } -int bch2_trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_s new, - unsigned flags) +int bch2_trigger_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { - return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags); + return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags); } +/* Mark superblocks: */ + static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, size_t b, enum bch_data_type type, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index c2ffa1bd57b6..8cf8d32b0eb3 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -333,12 +333,11 @@ int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); -int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned, +int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); -int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); #define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ ({ \ int ret = 0; \ diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 77ae4476578b..374ce357e8bb 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -454,8 +454,8 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .key_invalid = bch2_reservation_invalid, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ - .trans_trigger = bch2_trans_mark_reservation, \ - .atomic_trigger = bch2_mark_reservation, \ + .trans_trigger = bch2_trigger_reservation, \ + .atomic_trigger = bch2_trigger_reservation, \ .min_val_size = 8, \ }) From 6820ac2cdc3095ac9d2eac62dc7c89bdcee9b8c4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 00:59:17 -0500 Subject: [PATCH 197/226] bcachefs: move bch2_mark_alloc() to alloc_background.c Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 108 +++++++++++++++++++++++++ fs/bcachefs/alloc_background.h | 2 + fs/bcachefs/buckets.c | 144 +++++---------------------------- fs/bcachefs/buckets.h | 6 +- 4 files changed, 132 insertions(+), 128 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 1446501258ab..39af38f3c8da 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -843,6 +843,114 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, return 0; } +int bch2_mark_alloc(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) +{ + bool gc = flags & BTREE_TRIGGER_GC; + u64 journal_seq = trans->journal_res.seq; + u64 bucket_journal_seq; + struct bch_fs *c = trans->c; + struct bch_alloc_v4 old_a_convert, new_a_convert; + const struct bch_alloc_v4 *old_a, *new_a; + struct bch_dev *ca; + int ret = 0; + + /* + * alloc btree is read in by bch2_alloc_read, not gc: + */ + if ((flags & BTREE_TRIGGER_GC) && + !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) + return 0; + + if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans, + "alloc key for invalid device or bucket")) + return -EIO; + + ca = bch_dev_bkey_exists(c, new.k->p.inode); + + old_a = bch2_alloc_to_v4(old, &old_a_convert); + new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert); + + bucket_journal_seq = new_a->journal_seq; + + if ((flags & BTREE_TRIGGER_INSERT) && + data_type_is_empty(old_a->data_type) != + data_type_is_empty(new_a->data_type) && + new.k->type == KEY_TYPE_alloc_v4) { + struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v; + + EBUG_ON(!journal_seq); + + /* + * If the btree updates referring to a bucket weren't flushed + * before the bucket became empty again, then the we don't have + * to wait on a journal flush before we can reuse the bucket: + */ + v->journal_seq = bucket_journal_seq = + data_type_is_empty(new_a->data_type) && + (journal_seq == v->journal_seq || + bch2_journal_noflush_seq(&c->journal, v->journal_seq)) + ? 0 : journal_seq; + } + + if (!data_type_is_empty(old_a->data_type) && + data_type_is_empty(new_a->data_type) && + bucket_journal_seq) { + ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, + c->journal.flushed_seq_ondisk, + new.k->p.inode, new.k->p.offset, + bucket_journal_seq); + if (ret) { + bch2_fs_fatal_error(c, + "error setting bucket_needs_journal_commit: %i", ret); + return ret; + } + } + + percpu_down_read(&c->mark_lock); + if (!gc && new_a->gen != old_a->gen) + *bucket_gen(ca, new.k->p.offset) = new_a->gen; + + bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc); + + if (gc) { + struct bucket *g = gc_bucket(ca, new.k->p.offset); + + bucket_lock(g); + + g->gen_valid = 1; + g->gen = new_a->gen; + g->data_type = new_a->data_type; + g->stripe = new_a->stripe; + g->stripe_redundancy = new_a->stripe_redundancy; + g->dirty_sectors = new_a->dirty_sectors; + g->cached_sectors = new_a->cached_sectors; + + bucket_unlock(g); + } + percpu_up_read(&c->mark_lock); + + if (new_a->data_type == BCH_DATA_free && + (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk)) + closure_wake_up(&c->freelist_wait); + + if (new_a->data_type == BCH_DATA_need_discard && + (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk)) + bch2_do_discards(c); + + if (old_a->data_type != BCH_DATA_cached && + new_a->data_type == BCH_DATA_cached && + should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) + bch2_do_invalidates(c); + + if (new_a->data_type == BCH_DATA_need_gc_gens) + bch2_do_gc_gens(c); + + return 0; +} + /* * This synthesizes deleted extents for holes, similar to BTREE_ITER_SLOTS for * extents style btrees, but works on non-extents btrees: diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 7be590a4fa81..09fb93d36cea 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -234,6 +234,8 @@ int bch2_alloc_read(struct bch_fs *); int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); +int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_do_discards(struct bch_fs *); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index ee28eacb5a21..e8dc477e7b72 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -296,10 +296,10 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) } } -static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, - struct bch_alloc_v4 old, - struct bch_alloc_v4 new, - u64 journal_seq, bool gc) +void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, + const struct bch_alloc_v4 *old, + const struct bch_alloc_v4 *new, + u64 journal_seq, bool gc) { struct bch_fs_usage *fs_usage; struct bch_dev_usage *u; @@ -307,24 +307,24 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, preempt_disable(); fs_usage = fs_usage_ptr(c, journal_seq, gc); - if (data_type_is_hidden(old.data_type)) + if (data_type_is_hidden(old->data_type)) fs_usage->hidden -= ca->mi.bucket_size; - if (data_type_is_hidden(new.data_type)) + if (data_type_is_hidden(new->data_type)) fs_usage->hidden += ca->mi.bucket_size; u = dev_usage_ptr(ca, journal_seq, gc); - u->d[old.data_type].buckets--; - u->d[new.data_type].buckets++; + u->d[old->data_type].buckets--; + u->d[new->data_type].buckets++; - u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old); - u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new); + u->d[old->data_type].sectors -= bch2_bucket_sectors_dirty(*old); + u->d[new->data_type].sectors += bch2_bucket_sectors_dirty(*new); - u->d[BCH_DATA_cached].sectors += new.cached_sectors; - u->d[BCH_DATA_cached].sectors -= old.cached_sectors; + u->d[BCH_DATA_cached].sectors += new->cached_sectors; + u->d[BCH_DATA_cached].sectors -= old->cached_sectors; - u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old); - u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new); + u->d[old->data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, *old); + u->d[new->data_type].fragmented += bch2_bucket_sectors_fragmented(ca, *new); preempt_enable(); } @@ -343,10 +343,10 @@ static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b) static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, struct bucket old, struct bucket new) { - bch2_dev_usage_update(c, ca, - bucket_m_to_alloc(old), - bucket_m_to_alloc(new), - 0, true); + struct bch_alloc_v4 old_a = bucket_m_to_alloc(old); + struct bch_alloc_v4 new_a = bucket_m_to_alloc(new); + + bch2_dev_usage_update(c, ca, &old_a, &new_a, 0, true); } static inline int __update_replicas(struct bch_fs *c, @@ -496,114 +496,6 @@ int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 return bch2_update_replicas_list(trans, &r.e, sectors); } -int bch2_mark_alloc(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - bool gc = flags & BTREE_TRIGGER_GC; - u64 journal_seq = trans->journal_res.seq; - u64 bucket_journal_seq; - struct bch_fs *c = trans->c; - struct bch_alloc_v4 old_a_convert, new_a_convert; - const struct bch_alloc_v4 *old_a, *new_a; - struct bch_dev *ca; - int ret = 0; - - /* - * alloc btree is read in by bch2_alloc_read, not gc: - */ - if ((flags & BTREE_TRIGGER_GC) && - !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) - return 0; - - if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans, - "alloc key for invalid device or bucket")) - return -EIO; - - ca = bch_dev_bkey_exists(c, new.k->p.inode); - - old_a = bch2_alloc_to_v4(old, &old_a_convert); - new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert); - - bucket_journal_seq = new_a->journal_seq; - - if ((flags & BTREE_TRIGGER_INSERT) && - data_type_is_empty(old_a->data_type) != - data_type_is_empty(new_a->data_type) && - new.k->type == KEY_TYPE_alloc_v4) { - struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v; - - EBUG_ON(!journal_seq); - - /* - * If the btree updates referring to a bucket weren't flushed - * before the bucket became empty again, then the we don't have - * to wait on a journal flush before we can reuse the bucket: - */ - v->journal_seq = bucket_journal_seq = - data_type_is_empty(new_a->data_type) && - (journal_seq == v->journal_seq || - bch2_journal_noflush_seq(&c->journal, v->journal_seq)) - ? 0 : journal_seq; - } - - if (!data_type_is_empty(old_a->data_type) && - data_type_is_empty(new_a->data_type) && - bucket_journal_seq) { - ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, - new.k->p.inode, new.k->p.offset, - bucket_journal_seq); - if (ret) { - bch2_fs_fatal_error(c, - "error setting bucket_needs_journal_commit: %i", ret); - return ret; - } - } - - percpu_down_read(&c->mark_lock); - if (!gc && new_a->gen != old_a->gen) - *bucket_gen(ca, new.k->p.offset) = new_a->gen; - - bch2_dev_usage_update(c, ca, *old_a, *new_a, journal_seq, gc); - - if (gc) { - struct bucket *g = gc_bucket(ca, new.k->p.offset); - - bucket_lock(g); - - g->gen_valid = 1; - g->gen = new_a->gen; - g->data_type = new_a->data_type; - g->stripe = new_a->stripe; - g->stripe_redundancy = new_a->stripe_redundancy; - g->dirty_sectors = new_a->dirty_sectors; - g->cached_sectors = new_a->cached_sectors; - - bucket_unlock(g); - } - percpu_up_read(&c->mark_lock); - - if (new_a->data_type == BCH_DATA_free && - (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk)) - closure_wake_up(&c->freelist_wait); - - if (new_a->data_type == BCH_DATA_need_discard && - (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk)) - bch2_do_discards(c); - - if (old_a->data_type != BCH_DATA_cached && - new_a->data_type == BCH_DATA_cached && - should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) - bch2_do_invalidates(c); - - if (new_a->data_type == BCH_DATA_need_gc_gens) - bch2_do_gc_gens(c); - - return 0; -} - int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, size_t b, enum bch_data_type data_type, unsigned sectors, struct gc_pos pos, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 8cf8d32b0eb3..142cf170b198 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -302,6 +302,10 @@ u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *); struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); +void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *, + const struct bch_alloc_v4 *, + const struct bch_alloc_v4 *, u64, bool); + /* key/bucket marking: */ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, @@ -327,8 +331,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, size_t, enum bch_data_type, unsigned, struct gc_pos, unsigned); -int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, From 153d1c63c2aca97871be2f67551eb992f95488ab Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 01:21:01 -0500 Subject: [PATCH 198/226] bcachefs: unify alloc trigger Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 300 +++++++++++++++------------------ fs/bcachefs/alloc_background.h | 22 ++- 2 files changed, 147 insertions(+), 175 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 39af38f3c8da..f7fd534b85ed 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -749,173 +749,163 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans, return ret; } -int bch2_trans_mark_alloc(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) +int bch2_trigger_alloc(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { struct bch_fs *c = trans->c; - struct bch_alloc_v4 old_a_convert, *new_a; - const struct bch_alloc_v4 *old_a; - u64 old_lru, new_lru; int ret = 0; - /* - * Deletion only happens in the device removal path, with - * BTREE_TRIGGER_NORUN: - */ - BUG_ON(new.k->type != KEY_TYPE_alloc_v4); - - old_a = bch2_alloc_to_v4(old, &old_a_convert); - new_a = bkey_s_to_alloc_v4(new).v; - - new_a->data_type = alloc_data_type(*new_a, new_a->data_type); - - if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) { - new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); - SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); - SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); - } - - if (data_type_is_empty(new_a->data_type) && - BCH_ALLOC_V4_NEED_INC_GEN(new_a) && - !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) { - new_a->gen++; - SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); - } - - if (old_a->data_type != new_a->data_type || - (new_a->data_type == BCH_DATA_free && - alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) { - ret = bch2_bucket_do_index(trans, old, old_a, false) ?: - bch2_bucket_do_index(trans, new.s_c, new_a, true); - if (ret) - return ret; - } - - if (new_a->data_type == BCH_DATA_cached && - !new_a->io_time[READ]) - new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - - old_lru = alloc_lru_idx_read(*old_a); - new_lru = alloc_lru_idx_read(*new_a); - - if (old_lru != new_lru) { - ret = bch2_lru_change(trans, new.k->p.inode, - bucket_to_u64(new.k->p), - old_lru, new_lru); - if (ret) - return ret; - } - - new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, - bch_dev_bkey_exists(c, new.k->p.inode)); - - if (old_a->fragmentation_lru != new_a->fragmentation_lru) { - ret = bch2_lru_change(trans, - BCH_LRU_FRAGMENTATION_START, - bucket_to_u64(new.k->p), - old_a->fragmentation_lru, new_a->fragmentation_lru); - if (ret) - return ret; - } - - if (old_a->gen != new_a->gen) { - ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen); - if (ret) - return ret; - } - - /* - * need to know if we're getting called from the invalidate path or - * not: - */ - - if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && - old_a->cached_sectors) { - ret = bch2_update_cached_sectors_list(trans, new.k->p.inode, - -((s64) old_a->cached_sectors)); - if (ret) - return ret; - } - - return 0; -} - -int bch2_mark_alloc(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - bool gc = flags & BTREE_TRIGGER_GC; - u64 journal_seq = trans->journal_res.seq; - u64 bucket_journal_seq; - struct bch_fs *c = trans->c; - struct bch_alloc_v4 old_a_convert, new_a_convert; - const struct bch_alloc_v4 *old_a, *new_a; - struct bch_dev *ca; - int ret = 0; - - /* - * alloc btree is read in by bch2_alloc_read, not gc: - */ - if ((flags & BTREE_TRIGGER_GC) && - !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) - return 0; - if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans, "alloc key for invalid device or bucket")) return -EIO; - ca = bch_dev_bkey_exists(c, new.k->p.inode); + struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode); - old_a = bch2_alloc_to_v4(old, &old_a_convert); - new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert); + struct bch_alloc_v4 old_a_convert; + const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - bucket_journal_seq = new_a->journal_seq; + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; - if ((flags & BTREE_TRIGGER_INSERT) && - data_type_is_empty(old_a->data_type) != - data_type_is_empty(new_a->data_type) && - new.k->type == KEY_TYPE_alloc_v4) { - struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v; + new_a->data_type = alloc_data_type(*new_a, new_a->data_type); - EBUG_ON(!journal_seq); + if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) { + new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); + new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); + SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); + SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); + } + + if (data_type_is_empty(new_a->data_type) && + BCH_ALLOC_V4_NEED_INC_GEN(new_a) && + !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) { + new_a->gen++; + SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); + } + + if (old_a->data_type != new_a->data_type || + (new_a->data_type == BCH_DATA_free && + alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) { + ret = bch2_bucket_do_index(trans, old, old_a, false) ?: + bch2_bucket_do_index(trans, new.s_c, new_a, true); + if (ret) + return ret; + } + + if (new_a->data_type == BCH_DATA_cached && + !new_a->io_time[READ]) + new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); + + u64 old_lru = alloc_lru_idx_read(*old_a); + u64 new_lru = alloc_lru_idx_read(*new_a); + if (old_lru != new_lru) { + ret = bch2_lru_change(trans, new.k->p.inode, + bucket_to_u64(new.k->p), + old_lru, new_lru); + if (ret) + return ret; + } + + new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, + bch_dev_bkey_exists(c, new.k->p.inode)); + if (old_a->fragmentation_lru != new_a->fragmentation_lru) { + ret = bch2_lru_change(trans, + BCH_LRU_FRAGMENTATION_START, + bucket_to_u64(new.k->p), + old_a->fragmentation_lru, new_a->fragmentation_lru); + if (ret) + return ret; + } + + if (old_a->gen != new_a->gen) { + ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen); + if (ret) + return ret; + } /* - * If the btree updates referring to a bucket weren't flushed - * before the bucket became empty again, then the we don't have - * to wait on a journal flush before we can reuse the bucket: + * need to know if we're getting called from the invalidate path or + * not: */ - v->journal_seq = bucket_journal_seq = - data_type_is_empty(new_a->data_type) && - (journal_seq == v->journal_seq || - bch2_journal_noflush_seq(&c->journal, v->journal_seq)) - ? 0 : journal_seq; - } - if (!data_type_is_empty(old_a->data_type) && - data_type_is_empty(new_a->data_type) && - bucket_journal_seq) { - ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, - new.k->p.inode, new.k->p.offset, - bucket_journal_seq); - if (ret) { - bch2_fs_fatal_error(c, - "error setting bucket_needs_journal_commit: %i", ret); - return ret; + if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && + old_a->cached_sectors) { + ret = bch2_update_cached_sectors_list(trans, new.k->p.inode, + -((s64) old_a->cached_sectors)); + if (ret) + return ret; } } - percpu_down_read(&c->mark_lock); - if (!gc && new_a->gen != old_a->gen) - *bucket_gen(ca, new.k->p.offset) = new_a->gen; + if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) { + struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + u64 journal_seq = trans->journal_res.seq; + u64 bucket_journal_seq = new_a->journal_seq; - bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc); + if ((flags & BTREE_TRIGGER_INSERT) && + data_type_is_empty(old_a->data_type) != + data_type_is_empty(new_a->data_type) && + new.k->type == KEY_TYPE_alloc_v4) { + struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v; - if (gc) { + /* + * If the btree updates referring to a bucket weren't flushed + * before the bucket became empty again, then the we don't have + * to wait on a journal flush before we can reuse the bucket: + */ + v->journal_seq = bucket_journal_seq = + data_type_is_empty(new_a->data_type) && + (journal_seq == v->journal_seq || + bch2_journal_noflush_seq(&c->journal, v->journal_seq)) + ? 0 : journal_seq; + } + + if (!data_type_is_empty(old_a->data_type) && + data_type_is_empty(new_a->data_type) && + bucket_journal_seq) { + ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, + c->journal.flushed_seq_ondisk, + new.k->p.inode, new.k->p.offset, + bucket_journal_seq); + if (ret) { + bch2_fs_fatal_error(c, + "error setting bucket_needs_journal_commit: %i", ret); + return ret; + } + } + + percpu_down_read(&c->mark_lock); + if (new_a->gen != old_a->gen) + *bucket_gen(ca, new.k->p.offset) = new_a->gen; + + bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false); + + if (new_a->data_type == BCH_DATA_free && + (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk)) + closure_wake_up(&c->freelist_wait); + + if (new_a->data_type == BCH_DATA_need_discard && + (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk)) + bch2_do_discards(c); + + if (old_a->data_type != BCH_DATA_cached && + new_a->data_type == BCH_DATA_cached && + should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) + bch2_do_invalidates(c); + + if (new_a->data_type == BCH_DATA_need_gc_gens) + bch2_do_gc_gens(c); + percpu_up_read(&c->mark_lock); + } + + if ((flags & BTREE_TRIGGER_GC) && + (flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) { + struct bch_alloc_v4 new_a_convert; + const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert); + + percpu_down_read(&c->mark_lock); struct bucket *g = gc_bucket(ca, new.k->p.offset); bucket_lock(g); @@ -929,24 +919,8 @@ int bch2_mark_alloc(struct btree_trans *trans, g->cached_sectors = new_a->cached_sectors; bucket_unlock(g); + percpu_up_read(&c->mark_lock); } - percpu_up_read(&c->mark_lock); - - if (new_a->data_type == BCH_DATA_free && - (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk)) - closure_wake_up(&c->freelist_wait); - - if (new_a->data_type == BCH_DATA_need_discard && - (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk)) - bch2_do_discards(c); - - if (old_a->data_type != BCH_DATA_cached && - new_a->data_type == BCH_DATA_cached && - should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) - bch2_do_invalidates(c); - - if (new_a->data_type == BCH_DATA_need_gc_gens) - bch2_do_gc_gens(c); return 0; } diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 09fb93d36cea..dbdf153e58ac 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -182,24 +182,24 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v1_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trans_mark_alloc, \ - .atomic_trigger = bch2_mark_alloc, \ + .trans_trigger = bch2_trigger_alloc, \ + .atomic_trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v2_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trans_mark_alloc, \ - .atomic_trigger = bch2_mark_alloc, \ + .trans_trigger = bch2_trigger_alloc, \ + .atomic_trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v3_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trans_mark_alloc, \ - .atomic_trigger = bch2_mark_alloc, \ + .trans_trigger = bch2_trigger_alloc, \ + .atomic_trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) @@ -207,8 +207,8 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .key_invalid = bch2_alloc_v4_invalid, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ - .trans_trigger = bch2_trans_mark_alloc, \ - .atomic_trigger = bch2_mark_alloc, \ + .trans_trigger = bch2_trigger_alloc, \ + .atomic_trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) @@ -232,10 +232,8 @@ static inline bool bkey_is_alloc(const struct bkey *k) int bch2_alloc_read(struct bch_fs *); -int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); -int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); +int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_do_discards(struct bch_fs *); From f4f78779bb2ad1d1a56036c45f8e824d9700eeba Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 01:31:49 -0500 Subject: [PATCH 199/226] bcachefs: move stripe triggers to ec.c Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 356 +++--------------------------------------- fs/bcachefs/buckets.h | 12 +- fs/bcachefs/ec.c | 321 +++++++++++++++++++++++++++++++++++++ fs/bcachefs/ec.h | 3 + 4 files changed, 352 insertions(+), 340 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e8dc477e7b72..e035ba3c359b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -340,11 +340,11 @@ static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b) }; } -static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, - struct bucket old, struct bucket new) +void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, + struct bucket *old, struct bucket *new) { - struct bch_alloc_v4 old_a = bucket_m_to_alloc(old); - struct bch_alloc_v4 new_a = bucket_m_to_alloc(new); + struct bch_alloc_v4 old_a = bucket_m_to_alloc(*old); + struct bch_alloc_v4 new_a = bucket_m_to_alloc(*new); bch2_dev_usage_update(c, ca, &old_a, &new_a, 0, true); } @@ -364,9 +364,9 @@ static inline int __update_replicas(struct bch_fs *c, return 0; } -static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, - struct bch_replicas_entry_v1 *r, s64 sectors, - unsigned journal_seq, bool gc) +int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k, + struct bch_replicas_entry_v1 *r, s64 sectors, + unsigned journal_seq, bool gc) { struct bch_fs_usage *fs_usage; int idx, ret = 0; @@ -413,7 +413,7 @@ static inline int update_cached_sectors(struct bch_fs *c, bch2_replicas_entry_cached(&r.e, dev); - return update_replicas(c, k, &r.e, sectors, journal_seq, gc); + return bch2_update_replicas(c, k, &r.e, sectors, journal_seq, gc); } static int __replicas_deltas_realloc(struct btree_trans *trans, unsigned more, @@ -544,17 +544,17 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, err: bucket_unlock(g); if (!ret) - bch2_dev_usage_update_m(c, ca, old, new); + bch2_dev_usage_update_m(c, ca, &old, &new); percpu_up_read(&c->mark_lock); return ret; } -static int check_bucket_ref(struct btree_trans *trans, - struct bkey_s_c k, - const struct bch_extent_ptr *ptr, - s64 sectors, enum bch_data_type ptr_data_type, - u8 b_gen, u8 bucket_data_type, - u32 bucket_sectors) +int bch2_check_bucket_ref(struct btree_trans *trans, + struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + s64 sectors, enum bch_data_type ptr_data_type, + u8 b_gen, u8 bucket_data_type, + u32 bucket_sectors) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); @@ -653,64 +653,6 @@ static int check_bucket_ref(struct btree_trans *trans, goto out; } -static int mark_stripe_bucket(struct btree_trans *trans, - struct bkey_s_c k, - unsigned ptr_idx, - unsigned flags) -{ - struct bch_fs *c = trans->c; - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - unsigned nr_data = s->nr_blocks - s->nr_redundant; - bool parity = ptr_idx >= nr_data; - enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe; - s64 sectors = parity ? le16_to_cpu(s->sectors) : 0; - const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket old, new, *g; - struct printbuf buf = PRINTBUF; - int ret = 0; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - /* * XXX doesn't handle deletion */ - - percpu_down_read(&c->mark_lock); - g = PTR_GC_BUCKET(ca, ptr); - - if (g->dirty_sectors || - (g->stripe && g->stripe != k.k->p.offset)) { - bch2_fs_inconsistent(c, - "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", - ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - ret = -EINVAL; - goto err; - } - - bucket_lock(g); - old = *g; - - ret = check_bucket_ref(trans, k, ptr, sectors, data_type, - g->gen, g->data_type, - g->dirty_sectors); - if (ret) - goto err; - - g->data_type = data_type; - g->dirty_sectors += sectors; - - g->stripe = k.k->p.offset; - g->stripe_redundancy = s->nr_redundant; - new = *g; -err: - bucket_unlock(g); - if (!ret) - bch2_dev_usage_update_m(c, ca, old, new); - percpu_up_read(&c->mark_lock); - printbuf_exit(&buf); - return ret; -} - static int __mark_pointer(struct btree_trans *trans, struct bkey_s_c k, const struct bch_extent_ptr *ptr, @@ -721,7 +663,7 @@ static int __mark_pointer(struct btree_trans *trans, u32 *dst_sectors = !ptr->cached ? dirty_sectors : cached_sectors; - int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type, + int ret = bch2_check_bucket_ref(trans, k, ptr, sectors, ptr_data_type, bucket_gen, *bucket_data_type, *dst_sectors); if (ret) @@ -770,7 +712,7 @@ static int bch2_mark_pointer(struct btree_trans *trans, new = *g; bucket_unlock(g); if (!ret) - bch2_dev_usage_update_m(c, ca, old, new); + bch2_dev_usage_update_m(c, ca, &old, &new); percpu_up_read(&c->mark_lock); return ret; @@ -812,7 +754,7 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans, mutex_unlock(&c->ec_stripes_heap_lock); r.e.data_type = data_type; - update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); + bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); return 0; } @@ -884,7 +826,7 @@ static int __mark_extent(struct btree_trans *trans, } if (r.e.nr_devs) { - ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); + ret = bch2_update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); if (ret) { struct printbuf buf = PRINTBUF; @@ -906,113 +848,6 @@ int bch2_mark_extent(struct btree_trans *trans, return trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); } -int bch2_mark_stripe(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s _new, - unsigned flags) -{ - struct bkey_s_c new = _new.s_c; - bool gc = flags & BTREE_TRIGGER_GC; - u64 journal_seq = trans->journal_res.seq; - struct bch_fs *c = trans->c; - u64 idx = new.k->p.offset; - const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe - ? bkey_s_c_to_stripe(old).v : NULL; - const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe - ? bkey_s_c_to_stripe(new).v : NULL; - unsigned i; - int ret; - - BUG_ON(gc && old_s); - - if (!gc) { - struct stripe *m = genradix_ptr(&c->stripes, idx); - - if (!m) { - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; - - bch2_bkey_val_to_text(&buf1, c, old); - bch2_bkey_val_to_text(&buf2, c, new); - bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n" - "old %s\n" - "new %s", idx, buf1.buf, buf2.buf); - printbuf_exit(&buf2); - printbuf_exit(&buf1); - bch2_inconsistent_error(c); - return -1; - } - - if (!new_s) { - bch2_stripes_heap_del(c, m, idx); - - memset(m, 0, sizeof(*m)); - } else { - m->sectors = le16_to_cpu(new_s->sectors); - m->algorithm = new_s->algorithm; - m->nr_blocks = new_s->nr_blocks; - m->nr_redundant = new_s->nr_redundant; - m->blocks_nonempty = 0; - - for (i = 0; i < new_s->nr_blocks; i++) - m->blocks_nonempty += !!stripe_blockcount_get(new_s, i); - - if (!old_s) - bch2_stripes_heap_insert(c, m, idx); - else - bch2_stripes_heap_update(c, m, idx); - } - } else { - struct gc_stripe *m = - genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); - - if (!m) { - bch_err(c, "error allocating memory for gc_stripes, idx %llu", - idx); - return -BCH_ERR_ENOMEM_mark_stripe; - } - /* - * This will be wrong when we bring back runtime gc: we should - * be unmarking the old key and then marking the new key - */ - m->alive = true; - m->sectors = le16_to_cpu(new_s->sectors); - m->nr_blocks = new_s->nr_blocks; - m->nr_redundant = new_s->nr_redundant; - - for (i = 0; i < new_s->nr_blocks; i++) - m->ptrs[i] = new_s->ptrs[i]; - - bch2_bkey_to_replicas(&m->r.e, new); - - /* - * gc recalculates this field from stripe ptr - * references: - */ - memset(m->block_sectors, 0, sizeof(m->block_sectors)); - - for (i = 0; i < new_s->nr_blocks; i++) { - ret = mark_stripe_bucket(trans, new, i, flags); - if (ret) - return ret; - } - - ret = update_replicas(c, new, &m->r.e, - ((s64) m->sectors * m->nr_redundant), - journal_seq, gc); - if (ret) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, new); - bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); - printbuf_exit(&buf); - return ret; - } - } - - return 0; -} - void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -1293,159 +1128,6 @@ int bch2_trans_mark_extent(struct btree_trans *trans, return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags); } -static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, - struct bkey_s_c_stripe s, - unsigned idx, bool deleting) -{ - struct bch_fs *c = trans->c; - const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; - struct btree_iter iter; - struct bkey_i_alloc_v4 *a; - enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant - ? BCH_DATA_parity : 0; - s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0; - int ret = 0; - - if (deleting) - sectors = -sectors; - - a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr)); - if (IS_ERR(a)) - return PTR_ERR(a); - - ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type, - a->v.gen, a->v.data_type, - a->v.dirty_sectors); - if (ret) - goto err; - - if (!deleting) { - if (bch2_trans_inconsistent_on(a->v.stripe || - a->v.stripe_redundancy, trans, - "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", - iter.pos.inode, iter.pos.offset, a->v.gen, - bch2_data_types[a->v.data_type], - a->v.dirty_sectors, - a->v.stripe, s.k->p.offset)) { - ret = -EIO; - goto err; - } - - if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans, - "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", - iter.pos.inode, iter.pos.offset, a->v.gen, - bch2_data_types[a->v.data_type], - a->v.dirty_sectors, - s.k->p.offset)) { - ret = -EIO; - goto err; - } - - a->v.stripe = s.k->p.offset; - a->v.stripe_redundancy = s.v->nr_redundant; - a->v.data_type = BCH_DATA_stripe; - } else { - if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset || - a->v.stripe_redundancy != s.v->nr_redundant, trans, - "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", - iter.pos.inode, iter.pos.offset, a->v.gen, - s.k->p.offset, a->v.stripe)) { - ret = -EIO; - goto err; - } - - a->v.stripe = 0; - a->v.stripe_redundancy = 0; - a->v.data_type = alloc_data_type(a->v, BCH_DATA_user); - } - - a->v.dirty_sectors += sectors; - if (data_type) - a->v.data_type = !deleting ? data_type : 0; - - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - if (ret) - goto err; -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -int bch2_trans_mark_stripe(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - const struct bch_stripe *old_s = NULL; - struct bch_stripe *new_s = NULL; - struct bch_replicas_padded r; - unsigned i, nr_blocks; - int ret = 0; - - if (old.k->type == KEY_TYPE_stripe) - old_s = bkey_s_c_to_stripe(old).v; - if (new.k->type == KEY_TYPE_stripe) - new_s = bkey_s_to_stripe(new).v; - - /* - * If the pointers aren't changing, we don't need to do anything: - */ - if (new_s && old_s && - new_s->nr_blocks == old_s->nr_blocks && - new_s->nr_redundant == old_s->nr_redundant && - !memcmp(old_s->ptrs, new_s->ptrs, - new_s->nr_blocks * sizeof(struct bch_extent_ptr))) - return 0; - - BUG_ON(new_s && old_s && - (new_s->nr_blocks != old_s->nr_blocks || - new_s->nr_redundant != old_s->nr_redundant)); - - nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks; - - if (new_s) { - s64 sectors = le16_to_cpu(new_s->sectors); - - bch2_bkey_to_replicas(&r.e, new.s_c); - ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); - if (ret) - return ret; - } - - if (old_s) { - s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); - - bch2_bkey_to_replicas(&r.e, old); - ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); - if (ret) - return ret; - } - - for (i = 0; i < nr_blocks; i++) { - if (new_s && old_s && - !memcmp(&new_s->ptrs[i], - &old_s->ptrs[i], - sizeof(new_s->ptrs[i]))) - continue; - - if (new_s) { - ret = bch2_trans_mark_stripe_bucket(trans, - bkey_s_to_stripe(new).c, i, false); - if (ret) - break; - } - - if (old_s) { - ret = bch2_trans_mark_stripe_bucket(trans, - bkey_s_c_to_stripe(old), i, true); - if (ret) - break; - } - } - - return ret; -} - /* KEY_TYPE_reservation */ static int __trigger_reservation(struct btree_trans *trans, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 142cf170b198..3c0b868cf858 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -305,6 +305,8 @@ bch2_fs_usage_read_short(struct bch_fs *); void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *, const struct bch_alloc_v4 *, const struct bch_alloc_v4 *, u64, bool); +void bch2_dev_usage_update_m(struct bch_fs *, struct bch_dev *, + struct bucket *, struct bucket *); /* key/bucket marking: */ @@ -320,6 +322,9 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, : c->usage[journal_seq & JOURNAL_BUF_MASK]); } +int bch2_update_replicas(struct bch_fs *, struct bkey_s_c, + struct bch_replicas_entry_v1 *, s64, + unsigned, bool); int bch2_update_replicas_list(struct btree_trans *, struct bch_replicas_entry_v1 *, s64); int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64); @@ -327,19 +332,20 @@ int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned); void bch2_fs_usage_initialize(struct bch_fs *); +int bch2_check_bucket_ref(struct btree_trans *, struct bkey_s_c, + const struct bch_extent_ptr *, + s64, enum bch_data_type, u8, u8, u32); + int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, size_t, enum bch_data_type, unsigned, struct gc_pos, unsigned); int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); -int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); -int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); #define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ ({ \ int ret = 0; \ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index e89185a28e08..0c757e195356 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -3,6 +3,7 @@ /* erasure coding */ #include "bcachefs.h" +#include "alloc_background.h" #include "alloc_foreground.h" #include "backpointers.h" #include "bkey_buf.h" @@ -156,6 +157,326 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, } } +/* Triggers: */ + +static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, + struct bkey_s_c_stripe s, + unsigned idx, bool deleting) +{ + struct bch_fs *c = trans->c; + const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; + struct btree_iter iter; + struct bkey_i_alloc_v4 *a; + enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant + ? BCH_DATA_parity : 0; + s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0; + int ret = 0; + + if (deleting) + sectors = -sectors; + + a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr)); + if (IS_ERR(a)) + return PTR_ERR(a); + + ret = bch2_check_bucket_ref(trans, s.s_c, ptr, sectors, data_type, + a->v.gen, a->v.data_type, + a->v.dirty_sectors); + if (ret) + goto err; + + if (!deleting) { + if (bch2_trans_inconsistent_on(a->v.stripe || + a->v.stripe_redundancy, trans, + "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", + iter.pos.inode, iter.pos.offset, a->v.gen, + bch2_data_types[a->v.data_type], + a->v.dirty_sectors, + a->v.stripe, s.k->p.offset)) { + ret = -EIO; + goto err; + } + + if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans, + "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", + iter.pos.inode, iter.pos.offset, a->v.gen, + bch2_data_types[a->v.data_type], + a->v.dirty_sectors, + s.k->p.offset)) { + ret = -EIO; + goto err; + } + + a->v.stripe = s.k->p.offset; + a->v.stripe_redundancy = s.v->nr_redundant; + a->v.data_type = BCH_DATA_stripe; + } else { + if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset || + a->v.stripe_redundancy != s.v->nr_redundant, trans, + "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", + iter.pos.inode, iter.pos.offset, a->v.gen, + s.k->p.offset, a->v.stripe)) { + ret = -EIO; + goto err; + } + + a->v.stripe = 0; + a->v.stripe_redundancy = 0; + a->v.data_type = alloc_data_type(a->v, BCH_DATA_user); + } + + a->v.dirty_sectors += sectors; + if (data_type) + a->v.data_type = !deleting ? data_type : 0; + + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + if (ret) + goto err; +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_trans_mark_stripe(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) +{ + const struct bch_stripe *old_s = NULL; + struct bch_stripe *new_s = NULL; + struct bch_replicas_padded r; + unsigned i, nr_blocks; + int ret = 0; + + if (old.k->type == KEY_TYPE_stripe) + old_s = bkey_s_c_to_stripe(old).v; + if (new.k->type == KEY_TYPE_stripe) + new_s = bkey_s_to_stripe(new).v; + + /* + * If the pointers aren't changing, we don't need to do anything: + */ + if (new_s && old_s && + new_s->nr_blocks == old_s->nr_blocks && + new_s->nr_redundant == old_s->nr_redundant && + !memcmp(old_s->ptrs, new_s->ptrs, + new_s->nr_blocks * sizeof(struct bch_extent_ptr))) + return 0; + + BUG_ON(new_s && old_s && + (new_s->nr_blocks != old_s->nr_blocks || + new_s->nr_redundant != old_s->nr_redundant)); + + nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks; + + if (new_s) { + s64 sectors = le16_to_cpu(new_s->sectors); + + bch2_bkey_to_replicas(&r.e, new.s_c); + ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); + if (ret) + return ret; + } + + if (old_s) { + s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); + + bch2_bkey_to_replicas(&r.e, old); + ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); + if (ret) + return ret; + } + + for (i = 0; i < nr_blocks; i++) { + if (new_s && old_s && + !memcmp(&new_s->ptrs[i], + &old_s->ptrs[i], + sizeof(new_s->ptrs[i]))) + continue; + + if (new_s) { + ret = bch2_trans_mark_stripe_bucket(trans, + bkey_s_to_stripe(new).c, i, false); + if (ret) + break; + } + + if (old_s) { + ret = bch2_trans_mark_stripe_bucket(trans, + bkey_s_c_to_stripe(old), i, true); + if (ret) + break; + } + } + + return ret; +} + +static int mark_stripe_bucket(struct btree_trans *trans, + struct bkey_s_c k, + unsigned ptr_idx, + unsigned flags) +{ + struct bch_fs *c = trans->c; + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + unsigned nr_data = s->nr_blocks - s->nr_redundant; + bool parity = ptr_idx >= nr_data; + enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe; + s64 sectors = parity ? le16_to_cpu(s->sectors) : 0; + const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx; + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bucket old, new, *g; + struct printbuf buf = PRINTBUF; + int ret = 0; + + BUG_ON(!(flags & BTREE_TRIGGER_GC)); + + /* * XXX doesn't handle deletion */ + + percpu_down_read(&c->mark_lock); + g = PTR_GC_BUCKET(ca, ptr); + + if (g->dirty_sectors || + (g->stripe && g->stripe != k.k->p.offset)) { + bch2_fs_inconsistent(c, + "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", + ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EINVAL; + goto err; + } + + bucket_lock(g); + old = *g; + + ret = bch2_check_bucket_ref(trans, k, ptr, sectors, data_type, + g->gen, g->data_type, + g->dirty_sectors); + if (ret) + goto err; + + g->data_type = data_type; + g->dirty_sectors += sectors; + + g->stripe = k.k->p.offset; + g->stripe_redundancy = s->nr_redundant; + new = *g; +err: + bucket_unlock(g); + if (!ret) + bch2_dev_usage_update_m(c, ca, &old, &new); + percpu_up_read(&c->mark_lock); + printbuf_exit(&buf); + return ret; +} + +int bch2_mark_stripe(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s _new, + unsigned flags) +{ + struct bkey_s_c new = _new.s_c; + bool gc = flags & BTREE_TRIGGER_GC; + u64 journal_seq = trans->journal_res.seq; + struct bch_fs *c = trans->c; + u64 idx = new.k->p.offset; + const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe + ? bkey_s_c_to_stripe(old).v : NULL; + const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe + ? bkey_s_c_to_stripe(new).v : NULL; + unsigned i; + int ret; + + BUG_ON(gc && old_s); + + if (!gc) { + struct stripe *m = genradix_ptr(&c->stripes, idx); + + if (!m) { + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; + + bch2_bkey_val_to_text(&buf1, c, old); + bch2_bkey_val_to_text(&buf2, c, new); + bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n" + "old %s\n" + "new %s", idx, buf1.buf, buf2.buf); + printbuf_exit(&buf2); + printbuf_exit(&buf1); + bch2_inconsistent_error(c); + return -1; + } + + if (!new_s) { + bch2_stripes_heap_del(c, m, idx); + + memset(m, 0, sizeof(*m)); + } else { + m->sectors = le16_to_cpu(new_s->sectors); + m->algorithm = new_s->algorithm; + m->nr_blocks = new_s->nr_blocks; + m->nr_redundant = new_s->nr_redundant; + m->blocks_nonempty = 0; + + for (i = 0; i < new_s->nr_blocks; i++) + m->blocks_nonempty += !!stripe_blockcount_get(new_s, i); + + if (!old_s) + bch2_stripes_heap_insert(c, m, idx); + else + bch2_stripes_heap_update(c, m, idx); + } + } else { + struct gc_stripe *m = + genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); + + if (!m) { + bch_err(c, "error allocating memory for gc_stripes, idx %llu", + idx); + return -BCH_ERR_ENOMEM_mark_stripe; + } + /* + * This will be wrong when we bring back runtime gc: we should + * be unmarking the old key and then marking the new key + */ + m->alive = true; + m->sectors = le16_to_cpu(new_s->sectors); + m->nr_blocks = new_s->nr_blocks; + m->nr_redundant = new_s->nr_redundant; + + for (i = 0; i < new_s->nr_blocks; i++) + m->ptrs[i] = new_s->ptrs[i]; + + bch2_bkey_to_replicas(&m->r.e, new); + + /* + * gc recalculates this field from stripe ptr + * references: + */ + memset(m->block_sectors, 0, sizeof(m->block_sectors)); + + for (i = 0; i < new_s->nr_blocks; i++) { + ret = mark_stripe_bucket(trans, new, i, flags); + if (ret) + return ret; + } + + ret = bch2_update_replicas(c, new, &m->r.e, + ((s64) m->sectors * m->nr_redundant), + journal_seq, gc); + if (ret) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, new); + bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); + printbuf_exit(&buf); + return ret; + } + } + + return 0; +} + /* returns blocknr in stripe that we matched: */ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s, struct bkey_s_c k, unsigned *block) diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 7d0237c9819f..3f7b2179d95e 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -12,6 +12,9 @@ int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); +int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ .key_invalid = bch2_stripe_invalid, \ From e4eb3e5ae46b654ab50c62cce96cf04039bab2e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 01:46:04 -0500 Subject: [PATCH 200/226] bcachefs: unify stripe trigger Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 164 +++++++++++++++++++++-------------------------- fs/bcachefs/ec.h | 9 ++- 2 files changed, 76 insertions(+), 97 deletions(-) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 0c757e195356..d802bc63c8d0 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -237,81 +237,6 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, return ret; } -int bch2_trans_mark_stripe(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - const struct bch_stripe *old_s = NULL; - struct bch_stripe *new_s = NULL; - struct bch_replicas_padded r; - unsigned i, nr_blocks; - int ret = 0; - - if (old.k->type == KEY_TYPE_stripe) - old_s = bkey_s_c_to_stripe(old).v; - if (new.k->type == KEY_TYPE_stripe) - new_s = bkey_s_to_stripe(new).v; - - /* - * If the pointers aren't changing, we don't need to do anything: - */ - if (new_s && old_s && - new_s->nr_blocks == old_s->nr_blocks && - new_s->nr_redundant == old_s->nr_redundant && - !memcmp(old_s->ptrs, new_s->ptrs, - new_s->nr_blocks * sizeof(struct bch_extent_ptr))) - return 0; - - BUG_ON(new_s && old_s && - (new_s->nr_blocks != old_s->nr_blocks || - new_s->nr_redundant != old_s->nr_redundant)); - - nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks; - - if (new_s) { - s64 sectors = le16_to_cpu(new_s->sectors); - - bch2_bkey_to_replicas(&r.e, new.s_c); - ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); - if (ret) - return ret; - } - - if (old_s) { - s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); - - bch2_bkey_to_replicas(&r.e, old); - ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); - if (ret) - return ret; - } - - for (i = 0; i < nr_blocks; i++) { - if (new_s && old_s && - !memcmp(&new_s->ptrs[i], - &old_s->ptrs[i], - sizeof(new_s->ptrs[i]))) - continue; - - if (new_s) { - ret = bch2_trans_mark_stripe_bucket(trans, - bkey_s_to_stripe(new).c, i, false); - if (ret) - break; - } - - if (old_s) { - ret = bch2_trans_mark_stripe_bucket(trans, - bkey_s_c_to_stripe(old), i, true); - if (ret) - break; - } - } - - return ret; -} - static int mark_stripe_bucket(struct btree_trans *trans, struct bkey_s_c k, unsigned ptr_idx, @@ -370,26 +295,79 @@ static int mark_stripe_bucket(struct btree_trans *trans, return ret; } -int bch2_mark_stripe(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s _new, - unsigned flags) +int bch2_trigger_stripe(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s _new, + unsigned flags) { struct bkey_s_c new = _new.s_c; - bool gc = flags & BTREE_TRIGGER_GC; - u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; u64 idx = new.k->p.offset; const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe ? bkey_s_c_to_stripe(old).v : NULL; const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe ? bkey_s_c_to_stripe(new).v : NULL; - unsigned i; - int ret; - BUG_ON(gc && old_s); + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + /* + * If the pointers aren't changing, we don't need to do anything: + */ + if (new_s && old_s && + new_s->nr_blocks == old_s->nr_blocks && + new_s->nr_redundant == old_s->nr_redundant && + !memcmp(old_s->ptrs, new_s->ptrs, + new_s->nr_blocks * sizeof(struct bch_extent_ptr))) + return 0; - if (!gc) { + BUG_ON(new_s && old_s && + (new_s->nr_blocks != old_s->nr_blocks || + new_s->nr_redundant != old_s->nr_redundant)); + + if (new_s) { + s64 sectors = le16_to_cpu(new_s->sectors); + + struct bch_replicas_padded r; + bch2_bkey_to_replicas(&r.e, new); + int ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); + if (ret) + return ret; + } + + if (old_s) { + s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); + + struct bch_replicas_padded r; + bch2_bkey_to_replicas(&r.e, old); + int ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); + if (ret) + return ret; + } + + unsigned nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks; + for (unsigned i = 0; i < nr_blocks; i++) { + if (new_s && old_s && + !memcmp(&new_s->ptrs[i], + &old_s->ptrs[i], + sizeof(new_s->ptrs[i]))) + continue; + + if (new_s) { + int ret = bch2_trans_mark_stripe_bucket(trans, + bkey_s_c_to_stripe(new), i, false); + if (ret) + return ret; + } + + if (old_s) { + int ret = bch2_trans_mark_stripe_bucket(trans, + bkey_s_c_to_stripe(old), i, true); + if (ret) + return ret; + } + } + } + + if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) { struct stripe *m = genradix_ptr(&c->stripes, idx); if (!m) { @@ -418,7 +396,7 @@ int bch2_mark_stripe(struct btree_trans *trans, m->nr_redundant = new_s->nr_redundant; m->blocks_nonempty = 0; - for (i = 0; i < new_s->nr_blocks; i++) + for (unsigned i = 0; i < new_s->nr_blocks; i++) m->blocks_nonempty += !!stripe_blockcount_get(new_s, i); if (!old_s) @@ -426,7 +404,9 @@ int bch2_mark_stripe(struct btree_trans *trans, else bch2_stripes_heap_update(c, m, idx); } - } else { + } + + if (flags & BTREE_TRIGGER_GC) { struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); @@ -444,7 +424,7 @@ int bch2_mark_stripe(struct btree_trans *trans, m->nr_blocks = new_s->nr_blocks; m->nr_redundant = new_s->nr_redundant; - for (i = 0; i < new_s->nr_blocks; i++) + for (unsigned i = 0; i < new_s->nr_blocks; i++) m->ptrs[i] = new_s->ptrs[i]; bch2_bkey_to_replicas(&m->r.e, new); @@ -455,15 +435,15 @@ int bch2_mark_stripe(struct btree_trans *trans, */ memset(m->block_sectors, 0, sizeof(m->block_sectors)); - for (i = 0; i < new_s->nr_blocks; i++) { - ret = mark_stripe_bucket(trans, new, i, flags); + for (unsigned i = 0; i < new_s->nr_blocks; i++) { + int ret = mark_stripe_bucket(trans, new, i, flags); if (ret) return ret; } - ret = bch2_update_replicas(c, new, &m->r.e, + int ret = bch2_update_replicas(c, new, &m->r.e, ((s64) m->sectors * m->nr_redundant), - journal_seq, gc); + 0, true); if (ret) { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 3f7b2179d95e..94efb8d9beb9 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -12,16 +12,15 @@ int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); -int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); +int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ .key_invalid = bch2_stripe_invalid, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ - .trans_trigger = bch2_trans_mark_stripe, \ - .atomic_trigger = bch2_mark_stripe, \ + .trans_trigger = bch2_trigger_stripe, \ + .atomic_trigger = bch2_trigger_stripe, \ .min_val_size = 8, \ }) From 1f34c21bc685f6bd43383aefcb85a9cf604d439a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 19:02:56 -0500 Subject: [PATCH 201/226] bcachefs: bch2_trigger_pointer() Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 455 ++++++++++++++++++++---------------------- 1 file changed, 215 insertions(+), 240 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e035ba3c359b..4c55945c4f19 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -653,201 +653,6 @@ int bch2_check_bucket_ref(struct btree_trans *trans, goto out; } -static int __mark_pointer(struct btree_trans *trans, - struct bkey_s_c k, - const struct bch_extent_ptr *ptr, - s64 sectors, enum bch_data_type ptr_data_type, - u8 bucket_gen, u8 *bucket_data_type, - u32 *dirty_sectors, u32 *cached_sectors) -{ - u32 *dst_sectors = !ptr->cached - ? dirty_sectors - : cached_sectors; - int ret = bch2_check_bucket_ref(trans, k, ptr, sectors, ptr_data_type, - bucket_gen, *bucket_data_type, *dst_sectors); - - if (ret) - return ret; - - *dst_sectors += sectors; - - if (!*dirty_sectors && !*cached_sectors) - *bucket_data_type = 0; - else if (*bucket_data_type != BCH_DATA_stripe) - *bucket_data_type = ptr_data_type; - - return 0; -} - -static int bch2_mark_pointer(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, - struct extent_ptr_decoded p, - s64 sectors, - unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); - struct bucket old, new, *g; - enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); - u8 bucket_data_type; - int ret = 0; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - percpu_down_read(&c->mark_lock); - g = PTR_GC_BUCKET(ca, &p.ptr); - bucket_lock(g); - old = *g; - - bucket_data_type = g->data_type; - ret = __mark_pointer(trans, k, &p.ptr, sectors, - data_type, g->gen, - &bucket_data_type, - &g->dirty_sectors, - &g->cached_sectors); - if (!ret) - g->data_type = bucket_data_type; - - new = *g; - bucket_unlock(g); - if (!ret) - bch2_dev_usage_update_m(c, ca, &old, &new); - percpu_up_read(&c->mark_lock); - - return ret; -} - -static int bch2_mark_stripe_ptr(struct btree_trans *trans, - struct bkey_s_c k, - struct bch_extent_stripe_ptr p, - enum bch_data_type data_type, - s64 sectors, - unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bch_replicas_padded r; - struct gc_stripe *m; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - m = genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL); - if (!m) { - bch_err(c, "error allocating memory for gc_stripes, idx %llu", - (u64) p.idx); - return -BCH_ERR_ENOMEM_mark_stripe_ptr; - } - - mutex_lock(&c->ec_stripes_heap_lock); - - if (!m || !m->alive) { - mutex_unlock(&c->ec_stripes_heap_lock); - bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", - (u64) p.idx); - bch2_inconsistent_error(c); - return -EIO; - } - - m->block_sectors[p.block] += sectors; - - r = m->r; - mutex_unlock(&c->ec_stripes_heap_lock); - - r.e.data_type = data_type; - bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); - - return 0; -} - -static int __mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - u64 journal_seq = trans->journal_res.seq; - struct bch_fs *c = trans->c; - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - struct bch_replicas_padded r; - enum bch_data_type data_type = bkey_is_btree_ptr(k.k) - ? BCH_DATA_btree - : BCH_DATA_user; - s64 sectors = bkey_is_btree_ptr(k.k) - ? btree_sectors(c) - : k.k->size; - s64 dirty_sectors = 0; - bool stale; - int ret; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - r.e.data_type = data_type; - r.e.nr_devs = 0; - r.e.nr_required = 1; - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - s64 disk_sectors = ptr_disk_sectors(sectors, p); - - if (flags & BTREE_TRIGGER_OVERWRITE) - disk_sectors = -disk_sectors; - - ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags); - if (ret < 0) - return ret; - - stale = ret > 0; - - if (p.ptr.cached) { - if (!stale) { - ret = update_cached_sectors(c, k, p.ptr.dev, - disk_sectors, journal_seq, true); - if (ret) { - bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors", - __func__); - return ret; - } - } - } else if (!p.has_ec) { - dirty_sectors += disk_sectors; - r.e.devs[r.e.nr_devs++] = p.ptr.dev; - } else { - ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type, - disk_sectors, flags); - if (ret) - return ret; - - /* - * There may be other dirty pointers in this extent, but - * if so they're not required for mounting if we have an - * erasure coded pointer in this extent: - */ - r.e.nr_required = 0; - } - } - - if (r.e.nr_devs) { - ret = bch2_update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); - if (ret) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, k); - bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf); - printbuf_exit(&buf); - return ret; - } - } - - return 0; -} - -int bch2_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - return trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); -} - void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -967,45 +772,140 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, return -1; } -/* trans_mark: */ +/* KEY_TYPE_extent: */ -static inline int bch2_trans_mark_pointer(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - unsigned flags) +static int __mark_pointer(struct btree_trans *trans, + struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + s64 sectors, enum bch_data_type ptr_data_type, + u8 bucket_gen, u8 *bucket_data_type, + u32 *dirty_sectors, u32 *cached_sectors) { - bool insert = !(flags & BTREE_TRIGGER_OVERWRITE); - struct btree_iter iter; - struct bkey_i_alloc_v4 *a; - struct bpos bucket; - struct bch_backpointer bp; - s64 sectors; - int ret; - - bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp); - sectors = bp.bucket_len; - if (!insert) - sectors = -sectors; - - a = bch2_trans_start_alloc_update(trans, &iter, bucket); - if (IS_ERR(a)) - return PTR_ERR(a); - - ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type, - a->v.gen, &a->v.data_type, - &a->v.dirty_sectors, &a->v.cached_sectors) ?: - bch2_trans_update(trans, &iter, &a->k_i, 0); - bch2_trans_iter_exit(trans, &iter); + u32 *dst_sectors = !ptr->cached + ? dirty_sectors + : cached_sectors; + int ret = bch2_check_bucket_ref(trans, k, ptr, sectors, ptr_data_type, + bucket_gen, *bucket_data_type, *dst_sectors); if (ret) return ret; - if (!p.ptr.cached) { - ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert); + *dst_sectors += sectors; + + if (!*dirty_sectors && !*cached_sectors) + *bucket_data_type = 0; + else if (*bucket_data_type != BCH_DATA_stripe) + *bucket_data_type = ptr_data_type; + + return 0; +} + +static int bch2_trigger_pointer(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, struct extent_ptr_decoded p, + s64 *sectors, + unsigned flags) +{ + bool insert = !(flags & BTREE_TRIGGER_OVERWRITE); + struct bpos bucket; + struct bch_backpointer bp; + + bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp); + *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len); + + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + struct btree_iter iter; + struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &iter, bucket); + int ret = PTR_ERR_OR_ZERO(a); if (ret) return ret; + + ret = __mark_pointer(trans, k, &p.ptr, *sectors, bp.data_type, + a->v.gen, &a->v.data_type, + &a->v.dirty_sectors, &a->v.cached_sectors) ?: + bch2_trans_update(trans, &iter, &a->k_i, 0); + bch2_trans_iter_exit(trans, &iter); + + if (ret) + return ret; + + if (!p.ptr.cached) { + ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert); + if (ret) + return ret; + } } + if (flags & BTREE_TRIGGER_GC) { + struct bch_fs *c = trans->c; + struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); + enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); + + percpu_down_read(&c->mark_lock); + struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); + bucket_lock(g); + struct bucket old = *g; + + u8 bucket_data_type = g->data_type; + int ret = __mark_pointer(trans, k, &p.ptr, *sectors, + data_type, g->gen, + &bucket_data_type, + &g->dirty_sectors, + &g->cached_sectors); + if (ret) { + bucket_unlock(g); + percpu_up_read(&c->mark_lock); + return ret; + } + + g->data_type = bucket_data_type; + struct bucket new = *g; + bucket_unlock(g); + bch2_dev_usage_update_m(c, ca, &old, &new); + percpu_up_read(&c->mark_lock); + } + + return 0; +} + +static int bch2_mark_stripe_ptr(struct btree_trans *trans, + struct bkey_s_c k, + struct bch_extent_stripe_ptr p, + enum bch_data_type data_type, + s64 sectors, + unsigned flags) +{ + struct bch_fs *c = trans->c; + struct bch_replicas_padded r; + struct gc_stripe *m; + + BUG_ON(!(flags & BTREE_TRIGGER_GC)); + + m = genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL); + if (!m) { + bch_err(c, "error allocating memory for gc_stripes, idx %llu", + (u64) p.idx); + return -BCH_ERR_ENOMEM_mark_stripe_ptr; + } + + mutex_lock(&c->ec_stripes_heap_lock); + + if (!m || !m->alive) { + mutex_unlock(&c->ec_stripes_heap_lock); + bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", + (u64) p.idx); + bch2_inconsistent_error(c); + return -EIO; + } + + m->block_sectors[p.block] += sectors; + + r = m->r; + mutex_unlock(&c->ec_stripes_heap_lock); + + r.e.data_type = data_type; + bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); + return 0; } @@ -1049,10 +949,11 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, return ret; } -static int __trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +static int __mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { + u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1061,11 +962,86 @@ static int __trans_mark_extent(struct btree_trans *trans, enum bch_data_type data_type = bkey_is_btree_ptr(k.k) ? BCH_DATA_btree : BCH_DATA_user; - s64 sectors = bkey_is_btree_ptr(k.k) - ? btree_sectors(c) - : k.k->size; s64 dirty_sectors = 0; - bool stale; + int ret; + + BUG_ON(!(flags & BTREE_TRIGGER_GC)); + + r.e.data_type = data_type; + r.e.nr_devs = 0; + r.e.nr_required = 1; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + s64 disk_sectors; + ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags); + if (ret < 0) + return ret; + + bool stale = ret > 0; + + if (p.ptr.cached) { + if (!stale) { + ret = update_cached_sectors(c, k, p.ptr.dev, + disk_sectors, journal_seq, true); + if (ret) { + bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors", + __func__); + return ret; + } + } + } else if (!p.has_ec) { + dirty_sectors += disk_sectors; + r.e.devs[r.e.nr_devs++] = p.ptr.dev; + } else { + ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type, + disk_sectors, flags); + if (ret) + return ret; + + /* + * There may be other dirty pointers in this extent, but + * if so they're not required for mounting if we have an + * erasure coded pointer in this extent: + */ + r.e.nr_required = 0; + } + } + + if (r.e.nr_devs) { + ret = bch2_update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); + if (ret) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, k); + bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf); + printbuf_exit(&buf); + return ret; + } + } + + return 0; +} + +int bch2_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) +{ + return trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); +} + +static int __trans_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct bch_replicas_padded r; + enum bch_data_type data_type = bkey_is_btree_ptr(k.k) + ? BCH_DATA_btree + : BCH_DATA_user; + s64 dirty_sectors = 0; int ret = 0; r.e.data_type = data_type; @@ -1073,16 +1049,12 @@ static int __trans_mark_extent(struct btree_trans *trans, r.e.nr_required = 1; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - s64 disk_sectors = ptr_disk_sectors(sectors, p); - - if (flags & BTREE_TRIGGER_OVERWRITE) - disk_sectors = -disk_sectors; - - ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags); + s64 disk_sectors; + ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags); if (ret < 0) return ret; - stale = ret > 0; + bool stale = ret > 0; if (p.ptr.cached) { if (!stale) { @@ -1104,10 +1076,13 @@ static int __trans_mark_extent(struct btree_trans *trans, } } - if (r.e.nr_devs) + if (r.e.nr_devs) { ret = bch2_update_replicas_list(trans, &r.e, dirty_sectors); + if (ret) + return ret; + } - return ret; + return 0; } int bch2_trans_mark_extent(struct btree_trans *trans, From d55ddf6e7a81e1e72b2f73c3cca836a6961c68af Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 31 Dec 2023 19:41:45 -0500 Subject: [PATCH 202/226] bcachefs: Online fsck can now fix errors BCH_FS_fsck_done -> BCH_FS_fsck_running; set when we might be fixing fsck errors. Also; set fix_errors to ask by default when fsck is running. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 5 +---- fs/bcachefs/chardev.c | 11 ++++++++++- fs/bcachefs/error.c | 8 ++++---- fs/bcachefs/recovery.c | 7 +++++-- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index ffef6182a477..c7ed097e41a7 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -602,9 +602,6 @@ struct bch_dev { }; /* - * fsck_done - kill? - * - * replace with something more general from enumated fsck passes/errors: * initial_gc_unfixed * error * topology error @@ -620,7 +617,7 @@ struct bch_dev { x(going_ro) \ x(write_disable_complete) \ x(clean_shutdown) \ - x(fsck_done) \ + x(fsck_running) \ x(initial_gc_unfixed) \ x(need_another_gc) \ x(need_delete_dead_snapshots) \ diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 46db563e0497..226b39c17667 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -774,15 +774,24 @@ static int bch2_fsck_online_thread_fn(void *arg) /* * XXX: can we figure out a way to do this without mucking with c->opts? */ + unsigned old_fix_errors = c->opts.fix_errors; if (opt_defined(thr->opts, fix_errors)) c->opts.fix_errors = thr->opts.fix_errors; + else + c->opts.fix_errors = FSCK_FIX_ask; + c->opts.fsck = true; + set_bit(BCH_FS_fsck_running, &c->flags); c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; - bch2_run_online_recovery_passes(c); + int ret = bch2_run_online_recovery_passes(c); + + clear_bit(BCH_FS_fsck_running, &c->flags); + bch_err_fn(c, ret); c->stdio = NULL; c->stdio_filter = NULL; + c->opts.fix_errors = old_fix_errors; thread_with_stdio_done(&thr->thr); diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 8a8bcbcdff2a..e8200cad3ab8 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -28,7 +28,7 @@ bool bch2_inconsistent_error(struct bch_fs *c) void bch2_topology_error(struct bch_fs *c) { set_bit(BCH_FS_topology_error, &c->flags); - if (test_bit(BCH_FS_fsck_done, &c->flags)) + if (!test_bit(BCH_FS_fsck_running, &c->flags)) bch2_inconsistent_error(c); } @@ -141,7 +141,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) { struct fsck_err_state *s; - if (test_bit(BCH_FS_fsck_done, &c->flags)) + if (!test_bit(BCH_FS_fsck_running, &c->flags)) return NULL; list_for_each_entry(s, &c->fsck_error_msgs, list) @@ -223,7 +223,7 @@ int bch2_fsck_err(struct bch_fs *c, prt_printf(out, bch2_log_msg(c, "")); #endif - if (test_bit(BCH_FS_fsck_done, &c->flags)) { + if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { prt_str(out, ", shutting down"); @@ -290,7 +290,7 @@ int bch2_fsck_err(struct bch_fs *c, bch2_print_string_as_lines(KERN_ERR, out->buf); } - if (!test_bit(BCH_FS_fsck_done, &c->flags) && + if (test_bit(BCH_FS_fsck_running, &c->flags) && (ret != -BCH_ERR_fsck_fix && ret != -BCH_ERR_fsck_ignore)) bch_err(c, "Unable to continue, halting"); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 066aef6f3c74..6c0af6502ce9 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -832,6 +832,9 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); + if (c->opts.fsck) + set_bit(BCH_FS_fsck_running, &c->flags); + ret = bch2_blacklist_table_initialize(c); if (ret) { bch_err(c, "error initializing blacklist table"); @@ -972,6 +975,8 @@ int bch2_fs_recovery(struct bch_fs *c) if (ret) goto err; + clear_bit(BCH_FS_fsck_running, &c->flags); + /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && test_bit(BCH_FS_errors_fixed, &c->flags) && @@ -1066,7 +1071,6 @@ int bch2_fs_recovery(struct bch_fs *c) ret = 0; out: - set_bit(BCH_FS_fsck_done, &c->flags); bch2_flush_fsck_errs(c); if (!c->opts.keep_journal && @@ -1113,7 +1117,6 @@ int bch2_fs_initialize(struct bch_fs *c) c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns); set_bit(BCH_FS_may_go_rw, &c->flags); - set_bit(BCH_FS_fsck_done, &c->flags); for (unsigned i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); From 5a82ec3feaaf8be8f566fd535bac5dca4a00dfc7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 19:08:54 -0500 Subject: [PATCH 203/226] bcachefs: bch2_trigger_stripe_ptr() Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 136 ++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 71 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 4c55945c4f19..e434df606656 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -868,87 +868,83 @@ static int bch2_trigger_pointer(struct btree_trans *trans, return 0; } -static int bch2_mark_stripe_ptr(struct btree_trans *trans, +static int bch2_trigger_stripe_ptr(struct btree_trans *trans, struct bkey_s_c k, - struct bch_extent_stripe_ptr p, + struct extent_ptr_decoded p, enum bch_data_type data_type, - s64 sectors, - unsigned flags) + s64 sectors, unsigned flags) { - struct bch_fs *c = trans->c; - struct bch_replicas_padded r; - struct gc_stripe *m; + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + struct btree_iter iter; + struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_stripes, POS(0, p.ec.idx), + BTREE_ITER_WITH_UPDATES, stripe); + int ret = PTR_ERR_OR_ZERO(s); + if (unlikely(ret)) { + bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, + "pointer to nonexistent stripe %llu", + (u64) p.ec.idx); + goto err; + } - BUG_ON(!(flags & BTREE_TRIGGER_GC)); + if (!bch2_ptr_matches_stripe(&s->v, p)) { + bch2_trans_inconsistent(trans, + "stripe pointer doesn't match stripe %llu", + (u64) p.ec.idx); + ret = -EIO; + goto err; + } - m = genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL); - if (!m) { - bch_err(c, "error allocating memory for gc_stripes, idx %llu", - (u64) p.idx); - return -BCH_ERR_ENOMEM_mark_stripe_ptr; + stripe_blockcount_set(&s->v, p.ec.block, + stripe_blockcount_get(&s->v, p.ec.block) + + sectors); + + struct bch_replicas_padded r; + bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); + r.e.data_type = data_type; + ret = bch2_update_replicas_list(trans, &r.e, sectors); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; } - mutex_lock(&c->ec_stripes_heap_lock); + if (flags & BTREE_TRIGGER_GC) { + struct bch_fs *c = trans->c; - if (!m || !m->alive) { + BUG_ON(!(flags & BTREE_TRIGGER_GC)); + + struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL); + if (!m) { + bch_err(c, "error allocating memory for gc_stripes, idx %llu", + (u64) p.ec.idx); + return -BCH_ERR_ENOMEM_mark_stripe_ptr; + } + + mutex_lock(&c->ec_stripes_heap_lock); + + if (!m || !m->alive) { + mutex_unlock(&c->ec_stripes_heap_lock); + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, k); + bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s", + (u64) p.ec.idx, buf.buf); + printbuf_exit(&buf); + bch2_inconsistent_error(c); + return -EIO; + } + + m->block_sectors[p.ec.block] += sectors; + + struct bch_replicas_padded r = m->r; mutex_unlock(&c->ec_stripes_heap_lock); - bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", - (u64) p.idx); - bch2_inconsistent_error(c); - return -EIO; + + r.e.data_type = data_type; + bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); } - m->block_sectors[p.block] += sectors; - - r = m->r; - mutex_unlock(&c->ec_stripes_heap_lock); - - r.e.data_type = data_type; - bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); - return 0; } -static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, - struct extent_ptr_decoded p, - s64 sectors, enum bch_data_type data_type) -{ - struct btree_iter iter; - struct bkey_i_stripe *s; - struct bch_replicas_padded r; - int ret = 0; - - s = bch2_bkey_get_mut_typed(trans, &iter, - BTREE_ID_stripes, POS(0, p.ec.idx), - BTREE_ITER_WITH_UPDATES, stripe); - ret = PTR_ERR_OR_ZERO(s); - if (unlikely(ret)) { - bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, - "pointer to nonexistent stripe %llu", - (u64) p.ec.idx); - goto err; - } - - if (!bch2_ptr_matches_stripe(&s->v, p)) { - bch2_trans_inconsistent(trans, - "stripe pointer doesn't match stripe %llu", - (u64) p.ec.idx); - ret = -EIO; - goto err; - } - - stripe_blockcount_set(&s->v, p.ec.block, - stripe_blockcount_get(&s->v, p.ec.block) + - sectors); - - bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); - r.e.data_type = data_type; - ret = bch2_update_replicas_list(trans, &r.e, sectors); -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - static int __mark_extent(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, unsigned flags) @@ -993,8 +989,7 @@ static int __mark_extent(struct btree_trans *trans, dirty_sectors += disk_sectors; r.e.devs[r.e.nr_devs++] = p.ptr.dev; } else { - ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type, - disk_sectors, flags); + ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); if (ret) return ret; @@ -1067,8 +1062,7 @@ static int __trans_mark_extent(struct btree_trans *trans, dirty_sectors += disk_sectors; r.e.devs[r.e.nr_devs++] = p.ptr.dev; } else { - ret = bch2_trans_mark_stripe_ptr(trans, p, - disk_sectors, data_type); + ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); if (ret) return ret; From 4f9ec59f8fd658d832460aa25c145bdecfdbaa2d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 02:11:00 -0500 Subject: [PATCH 204/226] bcachefs: unify extent trigger Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 138 +++++++++++++----------------------------- fs/bcachefs/buckets.h | 5 +- fs/bcachefs/extents.h | 12 ++-- fs/bcachefs/reflink.c | 2 +- fs/bcachefs/reflink.h | 2 +- 5 files changed, 51 insertions(+), 108 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e434df606656..67b7e79648b1 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -945,90 +945,12 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, return 0; } -static int __mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +static int __trigger_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { - u64 journal_seq = trans->journal_res.seq; + bool gc = flags & BTREE_TRIGGER_GC; struct bch_fs *c = trans->c; - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - struct bch_replicas_padded r; - enum bch_data_type data_type = bkey_is_btree_ptr(k.k) - ? BCH_DATA_btree - : BCH_DATA_user; - s64 dirty_sectors = 0; - int ret; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - r.e.data_type = data_type; - r.e.nr_devs = 0; - r.e.nr_required = 1; - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - s64 disk_sectors; - ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags); - if (ret < 0) - return ret; - - bool stale = ret > 0; - - if (p.ptr.cached) { - if (!stale) { - ret = update_cached_sectors(c, k, p.ptr.dev, - disk_sectors, journal_seq, true); - if (ret) { - bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors", - __func__); - return ret; - } - } - } else if (!p.has_ec) { - dirty_sectors += disk_sectors; - r.e.devs[r.e.nr_devs++] = p.ptr.dev; - } else { - ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); - if (ret) - return ret; - - /* - * There may be other dirty pointers in this extent, but - * if so they're not required for mounting if we have an - * erasure coded pointer in this extent: - */ - r.e.nr_required = 0; - } - } - - if (r.e.nr_devs) { - ret = bch2_update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); - if (ret) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, k); - bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf); - printbuf_exit(&buf); - return ret; - } - } - - return 0; -} - -int bch2_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - return trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); -} - -static int __trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1053,8 +975,11 @@ static int __trans_mark_extent(struct btree_trans *trans, if (p.ptr.cached) { if (!stale) { - ret = bch2_update_cached_sectors_list(trans, p.ptr.dev, - disk_sectors); + ret = !gc + ? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors) + : update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true); + bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors", + __func__); if (ret) return ret; } @@ -1066,12 +991,26 @@ static int __trans_mark_extent(struct btree_trans *trans, if (ret) return ret; + /* + * There may be other dirty pointers in this extent, but + * if so they're not required for mounting if we have an + * erasure coded pointer in this extent: + */ r.e.nr_required = 0; } } if (r.e.nr_devs) { - ret = bch2_update_replicas_list(trans, &r.e, dirty_sectors); + ret = !gc + ? bch2_update_replicas_list(trans, &r.e, dirty_sectors) + : bch2_update_replicas(c, k, &r.e, dirty_sectors, 0, true); + if (unlikely(ret && gc)) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, k); + bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf); + printbuf_exit(&buf); + } if (ret) return ret; } @@ -1079,22 +1018,27 @@ static int __trans_mark_extent(struct btree_trans *trans, return 0; } -int bch2_trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) +int bch2_trigger_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { - struct bch_fs *c = trans->c; - int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - - (int) bch2_bkey_needs_rebalance(c, old); + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + struct bch_fs *c = trans->c; + int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - + (int) bch2_bkey_needs_rebalance(c, old); - if (mod) { - int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0); - if (ret) - return ret; + if (mod) { + int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0); + if (ret) + return ret; + } } - return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags); + if (flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC)) + return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree_id, level, old, new, flags); + + return 0; } /* KEY_TYPE_reservation */ diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 3c0b868cf858..2c95cc5d86be 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -340,12 +340,11 @@ int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, size_t, enum bch_data_type, unsigned, struct gc_pos, unsigned); -int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); +int bch2_trigger_extent(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); -int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, unsigned); #define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ ({ \ int ret = 0; \ diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 374ce357e8bb..b59a8b1c4678 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -415,8 +415,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, .key_invalid = bch2_btree_ptr_invalid, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ - .trans_trigger = bch2_trans_mark_extent, \ - .atomic_trigger = bch2_mark_extent, \ + .trans_trigger = bch2_trigger_extent, \ + .atomic_trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ @@ -424,8 +424,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ - .trans_trigger = bch2_trans_mark_extent, \ - .atomic_trigger = bch2_mark_extent, \ + .trans_trigger = bch2_trigger_extent, \ + .atomic_trigger = bch2_trigger_extent, \ .min_val_size = 40, \ }) @@ -439,8 +439,8 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ .key_merge = bch2_extent_merge, \ - .trans_trigger = bch2_trans_mark_extent, \ - .atomic_trigger = bch2_mark_extent, \ + .trans_trigger = bch2_trigger_extent, \ + .atomic_trigger = bch2_trigger_extent, \ }) /* KEY_TYPE_reservation: */ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 74e08a9d7b02..bab1373f05ef 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -308,7 +308,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans, bkey_val_bytes(new.k) - 8)) return 0; - return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); + return bch2_trigger_extent(trans, btree_id, level, old, new, flags); } /* indirect inline data */ diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index fbb34f468f71..8ca76deebabf 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -33,7 +33,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ .trans_trigger = bch2_trans_mark_reflink_v, \ - .atomic_trigger = bch2_mark_extent, \ + .atomic_trigger = bch2_trigger_extent, \ .min_val_size = 8, \ }) From f0431c5f474643a0dbe9f3c288480422abd16179 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 31 Dec 2023 21:01:06 -0500 Subject: [PATCH 205/226] bcachefs: Combine .trans_trigger, .atomic_trigger Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 12 +++---- fs/bcachefs/bkey_methods.h | 50 +++++++++++------------------ fs/bcachefs/btree_gc.c | 2 +- fs/bcachefs/btree_trans_commit.c | 31 ++++++++---------- fs/bcachefs/btree_update_interior.c | 19 +++++------ fs/bcachefs/ec.h | 3 +- fs/bcachefs/extents.h | 12 +++---- fs/bcachefs/inode.h | 9 ++---- fs/bcachefs/reflink.c | 4 ++- fs/bcachefs/reflink.h | 8 ++--- fs/bcachefs/snapshot.h | 2 +- 11 files changed, 61 insertions(+), 91 deletions(-) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index dbdf153e58ac..e7f7e842ee1b 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -182,24 +182,21 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v1_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trigger_alloc, \ - .atomic_trigger = bch2_trigger_alloc, \ + .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v2_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trigger_alloc, \ - .atomic_trigger = bch2_trigger_alloc, \ + .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v3_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trigger_alloc, \ - .atomic_trigger = bch2_trigger_alloc, \ + .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) @@ -207,8 +204,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .key_invalid = bch2_alloc_v4_invalid, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ - .trans_trigger = bch2_trigger_alloc, \ - .atomic_trigger = bch2_trigger_alloc, \ + .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index d8f42ba1cd0f..ee82283722b7 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -28,10 +28,8 @@ struct bkey_ops { void (*swab)(struct bkey_s); bool (*key_normalize)(struct bch_fs *, struct bkey_s); bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c); - int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); - int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, unsigned); + int (*trigger)(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); void (*compat)(enum btree_id id, unsigned version, unsigned big_endian, int write, struct bkey_s); @@ -78,18 +76,6 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); -static inline int bch2_mark_key(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) -{ - const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type); - - return ops->atomic_trigger - ? ops->atomic_trigger(trans, btree, level, old, new, flags) - : 0; -} - enum btree_update_flags { __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END, __BTREE_UPDATE_NOJOURNAL, @@ -134,42 +120,42 @@ enum btree_update_flags { /* signal from bucket invalidate path to alloc trigger */ #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) -static inline int bch2_trans_mark_key(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, - unsigned flags) +static inline int bch2_key_trigger(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type); - return ops->trans_trigger - ? ops->trans_trigger(trans, btree_id, level, old, new, flags|BTREE_TRIGGER_TRANSACTIONAL) + return ops->trigger + ? ops->trigger(trans, btree, level, old, new, flags) : 0; } -static inline int bch2_trans_mark_old(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, unsigned flags) +static inline int bch2_key_trigger_old(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, unsigned flags) { struct bkey_i deleted; bkey_init(&deleted.k); deleted.k.p = old.k->p; - return bch2_trans_mark_key(trans, btree_id, level, old, bkey_i_to_s(&deleted), - BTREE_TRIGGER_OVERWRITE|flags); + return bch2_key_trigger(trans, btree_id, level, old, bkey_i_to_s(&deleted), + BTREE_TRIGGER_OVERWRITE|flags); } -static inline int bch2_trans_mark_new(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s new, unsigned flags) +static inline int bch2_key_trigger_new(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s new, unsigned flags) { struct bkey_i deleted; bkey_init(&deleted.k); deleted.k.p = new.k->p; - return bch2_trans_mark_key(trans, btree_id, level, bkey_i_to_s_c(&deleted), new, - BTREE_TRIGGER_INSERT|flags); + return bch2_key_trigger(trans, btree_id, level, bkey_i_to_s_c(&deleted), new, + BTREE_TRIGGER_INSERT|flags); } void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7858c4cccf3d..49b4ade758c3 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -834,7 +834,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, } ret = commit_do(trans, NULL, NULL, 0, - bch2_mark_key(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); + bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); fsck_err: err: bch_err_fn(c, ret); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 5b5cc3f44a82..90eb8065ff2d 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -451,20 +451,15 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) return 0; - if (old_ops->atomic_trigger == new_ops->atomic_trigger) { - ret = bch2_mark_key(trans, i->btree_id, i->level, + if (old_ops->trigger == new_ops->trigger) { + ret = bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); } else { - struct bkey _deleted = POS_KEY((trans->paths + i->path)->pos); - struct bkey_s deleted = (struct bkey_s) { { { &_deleted, NULL } } }; - - ret = bch2_mark_key(trans, i->btree_id, i->level, - deleted.s_c, bkey_i_to_s(new), - BTREE_TRIGGER_INSERT|flags) ?: - bch2_mark_key(trans, i->btree_id, i->level, - old, deleted, - BTREE_TRIGGER_OVERWRITE|flags); + ret = bch2_key_trigger_new(trans, i->btree_id, i->level, + bkey_i_to_s(new), flags) ?: + bch2_key_trigger_old(trans, i->btree_id, i->level, + old, flags); } return ret; @@ -482,6 +477,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ struct bkey_s_c old = { &old_k, i->old_v }; const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type); const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type); + unsigned flags = i->flags|BTREE_TRIGGER_TRANSACTIONAL; verify_update_old_key(trans, i); @@ -491,19 +487,18 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ if (!i->insert_trigger_run && !i->overwrite_trigger_run && - old_ops->trans_trigger == new_ops->trans_trigger) { + old_ops->trigger == new_ops->trigger) { i->overwrite_trigger_run = true; i->insert_trigger_run = true; - return bch2_trans_mark_key(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k), - BTREE_TRIGGER_INSERT| - BTREE_TRIGGER_OVERWRITE| - i->flags) ?: 1; + return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k), + BTREE_TRIGGER_INSERT| + BTREE_TRIGGER_OVERWRITE|flags) ?: 1; } else if (overwrite && !i->overwrite_trigger_run) { i->overwrite_trigger_run = true; - return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1; + return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1; } else if (!overwrite && !i->insert_trigger_run) { i->insert_trigger_run = true; - return bch2_trans_mark_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), i->flags) ?: 1; + return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1; } else { return 0; } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 7447949abbb0..44f9dfa28a09 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -568,7 +568,8 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, for_each_keylist_key(&as->old_keys, k) { unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; - ret = bch2_trans_mark_old(trans, as->btree_id, level, bkey_i_to_s_c(k), 0); + ret = bch2_key_trigger_old(trans, as->btree_id, level, bkey_i_to_s_c(k), + BTREE_TRIGGER_TRANSACTIONAL); if (ret) return ret; } @@ -576,7 +577,8 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, for_each_keylist_key(&as->new_keys, k) { unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; - ret = bch2_trans_mark_new(trans, as->btree_id, level, bkey_i_to_s(k), 0); + ret = bch2_key_trigger_new(trans, as->btree_id, level, bkey_i_to_s(k), + BTREE_TRIGGER_TRANSACTIONAL); if (ret) return ret; } @@ -2156,13 +2158,12 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, int ret; if (!skip_triggers) { - ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1, - bkey_i_to_s_c(&b->key), 0); - if (ret) - return ret; - - ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1, - bkey_i_to_s(new_key), 0); + ret = bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s_c(&b->key), + BTREE_TRIGGER_TRANSACTIONAL) ?: + bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s(new_key), + BTREE_TRIGGER_TRANSACTIONAL); if (ret) return ret; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 94efb8d9beb9..f4369b02e805 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -19,8 +19,7 @@ int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, .key_invalid = bch2_stripe_invalid, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ - .trans_trigger = bch2_trigger_stripe, \ - .atomic_trigger = bch2_trigger_stripe, \ + .trigger = bch2_trigger_stripe, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index b59a8b1c4678..a855c94d43dd 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -415,8 +415,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, .key_invalid = bch2_btree_ptr_invalid, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ - .trans_trigger = bch2_trigger_extent, \ - .atomic_trigger = bch2_trigger_extent, \ + .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ @@ -424,8 +423,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ - .trans_trigger = bch2_trigger_extent, \ - .atomic_trigger = bch2_trigger_extent, \ + .trigger = bch2_trigger_extent, \ .min_val_size = 40, \ }) @@ -439,8 +437,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ .key_merge = bch2_extent_merge, \ - .trans_trigger = bch2_trigger_extent, \ - .atomic_trigger = bch2_trigger_extent, \ + .trigger = bch2_trigger_extent, \ }) /* KEY_TYPE_reservation: */ @@ -454,8 +451,7 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .key_invalid = bch2_reservation_invalid, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ - .trans_trigger = bch2_trigger_reservation, \ - .atomic_trigger = bch2_trigger_reservation, \ + .trigger = bch2_trigger_reservation, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index fed552e0140a..b63f312581cf 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -23,24 +23,21 @@ int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, #define bch2_bkey_ops_inode ((struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trigger_inode, \ - .atomic_trigger = bch2_trigger_inode, \ + .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ .key_invalid = bch2_inode_v2_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trigger_inode, \ - .atomic_trigger = bch2_trigger_inode, \ + .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ .key_invalid = bch2_inode_v3_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trigger_inode, \ - .atomic_trigger = bch2_trigger_inode, \ + .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ }) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index bab1373f05ef..e1d1bcb2a475 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -298,7 +298,9 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, unsigned flags) { - check_indirect_extent_deleting(new, &flags); + if ((flags & BTREE_TRIGGER_TRANSACTIONAL) && + (flags & BTREE_TRIGGER_INSERT)) + check_indirect_extent_deleting(new, &flags); if (old.k->type == KEY_TYPE_reflink_v && new.k->type == KEY_TYPE_reflink_v && diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index 8ca76deebabf..8ee778ec0022 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -16,8 +16,7 @@ int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, .key_invalid = bch2_reflink_p_invalid, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ - .trans_trigger = bch2_trigger_reflink_p, \ - .atomic_trigger = bch2_trigger_reflink_p, \ + .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) @@ -32,8 +31,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, .key_invalid = bch2_reflink_v_invalid, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ - .trans_trigger = bch2_trans_mark_reflink_v, \ - .atomic_trigger = bch2_trigger_extent, \ + .trigger = bch2_trans_mark_reflink_v, \ .min_val_size = 8, \ }) @@ -49,7 +47,7 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *, #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ .key_invalid = bch2_indirect_inline_data_invalid, \ .val_to_text = bch2_indirect_inline_data_to_text, \ - .trans_trigger = bch2_trans_mark_indirect_inline_data, \ + .trigger = bch2_trans_mark_indirect_inline_data, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index bf5b200d189b..7c66ffc06385 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -27,7 +27,7 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ .key_invalid = bch2_snapshot_invalid, \ .val_to_text = bch2_snapshot_to_text, \ - .atomic_trigger = bch2_mark_snapshot, \ + .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ }) From 8a0dda6fd6b7af2a7aa797762148b8ecb8f47a67 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 13:44:43 -0500 Subject: [PATCH 206/226] bcachefs: kill useless return ret Signed-off-by: Kent Overstreet --- fs/bcachefs/debug.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index de5bfc0d4684..413992b229d0 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -930,8 +930,6 @@ void bch2_debug_exit(void) int __init bch2_debug_init(void) { - int ret = 0; - bch_debug = debugfs_create_dir("bcachefs", NULL); - return ret; + return 0; } From 49a5192c0e9cc1deac5130a82127b1e79db3fe20 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 15:21:45 -0500 Subject: [PATCH 207/226] bcachefs: Add an option to control btree node prefetching Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 6 ++++-- fs/bcachefs/opts.h | 8 +++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 7e5c797cfaf2..fa298289e016 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -897,7 +897,8 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, bch2_bkey_buf_reassemble(out, c, k); - if (flags & BTREE_ITER_PREFETCH) + if ((flags & BTREE_ITER_PREFETCH) && + c->opts.btree_node_prefetch) ret = btree_path_prefetch_j(trans, path, &jiter); bch2_btree_and_journal_iter_exit(&jiter); @@ -929,7 +930,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans, bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_btree_node_iter_peek(&l->iter, l->b)); - if (flags & BTREE_ITER_PREFETCH) { + if ((flags & BTREE_ITER_PREFETCH) && + c->opts.btree_node_prefetch) { ret = btree_path_prefetch(trans, path); if (ret) goto err; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index a6f5185ecd67..93a24fef4214 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -458,7 +458,13 @@ enum fsck_err_opts { OPT_UINT(0, BCH_REPLICAS_MAX), \ BCH2_NO_SB_OPT, 1, \ "n", "Data written to this device will be considered\n"\ - "to have already been replicated n times") + "to have already been replicated n times") \ + x(btree_node_prefetch, u8, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, true, \ + NULL, "BTREE_ITER_PREFETCH casuse btree nodes to be\n"\ + " prefetched sequentially") struct bch_opts { #define x(_name, _bits, ...) unsigned _name##_defined:1; From b819f30855a67a82fab9e93828e672f266f20992 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 15:25:08 -0500 Subject: [PATCH 208/226] bcachefs: don't clear accessed bit in btree node fill Seeing strange performance issues that might be caused by memory pressure causing prefetched nodes to be evicted before they're used. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 9574c8c4d708..8e2488a4b58d 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -719,12 +719,6 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, if (IS_ERR(b)) return b; - /* - * Btree nodes read in from disk should not have the accessed bit set - * initially, so that linear scans don't thrash the cache: - */ - clear_btree_node_accessed(b); - bkey_copy(&b->key, k); if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { /* raced with another fill: */ From c72e4d7a30670b59dcf32fb79b2c814a09171912 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 16:42:33 -0500 Subject: [PATCH 209/226] bcachefs: add time_stats for btree_node_read_done() Seeing weird latency issues in the btree node read path - add one bch2_btree_node_read_done(). Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/btree_io.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index c7ed097e41a7..dac383e37181 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -425,6 +425,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(btree_node_merge) \ x(btree_node_sort) \ x(btree_node_read) \ + x(btree_node_read_done) \ x(btree_interior_update_foreground) \ x(btree_interior_update_total) \ x(btree_gc) \ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 38d27cae49ea..cf5089cc358e 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -942,6 +942,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, unsigned ptr_written = btree_ptr_sectors_written(&b->key); struct printbuf buf = PRINTBUF; int ret = 0, retry_read = 0, write = READ; + u64 start_time = local_clock(); b->version_ondisk = U16_MAX; /* We might get called multiple times on read retry: */ @@ -1209,6 +1210,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, out: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); return retry_read; fsck_err: if (ret == -BCH_ERR_btree_node_read_err_want_retry || From 4798bd2443bb426b3065c0cc84bbe0458d4bf72c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 17:15:55 -0500 Subject: [PATCH 210/226] bcachefs: increase max_active on io_complete_wq this definitely should _not_ be 1, and we don't actually want any concurrency limiting at all here - btree node read completions are getting blocked behind btree node write submissions. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 64ff7da49860..84798059bc21 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -868,7 +868,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) !(c->copygc_wq = alloc_workqueue("bcachefs_copygc", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || !(c->io_complete_wq = alloc_workqueue("bcachefs_io", - WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) || + WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 512)) || !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref", WQ_FREEZABLE, 0)) || #ifndef BCH_WRITE_REF_DEBUG From e9bc59f9df96a439c009d3590581c3a6ff68fcbe Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 17:32:07 -0500 Subject: [PATCH 211/226] bcachefs: add missing bch2_latency_acct() call Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index cf5089cc358e..378579bbe2ed 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1647,7 +1647,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, if (sync) { submit_bio_wait(bio); - + bch2_latency_acct(ca, rb->start_time, READ); btree_node_read_work(&rb->work); } else { submit_bio(bio); From a64a37338d49a79647d332f1f510daafffeab4ad Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 20:34:46 -0500 Subject: [PATCH 212/226] bcachefs: Don't autofix errors we can't fix Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index e8200cad3ab8..d32c8bebe46c 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -179,7 +179,8 @@ int bch2_fsck_err(struct bch_fs *c, struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; - if (test_bit(err, c->sb.errors_silent)) + if ((flags & FSCK_CAN_FIX) && + test_bit(err, c->sb.errors_silent)) return -BCH_ERR_fsck_fix; bch2_sb_error_count(c, err); From d641d4cae72a178334c0c712f701f62cebde18ec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 21:46:50 -0500 Subject: [PATCH 213/226] bcachefs: no thread_with_file in userspace Signed-off-by: Kent Overstreet --- fs/bcachefs/thread_with_file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index b24baeabf998..b1c867aa2b58 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef NO_BCACHEFS_FS #include "bcachefs.h" #include "printbuf.h" @@ -294,3 +295,5 @@ int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *buf, size_t wake_up(&stdio->input_wait); return ret; } + +#endif /* NO_BCACHEFS_FS */ From 15eaaa4c315547c1ec871956ad0752cb9f4b14ae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 21:01:37 -0500 Subject: [PATCH 214/226] bcachefs: Upgrades now specify errors to fix, like downgrades Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 82 ++++++++++--------------------- fs/bcachefs/recovery.c | 18 +++---- fs/bcachefs/sb-downgrade.c | 90 +++++++++++++++++++++++++++++++---- fs/bcachefs/sb-downgrade.h | 1 + fs/bcachefs/super-io.c | 22 +-------- fs/bcachefs/super-io.h | 4 -- 6 files changed, 117 insertions(+), 100 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index eb2df422ae5a..0d5ac4184fbc 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1672,71 +1672,41 @@ struct bch_sb_field_downgrade { #define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) #define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) -#define RECOVERY_PASS_ALL_FSCK (1ULL << 63) - /* * field 1: version name * field 2: BCH_VERSION(major, minor) * field 3: recovery passess required on upgrade */ #define BCH_METADATA_VERSIONS() \ - x(bkey_renumber, BCH_VERSION(0, 10), \ - RECOVERY_PASS_ALL_FSCK) \ - x(inode_btree_change, BCH_VERSION(0, 11), \ - RECOVERY_PASS_ALL_FSCK) \ - x(snapshot, BCH_VERSION(0, 12), \ - RECOVERY_PASS_ALL_FSCK) \ - x(inode_backpointers, BCH_VERSION(0, 13), \ - RECOVERY_PASS_ALL_FSCK) \ - x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \ - RECOVERY_PASS_ALL_FSCK) \ - x(snapshot_2, BCH_VERSION(0, 15), \ - BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \ - BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \ - RECOVERY_PASS_ALL_FSCK) \ - x(reflink_p_fix, BCH_VERSION(0, 16), \ - BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \ - x(subvol_dirent, BCH_VERSION(0, 17), \ - RECOVERY_PASS_ALL_FSCK) \ - x(inode_v2, BCH_VERSION(0, 18), \ - RECOVERY_PASS_ALL_FSCK) \ - x(freespace, BCH_VERSION(0, 19), \ - RECOVERY_PASS_ALL_FSCK) \ - x(alloc_v4, BCH_VERSION(0, 20), \ - RECOVERY_PASS_ALL_FSCK) \ - x(new_data_types, BCH_VERSION(0, 21), \ - RECOVERY_PASS_ALL_FSCK) \ - x(backpointers, BCH_VERSION(0, 22), \ - RECOVERY_PASS_ALL_FSCK) \ - x(inode_v3, BCH_VERSION(0, 23), \ - RECOVERY_PASS_ALL_FSCK) \ - x(unwritten_extents, BCH_VERSION(0, 24), \ - RECOVERY_PASS_ALL_FSCK) \ - x(bucket_gens, BCH_VERSION(0, 25), \ - BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \ - RECOVERY_PASS_ALL_FSCK) \ - x(lru_v2, BCH_VERSION(0, 26), \ - RECOVERY_PASS_ALL_FSCK) \ - x(fragmentation_lru, BCH_VERSION(0, 27), \ - RECOVERY_PASS_ALL_FSCK) \ - x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \ - RECOVERY_PASS_ALL_FSCK) \ - x(snapshot_trees, BCH_VERSION(0, 29), \ - RECOVERY_PASS_ALL_FSCK) \ - x(major_minor, BCH_VERSION(1, 0), \ - 0) \ - x(snapshot_skiplists, BCH_VERSION(1, 1), \ - BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ - x(deleted_inodes, BCH_VERSION(1, 2), \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \ - x(rebalance_work, BCH_VERSION(1, 3), \ - BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \ - x(member_seq, BCH_VERSION(1, 4), \ - 0) + x(bkey_renumber, BCH_VERSION(0, 10)) \ + x(inode_btree_change, BCH_VERSION(0, 11)) \ + x(snapshot, BCH_VERSION(0, 12)) \ + x(inode_backpointers, BCH_VERSION(0, 13)) \ + x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \ + x(snapshot_2, BCH_VERSION(0, 15)) \ + x(reflink_p_fix, BCH_VERSION(0, 16)) \ + x(subvol_dirent, BCH_VERSION(0, 17)) \ + x(inode_v2, BCH_VERSION(0, 18)) \ + x(freespace, BCH_VERSION(0, 19)) \ + x(alloc_v4, BCH_VERSION(0, 20)) \ + x(new_data_types, BCH_VERSION(0, 21)) \ + x(backpointers, BCH_VERSION(0, 22)) \ + x(inode_v3, BCH_VERSION(0, 23)) \ + x(unwritten_extents, BCH_VERSION(0, 24)) \ + x(bucket_gens, BCH_VERSION(0, 25)) \ + x(lru_v2, BCH_VERSION(0, 26)) \ + x(fragmentation_lru, BCH_VERSION(0, 27)) \ + x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \ + x(snapshot_trees, BCH_VERSION(0, 29)) \ + x(major_minor, BCH_VERSION(1, 0)) \ + x(snapshot_skiplists, BCH_VERSION(1, 1)) \ + x(deleted_inodes, BCH_VERSION(1, 2)) \ + x(rebalance_work, BCH_VERSION(1, 3)) \ + x(member_seq, BCH_VERSION(1, 4)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, -#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n, +#define x(t, n) bcachefs_metadata_version_##t = n, BCH_METADATA_VERSIONS() #undef x bcachefs_metadata_version_max diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 6c0af6502ce9..e1f0da6a717e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -575,7 +575,7 @@ u64 bch2_recovery_passes_from_stable(u64 v) return ret; } -static u64 check_version_upgrade(struct bch_fs *c) +static bool check_version_upgrade(struct bch_fs *c) { unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); unsigned latest_version = bcachefs_metadata_version_current; @@ -624,15 +624,15 @@ static u64 check_version_upgrade(struct bch_fs *c) bch2_version_to_text(&buf, new_version); prt_newline(&buf); - u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); - if (recovery_passes) { - prt_str(&buf, " running recovery passes: "); - prt_bitflags(&buf, bch2_recovery_passes, recovery_passes); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_upgrade(c, old_version, new_version); + passes = ext->recovery_passes_required[0] & ~passes; - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - ext->recovery_passes_required[0] |= - cpu_to_le64(bch2_recovery_passes_to_stable(recovery_passes)); - c->opts.fix_errors = FSCK_FIX_yes; + if (passes) { + prt_str(&buf, " running recovery passes: "); + prt_bitflags(&buf, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); } bch_info(c, "%s", buf.buf); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 4919237bbe73..b8e70108793a 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -12,33 +12,103 @@ #include "sb-errors.h" #include "super-io.h" +#define RECOVERY_PASS_ALL_FSCK BIT_ULL(63) + /* - * Downgrade table: - * When dowgrading past certain versions, we need to run certain recovery passes - * and fix certain errors: + * Upgrade, downgrade tables - run certain recovery passes, fix certain errors * * x(version, recovery_passes, errors...) */ +#define UPGRADE_TABLE() \ + x(backpointers, \ + RECOVERY_PASS_ALL_FSCK) \ + x(inode_v3, \ + RECOVERY_PASS_ALL_FSCK) \ + x(unwritten_extents, \ + RECOVERY_PASS_ALL_FSCK) \ + x(bucket_gens, \ + BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \ + RECOVERY_PASS_ALL_FSCK) \ + x(lru_v2, \ + RECOVERY_PASS_ALL_FSCK) \ + x(fragmentation_lru, \ + RECOVERY_PASS_ALL_FSCK) \ + x(no_bps_in_alloc_keys, \ + RECOVERY_PASS_ALL_FSCK) \ + x(snapshot_trees, \ + RECOVERY_PASS_ALL_FSCK) \ + x(snapshot_skiplists, \ + BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ + x(deleted_inodes, \ + BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ + BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \ + x(rebalance_work, \ + BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) #define DOWNGRADE_TABLE() -struct downgrade_entry { +struct upgrade_downgrade_entry { u64 recovery_passes; u16 version; u16 nr_errors; const u16 *errors; }; -#define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ }; -DOWNGRADE_TABLE() +#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ }; +UPGRADE_TABLE() #undef x -static const struct downgrade_entry downgrade_table[] = { +static const struct upgrade_downgrade_entry upgrade_table[] = { #define x(ver, passes, ...) { \ .recovery_passes = passes, \ .version = bcachefs_metadata_version_##ver,\ - .nr_errors = ARRAY_SIZE(ver_##errors), \ - .errors = ver_##errors, \ + .nr_errors = ARRAY_SIZE(upgrade_##ver##_errors), \ + .errors = upgrade_##ver##_errors, \ +}, +UPGRADE_TABLE() +#undef x +}; + +void bch2_sb_set_upgrade(struct bch_fs *c, + unsigned old_version, + unsigned new_version) +{ + lockdep_assert_held(&c->sb_lock); + + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + for (const struct upgrade_downgrade_entry *i = upgrade_table; + i < upgrade_table + ARRAY_SIZE(upgrade_table); + i++) + if (i->version > old_version && i->version <= new_version) { + u64 passes = i->recovery_passes; + + if (passes & RECOVERY_PASS_ALL_FSCK) + passes |= bch2_fsck_recovery_passes(); + passes &= ~RECOVERY_PASS_ALL_FSCK; + + ext->recovery_passes_required[0] |= + cpu_to_le64(bch2_recovery_passes_to_stable(passes)); + + for (const u16 *e = i->errors; + e < i->errors + i->nr_errors; + e++) { + __set_bit(*e, c->sb.errors_silent); + ext->errors_silent[*e / 64] |= cpu_to_le64(BIT_ULL(*e % 64)); + } + } +} + +#define x(ver, passes, ...) static const u16 downgrade_ver_##errors[] = { __VA_ARGS__ }; +DOWNGRADE_TABLE() +#undef x + +static const struct upgrade_downgrade_entry downgrade_table[] = { +#define x(ver, passes, ...) { \ + .recovery_passes = passes, \ + .version = bcachefs_metadata_version_##ver,\ + .nr_errors = ARRAY_SIZE(downgrade_##ver##_errors), \ + .errors = downgrade_##ver##_errors, \ }, DOWNGRADE_TABLE() #undef x @@ -118,7 +188,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) darray_char table = {}; int ret = 0; - for (const struct downgrade_entry *src = downgrade_table; + for (const struct upgrade_downgrade_entry *src = downgrade_table; src < downgrade_table + ARRAY_SIZE(downgrade_table); src++) { if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) diff --git a/fs/bcachefs/sb-downgrade.h b/fs/bcachefs/sb-downgrade.h index bc48fd2ca70e..57e6c916fc73 100644 --- a/fs/bcachefs/sb-downgrade.h +++ b/fs/bcachefs/sb-downgrade.h @@ -5,6 +5,7 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade; int bch2_sb_downgrade_update(struct bch_fs *); +void bch2_sb_set_upgrade(struct bch_fs *, unsigned, unsigned); void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned); #endif /* _BCACHEFS_SB_DOWNGRADE_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index aee3c634f4af..427426bb326c 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -30,14 +30,12 @@ static const struct blk_holder_ops bch2_sb_handle_bdev_ops = { struct bch2_metadata_version { u16 version; const char *name; - u64 recovery_passes; }; static const struct bch2_metadata_version bch2_metadata_versions[] = { -#define x(n, v, _recovery_passes) { \ +#define x(n, v) { \ .version = v, \ .name = #n, \ - .recovery_passes = _recovery_passes, \ }, BCH_METADATA_VERSIONS() #undef x @@ -70,24 +68,6 @@ unsigned bch2_latest_compatible_version(unsigned v) return v; } -u64 bch2_upgrade_recovery_passes(struct bch_fs *c, - unsigned old_version, - unsigned new_version) -{ - u64 ret = 0; - - for (const struct bch2_metadata_version *i = bch2_metadata_versions; - i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions); - i++) - if (i->version > old_version && i->version <= new_version) { - if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK) - ret |= bch2_fsck_recovery_passes(); - ret |= i->recovery_passes; - } - - return ret &= ~RECOVERY_PASS_ALL_FSCK; -} - const char * const bch2_sb_fields[] = { #define x(name, nr) #name, BCH_SB_FIELDS() diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 654c5d4d522a..0541f655f37b 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -19,10 +19,6 @@ static inline bool bch2_version_compatible(u16 version) void bch2_version_to_text(struct printbuf *, unsigned); unsigned bch2_latest_compatible_version(unsigned); -u64 bch2_upgrade_recovery_passes(struct bch_fs *c, - unsigned, - unsigned); - static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) { return le32_to_cpu(f->u64s) * sizeof(u64); From 074cbcdaeee433a02d6d0565b936bee0915cc5da Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Jan 2024 23:29:02 -0500 Subject: [PATCH 215/226] bcachefs: fsck_err()s don't need to manually check c->sb.version anymore Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 3 --- fs/bcachefs/backpointers.c | 3 +-- fs/bcachefs/fs-common.c | 36 ++++++++++++++-------------------- fs/bcachefs/fsck.c | 16 +++++++-------- fs/bcachefs/reflink.c | 15 +++++++------- fs/bcachefs/sb-downgrade.c | 4 +++- fs/bcachefs/sb-errors_types.h | 3 ++- fs/bcachefs/snapshot.c | 17 +++++++--------- 8 files changed, 42 insertions(+), 55 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index f7fd534b85ed..a09b9d00226a 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1232,9 +1232,6 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, unsigned i, gens_offset, gens_end_offset; int ret; - if (c->sb.version < bcachefs_metadata_version_bucket_gens) - return 0; - bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); k = bch2_btree_iter_peek_slot(bucket_gens_iter); diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index a97fc2b61ee2..e358a2ffffde 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -467,8 +467,7 @@ static int check_bp_exists(struct btree_trans *trans, prt_printf(&buf, "\nbp pos "); bch2_bpos_to_text(&buf, bp_iter.pos); - if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers || - c->opts.reconstruct_alloc || + if (c->opts.reconstruct_alloc || fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index 4496cf91a4c1..1c1ea0f0c692 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -166,10 +166,8 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { - new_inode->bi_dir = dir_u->bi_inum; - new_inode->bi_dir_offset = dir_offset; - } + new_inode->bi_dir = dir_u->bi_inum; + new_inode->bi_dir_offset = dir_offset; } inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS; @@ -228,10 +226,8 @@ int bch2_link_trans(struct btree_trans *trans, if (ret) goto err; - if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { - inode_u->bi_dir = dir.inum; - inode_u->bi_dir_offset = dir_offset; - } + inode_u->bi_dir = dir.inum; + inode_u->bi_dir_offset = dir_offset; ret = bch2_inode_write(trans, &dir_iter, dir_u) ?: bch2_inode_write(trans, &inode_iter, inode_u); @@ -414,21 +410,19 @@ int bch2_rename_trans(struct btree_trans *trans, goto err; } - if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { - src_inode_u->bi_dir = dst_dir_u->bi_inum; - src_inode_u->bi_dir_offset = dst_offset; + src_inode_u->bi_dir = dst_dir_u->bi_inum; + src_inode_u->bi_dir_offset = dst_offset; - if (mode == BCH_RENAME_EXCHANGE) { - dst_inode_u->bi_dir = src_dir_u->bi_inum; - dst_inode_u->bi_dir_offset = src_offset; - } + if (mode == BCH_RENAME_EXCHANGE) { + dst_inode_u->bi_dir = src_dir_u->bi_inum; + dst_inode_u->bi_dir_offset = src_offset; + } - if (mode == BCH_RENAME_OVERWRITE && - dst_inode_u->bi_dir == dst_dir_u->bi_inum && - dst_inode_u->bi_dir_offset == src_offset) { - dst_inode_u->bi_dir = 0; - dst_inode_u->bi_dir_offset = 0; - } + if (mode == BCH_RENAME_OVERWRITE && + dst_inode_u->bi_dir == dst_dir_u->bi_inum && + dst_inode_u->bi_dir_offset == src_offset) { + dst_inode_u->bi_dir = 0; + dst_inode_u->bi_dir_offset = 0; } if (mode == BCH_RENAME_OVERWRITE) { diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index de1617ec1b59..4f0ecd605675 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -870,8 +870,7 @@ static int check_inode(struct btree_trans *trans, return 0; } - if (u.bi_flags & BCH_INODE_unlinked && - c->sb.version >= bcachefs_metadata_version_deleted_inodes) { + if (u.bi_flags & BCH_INODE_unlinked) { ret = check_inode_deleted_list(trans, k.k->p); if (ret < 0) return ret; @@ -1594,13 +1593,12 @@ static int check_dirent_target(struct btree_trans *trans, d = dirent_i_to_s_c(n); } - if (d.v->d_type == DT_SUBVOL && - target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) && - (c->sb.version < bcachefs_metadata_version_subvol_dirent || - fsck_err(c, dirent_d_parent_subvol_wrong, - "dirent has wrong d_parent_subvol field: got %u, should be %u", - le32_to_cpu(d.v->d_parent_subvol), - target->bi_parent_subvol))) { + if (fsck_err_on(d.v->d_type == DT_SUBVOL && + target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol), + c, dirent_d_parent_subvol_wrong, + "dirent has wrong d_parent_subvol field: got %u, should be %u", + le32_to_cpu(d.v->d_parent_subvol), + target->bi_parent_subvol)) { n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); ret = PTR_ERR_OR_ZERO(n); if (ret) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index e1d1bcb2a475..b24b71bc4e60 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -34,15 +34,14 @@ int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + int ret = 0; - if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix && - le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad)) { - prt_printf(err, "idx < front_pad (%llu < %u)", - le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); - return -EINVAL; - } - - return 0; + bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), + c, err, reflink_p_front_pad_bad, + "idx < front_pad (%llu < %u)", + le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); +fsck_err: + return ret; } void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index b8e70108793a..441dcb1bf160 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -38,7 +38,9 @@ x(snapshot_trees, \ RECOVERY_PASS_ALL_FSCK) \ x(snapshot_skiplists, \ - BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ + BIT_ULL(BCH_RECOVERY_PASS_check_snapshots), \ + BCH_FSCK_ERR_snapshot_bad_depth, \ + BCH_FSCK_ERR_snapshot_bad_skiplist) \ x(deleted_inodes, \ BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \ diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index e7be1f9bdaab..c08aacdfd073 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -249,7 +249,8 @@ x(dir_loop, 241) \ x(hash_table_key_duplicate, 242) \ x(hash_table_key_wrong_offset, 243) \ - x(unlinked_inode_not_on_deleted_list, 244) + x(unlinked_inode_not_on_deleted_list, 244) \ + x(reflink_p_front_pad_bad, 245) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b2306fabd1c9..56af937523ff 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -814,11 +814,10 @@ static int check_snapshot(struct btree_trans *trans, real_depth = bch2_snapshot_depth(c, parent_id); - if (le32_to_cpu(s.depth) != real_depth && - (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, snapshot_bad_depth, - "snapshot with incorrect depth field, should be %u:\n %s", - real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { + if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, + c, snapshot_bad_depth, + "snapshot with incorrect depth field, should be %u:\n %s", + real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); if (ret) @@ -832,11 +831,9 @@ static int check_snapshot(struct btree_trans *trans, if (ret < 0) goto err; - if (!ret && - (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, snapshot_bad_skiplist, - "snapshot with bad skiplist field:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { + if (fsck_err_on(!ret, c, snapshot_bad_skiplist, + "snapshot with bad skiplist field:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); if (ret) From c13fbb7de2fc4fd61a44ebfa4ba182f35e0a3286 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 4 Jan 2024 18:59:17 -0500 Subject: [PATCH 216/226] bcachefs: Improve would_deadlock trace event We now include backtraces for every thread involved in the cycle. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_locking.c | 19 ++++++++++++------- fs/bcachefs/debug.c | 2 +- fs/bcachefs/trace.h | 26 +++++++++++++++++++++++--- fs/bcachefs/util.c | 8 ++++---- fs/bcachefs/util.h | 4 ++-- 5 files changed, 42 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 1ed8327a9fa2..2d1c95c42f24 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -86,8 +86,14 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) prt_printf(out, "Found lock cycle (%u entries):", g->nr); prt_newline(out); - for (i = g->g; i < g->g + g->nr; i++) + for (i = g->g; i < g->g + g->nr; i++) { + struct task_struct *task = READ_ONCE(i->trans->locking_wait.task); + if (!task) + continue; + bch2_btree_trans_to_text(out, i->trans); + bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1); + } } static noinline void print_chain(struct printbuf *out, struct lock_graph *g) @@ -144,8 +150,7 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g) return false; } -static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans, - unsigned long ip) +static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans) { struct bch_fs *c = trans->c; @@ -157,7 +162,7 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans buf.atomic++; print_cycle(&buf, g); - trace_trans_restart_would_deadlock(trans, ip, buf.buf); + trace_trans_restart_would_deadlock(trans, buf.buf); printbuf_exit(&buf); } } @@ -165,7 +170,7 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i) { if (i == g->g) { - trace_would_deadlock(g, i->trans, _RET_IP_); + trace_would_deadlock(g, i->trans); return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock); } else { i->trans->lock_must_abort = true; @@ -222,7 +227,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) prt_printf(&buf, "backtrace:"); prt_newline(&buf); printbuf_indent_add(&buf, 2); - bch2_prt_task_backtrace(&buf, trans->locking_wait.task); + bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2); printbuf_indent_sub(&buf, 2); prt_newline(&buf); } @@ -291,7 +296,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) if (cycle) return -1; - trace_would_deadlock(&g, trans, _RET_IP_); + trace_would_deadlock(&g, trans); return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 413992b229d0..d6418948495f 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -627,7 +627,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, prt_printf(&i->buf, "backtrace:"); prt_newline(&i->buf); printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, task); + bch2_prt_task_backtrace(&i->buf, task, 0); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 427edb3e7cd6..c94876b3bb06 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -72,6 +72,27 @@ DECLARE_EVENT_CLASS(trans_str, __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str)) ); +DECLARE_EVENT_CLASS(trans_str_nocaller, + TP_PROTO(struct btree_trans *trans, const char *str), + TP_ARGS(trans, str), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __array(char, trans_fn, 32 ) + __string(str, str ) + ), + + TP_fast_assign( + __entry->dev = trans->c->dev; + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __assign_str(str, str); + ), + + TP_printk("%d,%d %s %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->trans_fn, __get_str(str)) +); + DECLARE_EVENT_CLASS(btree_node_nofs, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b), @@ -1243,11 +1264,10 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(trans_str, trans_restart_would_deadlock, +DEFINE_EVENT(trans_str_nocaller, trans_restart_would_deadlock, TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, const char *cycle), - TP_ARGS(trans, caller_ip, cycle) + TP_ARGS(trans, cycle) ); DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit, diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 2e4c5d9606de..c2ef7cddaa4f 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -267,7 +267,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) console_unlock(); } -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task) +int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr) { #ifdef CONFIG_STACKTRACE unsigned nr_entries = 0; @@ -282,7 +282,7 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task) return -1; do { - nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, 0); + nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); } while (nr_entries == stack->size && !(ret = darray_make_room(stack, stack->size * 2))); @@ -303,10 +303,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) } } -int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task) +int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr) { bch_stacktrace stack = { 0 }; - int ret = bch2_save_backtrace(&stack, task); + int ret = bch2_save_backtrace(&stack, task, skipnr + 1); bch2_prt_backtrace(out, &stack); darray_exit(&stack); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 4290e0a53b75..92acda29b581 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -347,9 +347,9 @@ void bch2_prt_u64_binary(struct printbuf *, u64, unsigned); void bch2_print_string_as_lines(const char *prefix, const char *lines); typedef DARRAY(unsigned long) bch_stacktrace; -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *); +int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned); void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); -int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *); +int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned); #define NR_QUANTILES 15 #define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) From 1f5af5fc178588ff1f1293b5ddadd4228ce5095e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 11:58:50 -0500 Subject: [PATCH 217/226] bcachefs: %pg is banished not portable to userspace Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_groups.c | 2 +- fs/bcachefs/super-io.c | 8 +++++-- fs/bcachefs/super.c | 49 ++++++++++++++++++++++++++++----------- fs/bcachefs/util.h | 9 +++++++ 4 files changed, 52 insertions(+), 16 deletions(-) diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 1cd6ba8d0cce..06a7df529b40 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -557,7 +557,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) : NULL; if (ca && percpu_ref_tryget(&ca->io_ref)) { - prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); + prt_printf(out, "/dev/%s", ca->name); percpu_ref_put(&ca->io_ref); } else if (ca) { prt_printf(out, "offline device %u", t.dev); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 427426bb326c..042e08a92d4b 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -170,8 +170,12 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; if (new_bytes > max_bytes) { - pr_err("%pg: superblock too big: want %zu but have %llu", - sb->bdev, new_bytes, max_bytes); + struct printbuf buf = PRINTBUF; + + prt_bdevname(&buf, sb->bdev); + prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes); + pr_err("%s", buf.buf); + printbuf_exit(&buf); return -BCH_ERR_ENOSPC_sb; } } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 84798059bc21..9dbc35940197 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1083,17 +1083,22 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, fs->sb->write_time != sb->sb->write_time) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "Split brain detected between %pg and %pg:", - sb->bdev, fs->bdev); + prt_str(&buf, "Split brain detected between "); + prt_bdevname(&buf, sb->bdev); + prt_str(&buf, " and "); + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ':'); prt_newline(&buf); prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq)); prt_newline(&buf); - prt_printf(&buf, "%pg ", fs->bdev); + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ' '); bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));; prt_newline(&buf); - prt_printf(&buf, "%pg ", sb->bdev); + prt_bdevname(&buf, sb->bdev); + prt_char(&buf, ' '); bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));; prt_newline(&buf); @@ -1109,13 +1114,26 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, u64 seq_from_member = le64_to_cpu(sb->sb->seq); if (seq_from_fs && seq_from_fs < seq_from_member) { - pr_err("Split brain detected between %pg and %pg:\n" - "%pg believes seq of %pg to be %llu, but %pg has %llu\n" - "Not using %pg", - sb->bdev, fs->bdev, - fs->bdev, sb->bdev, seq_from_fs, - sb->bdev, seq_from_member, - sb->bdev); + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Split brain detected between "); + prt_bdevname(&buf, sb->bdev); + prt_str(&buf, " and "); + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ':'); + prt_newline(&buf); + + prt_bdevname(&buf, fs->bdev); + prt_str(&buf, "believes seq of "); + prt_bdevname(&buf, sb->bdev); + prt_printf(&buf, " to be %llu, but ", seq_from_fs); + prt_bdevname(&buf, sb->bdev); + prt_printf(&buf, " has %llu\n", seq_from_member); + prt_str(&buf, "Not using "); + prt_bdevname(&buf, sb->bdev); + + pr_err("%s", buf.buf); + printbuf_exit(&buf); return -BCH_ERR_device_splitbrain; } @@ -1364,9 +1382,14 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) bch2_dev_sysfs_online(c, ca); + struct printbuf name = PRINTBUF; + prt_bdevname(&name, ca->disk_sb.bdev); + if (c->sb.nr_devices == 1) - snprintf(c->name, sizeof(c->name), "%pg", ca->disk_sb.bdev); - snprintf(ca->name, sizeof(ca->name), "%pg", ca->disk_sb.bdev); + strlcpy(c->name, name.buf, sizeof(c->name)); + strlcpy(ca->name, name.buf, sizeof(ca->name)); + + printbuf_exit(&name); rebalance_wakeup(c); return 0; diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 92acda29b581..c75fc31915d3 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -351,6 +351,15 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned); void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned); +static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev) +{ +#ifdef __KERNEL__ + prt_printf(out, "%pg", bdev); +#else + prt_str(out, bdev->name); +#endif +} + #define NR_QUANTILES 15 #define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) #define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) From 5b883656605eb46457b7eeb51581c78477d254b6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 12:37:47 -0500 Subject: [PATCH 218/226] bcachefs: __bch2_sb_field_to_text() Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 19 ++++++++++++------- fs/bcachefs/super-io.h | 2 ++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 042e08a92d4b..ea86921727b4 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1187,8 +1187,8 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, return ret; } -void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, - struct bch_sb_field *f) +void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) { unsigned type = le32_to_cpu(f->type); const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); @@ -1196,6 +1196,15 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, if (!out->nr_tabstops) printbuf_tabstop_push(out, 32); + if (ops->to_text) + ops->to_text(out, sb, f); +} + +void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + unsigned type = le32_to_cpu(f->type); + if (type < BCH_SB_FIELD_NR) prt_printf(out, "%s", bch2_sb_fields[type]); else @@ -1204,11 +1213,7 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, " (size %zu):", vstruct_bytes(f)); prt_newline(out); - if (ops->to_text) { - printbuf_indent_add(out, 2); - ops->to_text(out, sb, f); - printbuf_indent_sub(out, 2); - } + __bch2_sb_field_to_text(out, sb, f); } void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l) diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 0541f655f37b..95e80e06316b 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -93,6 +93,8 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) bool bch2_check_version_downgrade(struct bch_fs *); void bch2_sb_upgrade(struct bch_fs *, unsigned); +void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, + struct bch_sb_field *); void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, struct bch_sb_field *); void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *); From 5e448c48932b578e7548d7df0f8bfb46eb29fbfc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 13:03:01 -0500 Subject: [PATCH 219/226] bcachefs: print sb magic when relevant Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index ea86921727b4..b05c5f1fe09d 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -628,7 +628,9 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) && !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) { - prt_printf(err, "Not a bcachefs superblock"); + prt_str(err, "Not a bcachefs superblock (got magic "); + pr_uuid(err, sb->sb->magic.b); + prt_str(err, ")"); return -BCH_ERR_invalid_sb_magic; } @@ -1261,6 +1263,11 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, pr_uuid(out, sb->uuid.b); prt_newline(out); + prt_printf(out, "Magic number:"); + prt_tab(out); + pr_uuid(out, sb->magic.b); + prt_newline(out); + prt_str(out, "Device index:"); prt_tab(out); prt_printf(out, "%u", sb->dev_idx); From 2d02bfb01b2743da06748ba396ff7da4425488ef Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 14:17:57 -0500 Subject: [PATCH 220/226] bcachefs: improve validate_bset_keys() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 75 +++++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 378579bbe2ed..250a2a59960f 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -524,7 +524,8 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_printf(out, "at btree "); bch2_btree_pos_to_text(out, c, b); - prt_printf(out, "\n node offset %u", b->written); + prt_printf(out, "\n node offset %u/%u", + b->written, btree_ptr_sectors_written(&b->key)); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); prt_str(out, ": "); @@ -830,6 +831,23 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b, (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); } +static bool __bkey_valid(struct bch_fs *c, struct btree *b, + struct bset *i, struct bkey_packed *k) +{ + if (bkey_p_next(k) > vstruct_last(i)) + return false; + + if (k->format > KEY_FORMAT_CURRENT) + return false; + + struct printbuf buf = PRINTBUF; + struct bkey tmp; + struct bkey_s u = __bkey_disassemble(b, k, &tmp); + bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf); + printbuf_exit(&buf); + return ret; +} + static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bset *i, int write, bool have_retry, bool *saw_error) @@ -845,6 +863,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, k != vstruct_last(i);) { struct bkey_s u; struct bkey tmp; + unsigned next_good_key; if (btree_err_on(bkey_p_next(k) > vstruct_last(i), -BCH_ERR_btree_node_read_err_fixable, @@ -859,12 +878,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, btree_node_bkey_bad_format, - "invalid bkey format %u", k->format)) { - i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); - memmove_u64s_down(k, bkey_p_next(k), - (u64 *) vstruct_end(i) - (u64 *) k); - continue; - } + "invalid bkey format %u", k->format)) + goto drop_this_key; /* XXX: validate k->u64s */ if (!write) @@ -885,11 +900,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, c, NULL, b, i, btree_node_bad_bkey, "invalid bkey: %s", buf.buf); - - i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); - memmove_u64s_down(k, bkey_p_next(k), - (u64 *) vstruct_end(i) - (u64 *) k); - continue; + goto drop_this_key; } if (write) @@ -906,21 +917,45 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, prt_printf(&buf, " > "); bch2_bkey_to_text(&buf, u.k); - bch2_dump_bset(c, b, i, 0); - if (btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, btree_node_bkey_out_of_order, - "%s", buf.buf)) { - i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); - memmove_u64s_down(k, bkey_p_next(k), - (u64 *) vstruct_end(i) - (u64 *) k); - continue; - } + "%s", buf.buf)) + goto drop_this_key; } prev = k; k = bkey_p_next(k); + continue; +drop_this_key: + next_good_key = k->u64s; + + if (!next_good_key || + (BSET_BIG_ENDIAN(i) == CPU_BIG_ENDIAN && + version >= bcachefs_metadata_version_snapshot)) { + /* + * only do scanning if bch2_bkey_compat() has nothing to + * do + */ + + if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { + for (next_good_key = 1; + next_good_key < (u64 *) vstruct_last(i) - (u64 *) k; + next_good_key++) + if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) + goto got_good_key; + + } + + /* + * didn't find a good key, have to truncate the rest of + * the bset + */ + next_good_key = (u64 *) vstruct_last(i) - (u64 *) k; + } +got_good_key: + le16_add_cpu(&i->u64s, -next_good_key); + memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); } fsck_err: printbuf_exit(&buf); From 4819b66e29893828d3efa76ecdf1ede9e036db35 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 11:59:03 -0500 Subject: [PATCH 221/226] bcachefs: improve checksum error messages new helpers: - bch2_csum_to_text() - bch2_csum_err_msg() standardize our checksum error messages a bit, and print out the checksums a bit more nicely. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 18 ++++++++++------ fs/bcachefs/checksum.h | 23 ++++++++++++++++++++ fs/bcachefs/io_read.c | 11 +++++++--- fs/bcachefs/journal_io.c | 45 +++++++++++++++++++++++++++------------- fs/bcachefs/super-io.c | 10 ++++----- 5 files changed, 78 insertions(+), 29 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 250a2a59960f..33db48e2153f 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1042,8 +1042,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, nonce = btree_nonce(i, b->written << 9); - csum_bad = bch2_crc_cmp(b->data->csum, - csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data)); + struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); + csum_bad = bch2_crc_cmp(b->data->csum, csum); if (csum_bad) bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); @@ -1051,7 +1051,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, bset_bad_csum, - "invalid checksum"); + "%s", + (printbuf_reset(&buf), + bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), + buf.buf)); ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, @@ -1080,8 +1083,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, "unknown checksum type %llu", BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); - csum_bad = bch2_crc_cmp(bne->csum, - csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne)); + struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); + csum_bad = bch2_crc_cmp(bne->csum, csum); if (csum_bad) bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); @@ -1089,7 +1092,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, bset_bad_csum, - "invalid checksum"); + "%s", + (printbuf_reset(&buf), + bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), + buf.buf)); ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index 13998388c545..1b8c2c1016dc 100644 --- a/fs/bcachefs/checksum.h +++ b/fs/bcachefs/checksum.h @@ -45,6 +45,29 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce, bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\ }) +static inline void bch2_csum_to_text(struct printbuf *out, + enum bch_csum_type type, + struct bch_csum csum) +{ + const u8 *p = (u8 *) &csum; + unsigned bytes = type < BCH_CSUM_NR ? bch_crc_bytes[type] : 16; + + for (unsigned i = 0; i < bytes; i++) + prt_hex_byte(out, p[i]); +} + +static inline void bch2_csum_err_msg(struct printbuf *out, + enum bch_csum_type type, + struct bch_csum expected, + struct bch_csum got) +{ + prt_printf(out, "checksum error: got "); + bch2_csum_to_text(out, type, got); + prt_str(out, " should be "); + bch2_csum_to_text(out, type, expected); + prt_printf(out, " type %s", bch2_csum_types[type]); +} + int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); int bch2_request_key(struct bch_sb *, struct bch_key *); #ifndef __KERNEL__ diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 5c2d118eaf6f..3c574d8873a1 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -642,12 +642,17 @@ static void __bch2_read_endio(struct work_struct *work) goto out; } + struct printbuf buf = PRINTBUF; + buf.atomic++; + prt_str(&buf, "data "); + bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); + bch_err_inum_offset_ratelimited(ca, rbio->read_pos.inode, rbio->read_pos.offset << 9, - "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", - rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, - csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); + "data %s", buf.buf); + printbuf_exit(&buf); + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); goto out; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index c5bc58247146..b0f4dd491e12 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -27,11 +27,15 @@ static struct nonce journal_nonce(const struct jset *jset) }}; } -static bool jset_csum_good(struct bch_fs *c, struct jset *j) +static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *csum) { - return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) && - !bch2_crc_cmp(j->csum, - csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j)); + if (!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j))) { + *csum = (struct bch_csum) {}; + return false; + } + + *csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j); + return !bch2_crc_cmp(j->csum, *csum); } static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) @@ -934,6 +938,7 @@ static int journal_read_bucket(struct bch_dev *ca, u64 offset = bucket_to_sector(ca, ja->buckets[bucket]), end = offset + ca->mi.bucket_size; bool saw_bad = false, csum_good; + struct printbuf err = PRINTBUF; int ret = 0; pr_debug("reading %u", bucket); @@ -966,7 +971,7 @@ static int journal_read_bucket(struct bch_dev *ca, * found on a different device, and missing or * no journal entries will be handled later */ - return 0; + goto out; } j = buf->data; @@ -983,12 +988,12 @@ static int journal_read_bucket(struct bch_dev *ca, ret = journal_read_buf_realloc(buf, vstruct_bytes(j)); if (ret) - return ret; + goto err; } goto reread; case JOURNAL_ENTRY_NONE: if (!saw_bad) - return 0; + goto out; /* * On checksum error we don't really trust the size * field of the journal entry we read, so try reading @@ -997,7 +1002,7 @@ static int journal_read_bucket(struct bch_dev *ca, sectors = block_sectors(c); goto next_block; default: - return ret; + goto err; } /* @@ -1007,20 +1012,28 @@ static int journal_read_bucket(struct bch_dev *ca, * bucket: */ if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) - return 0; + goto out; ja->bucket_seq[bucket] = le64_to_cpu(j->seq); - csum_good = jset_csum_good(c, j); + enum bch_csum_type csum_type = JSET_CSUM_TYPE(j); + struct bch_csum csum; + csum_good = jset_csum_good(c, j, &csum); + if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum, - "journal checksum error")) + "%s", + (printbuf_reset(&err), + prt_str(&err, "journal "), + bch2_csum_err_msg(&err, csum_type, j->csum, csum), + err.buf))) saw_bad = true; ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), j->encrypted_start, vstruct_end(j) - (void *) j->encrypted_start); bch2_fs_fatal_err_on(ret, c, - "error decrypting journal entry: %i", ret); + "error decrypting journal entry: %s", + bch2_err_str(ret)); mutex_lock(&jlist->lock); ret = journal_entry_add(c, ca, (struct journal_ptr) { @@ -1039,7 +1052,7 @@ static int journal_read_bucket(struct bch_dev *ca, case JOURNAL_ENTRY_ADD_OUT_OF_RANGE: break; default: - return ret; + goto err; } next_block: pr_debug("next"); @@ -1048,7 +1061,11 @@ static int journal_read_bucket(struct bch_dev *ca, j = ((void *) j) + (sectors << 9); } - return 0; +out: + ret = 0; +err: + printbuf_exit(&err); + return ret; } static CLOSURE_CALLBACK(bch2_journal_read_device) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index b05c5f1fe09d..c05983fe6815 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -612,7 +612,6 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err) { - struct bch_csum csum; size_t bytes; int ret; reread: @@ -653,17 +652,16 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf goto reread; } - if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) { + enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb); + if (csum_type >= BCH_CSUM_NR) { prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); return -BCH_ERR_invalid_sb_csum_type; } /* XXX: verify MACs */ - csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), - null_nonce(), sb->sb); - + struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb); if (bch2_crc_cmp(csum, sb->sb->csum)) { - prt_printf(err, "bad checksum"); + bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum); return -BCH_ERR_invalid_sb_csum; } From d04d2727438575e1884ccd4a2eff34f5ee5acedd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 15:14:50 -0500 Subject: [PATCH 222/226] bcachefs: bch2_dump_bset() doesn't choke on u64s == 0 Signed-off-by: Kent Overstreet --- fs/bcachefs/bset.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index bb73ba9017b0..74bf8eb90a4c 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -68,6 +68,12 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, _k = _n) { _n = bkey_p_next(_k); + if (!_k->u64s) { + printk(KERN_ERR "block %u key %5zu - u64s 0? aieee!\n", set, + _k->_data - i->_data); + break; + } + k = bkey_disassemble(b, _k, &uk); printbuf_reset(&buf); From 72e2c920e4dcf73d37e513d404fa561c7b6f5ebc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 18:23:44 -0500 Subject: [PATCH 223/226] bcachefs: Restart recovery passes more reliably Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index e1f0da6a717e..725214605a05 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -696,8 +696,11 @@ static int bch2_run_recovery_passes(struct bch_fs *c) while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { if (should_run_recovery_pass(c, c->curr_recovery_pass)) { + unsigned pass = c->curr_recovery_pass; + ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) + if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || + (ret && c->curr_recovery_pass < pass)) continue; if (ret) break; From e7999235e6c437b99fad03d8301a4341be0d2bb5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 19:04:42 -0500 Subject: [PATCH 224/226] bcachefs: fix simulateously upgrading & downgrading Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index c05983fe6815..55926b81eede 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1088,13 +1088,22 @@ bool bch2_check_version_downgrade(struct bch_fs *c) /* * Downgrade, if superblock is at a higher version than currently * supported: + * + * c->sb will be checked before we write the superblock, so update it as + * well: */ - if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) + if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) { SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); - if (c->sb.version > bcachefs_metadata_version_current) + c->sb.version_upgrade_complete = bcachefs_metadata_version_current; + } + if (c->sb.version > bcachefs_metadata_version_current) { c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); - if (c->sb.version_min > bcachefs_metadata_version_current) + c->sb.version = bcachefs_metadata_version_current; + } + if (c->sb.version_min > bcachefs_metadata_version_current) { c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); + c->sb.version_min = bcachefs_metadata_version_current; + } c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); return ret; } From f5d4481c3edddf759e21e18751c168de879e3471 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Dec 2023 02:17:18 -0500 Subject: [PATCH 225/226] bcachefs: move "ptrs not changing" optimization to bch2_trigger_extent() This is useful for btree ptrs as well, when we're just updating sectors_written. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 12 ++++++++++++ fs/bcachefs/reflink.c | 8 -------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 67b7e79648b1..d83ea0e53df3 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1023,6 +1023,18 @@ int bch2_trigger_extent(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, unsigned flags) { + struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c); + struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old); + unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start; + unsigned old_ptrs_bytes = (void *) old_ptrs.end - (void *) old_ptrs.start; + + /* if pointers aren't changing - nothing to do: */ + if (new_ptrs_bytes == old_ptrs_bytes && + !memcmp(new_ptrs.start, + old_ptrs.start, + new_ptrs_bytes)) + return 0; + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { struct bch_fs *c = trans->c; int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index b24b71bc4e60..faa5d3670058 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -301,14 +301,6 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans, (flags & BTREE_TRIGGER_INSERT)) check_indirect_extent_deleting(new, &flags); - if (old.k->type == KEY_TYPE_reflink_v && - new.k->type == KEY_TYPE_reflink_v && - old.k->u64s == new.k->u64s && - !memcmp(bkey_s_c_to_reflink_v(old).v->start, - bkey_s_to_reflink_v(new).v->start, - bkey_val_bytes(new.k) - 8)) - return 0; - return bch2_trigger_extent(trans, btree_id, level, old, new, flags); } From 169de41985f53320580f3d347534966ea83343ca Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 20:02:33 -0500 Subject: [PATCH 226/226] bcachefs: eytzinger0_find() search should be const Signed-off-by: Kent Overstreet --- fs/bcachefs/eytzinger.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h index 05429c9631cd..9637f636e32d 100644 --- a/fs/bcachefs/eytzinger.h +++ b/fs/bcachefs/eytzinger.h @@ -261,11 +261,11 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, #define eytzinger0_find(base, nr, size, _cmp, search) \ ({ \ - void *_base = (base); \ - void *_search = (search); \ - size_t _nr = (nr); \ - size_t _size = (size); \ - size_t _i = 0; \ + void *_base = (base); \ + const void *_search = (search); \ + size_t _nr = (nr); \ + size_t _size = (size); \ + size_t _i = 0; \ int _res; \ \ while (_i < _nr && \