From 822835ffeae411bbc8af104da9331fdf63a7bc12 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 1 Apr 2022 01:29:59 -0400 Subject: [PATCH] bcachefs: Fold bucket_state in to BCH_DATA_TYPES() Previously, we were missing accounting for buckets in need_gc_gens and need_discard states. This matters because buckets in those states need other btree operations done before they can be used, so they can't be counted when checking the current number of free buckets against the allocation watermark. Also, we weren't directly counting free buckets at all. Now, data type 0 == BCH_DATA_free, and free buckets are counted; this means we can get rid of the separate (poorly defined) count of unavailable buckets. This is a new on-disk format version, with upgrade and fsck required for the accounting changes. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 143 +++++++++++++++++++++------------ fs/bcachefs/alloc_background.h | 52 ++++++------ fs/bcachefs/alloc_foreground.c | 45 +++++++---- fs/bcachefs/alloc_types.h | 6 +- fs/bcachefs/bcachefs_format.h | 35 +++++++- fs/bcachefs/bcachefs_ioctl.h | 11 +-- fs/bcachefs/btree_gc.c | 32 +++++++- fs/bcachefs/btree_gc.h | 6 ++ fs/bcachefs/buckets.c | 90 +++++++++------------ fs/bcachefs/buckets.h | 23 +++--- fs/bcachefs/buckets_types.h | 1 - fs/bcachefs/chardev.c | 9 +-- fs/bcachefs/journal_io.c | 4 +- fs/bcachefs/lru.c | 2 +- fs/bcachefs/movinggc.c | 11 ++- fs/bcachefs/recovery.c | 15 ++-- fs/bcachefs/super-io.c | 1 - fs/bcachefs/super.c | 2 + fs/bcachefs/sysfs.c | 12 ++- 19 files changed, 298 insertions(+), 202 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 090fdee58157..3feaac33aaff 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -35,15 +35,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { #undef x }; -const char * const bch2_bucket_states[] = { - "free", - "need gc gens", - "need discard", - "cached", - "dirty", - NULL -}; - 
struct bkey_alloc_unpacked { u64 journal_seq; u64 bucket; @@ -355,19 +346,54 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, } if (rw == WRITE) { - if (a.v->cached_sectors && - !a.v->dirty_sectors && - !a.v->io_time[READ]) { - pr_buf(err, "cached bucket with read_time == 0"); + if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) { + pr_buf(err, "invalid data type (got %u should be %u)", + a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); return -EINVAL; } - if (!a.v->dirty_sectors && - !a.v->cached_sectors && - !a.v->stripe && - a.v->data_type) { - pr_buf(err, "empty, but data_type nonzero"); - return -EINVAL; + switch (a.v->data_type) { + case BCH_DATA_free: + case BCH_DATA_need_gc_gens: + case BCH_DATA_need_discard: + if (a.v->dirty_sectors || + a.v->cached_sectors || + a.v->stripe) { + pr_buf(err, "empty data type free but have data"); + return -EINVAL; + } + break; + case BCH_DATA_sb: + case BCH_DATA_journal: + case BCH_DATA_btree: + case BCH_DATA_user: + case BCH_DATA_parity: + if (!a.v->dirty_sectors) { + pr_buf(err, "data_type %s but dirty_sectors==0", + bch2_data_types[a.v->data_type]); + return -EINVAL; + } + break; + case BCH_DATA_cached: + if (!a.v->cached_sectors || + a.v->dirty_sectors || + a.v->stripe) { + pr_buf(err, "data type inconsistency"); + return -EINVAL; + } + + if (!a.v->io_time[READ]) { + pr_buf(err, "cached bucket with read_time == 0"); + return -EINVAL; + } + break; + case BCH_DATA_stripe: + if (!a.v->stripe) { + pr_buf(err, "data_type %s but stripe==0", + bch2_data_types[a.v->data_type]); + return -EINVAL; + } + break; } } @@ -394,9 +420,11 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c bch2_alloc_to_v4(k, &a); - pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu", + pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu need_inc_gen %llu", a.gen, a.oldest_gen, bch2_data_types[a.data_type], - a.journal_seq, 
BCH_ALLOC_V4_NEED_DISCARD(&a)); + a.journal_seq, + BCH_ALLOC_V4_NEED_DISCARD(&a), + BCH_ALLOC_V4_NEED_INC_GEN(&a)); pr_buf(out, " dirty_sectors %u", a.dirty_sectors); pr_buf(out, " cached_sectors %u", a.cached_sectors); pr_buf(out, " stripe %u", a.stripe); @@ -437,7 +465,7 @@ int bch2_alloc_read(struct bch_fs *c) static int bch2_bucket_do_index(struct btree_trans *trans, struct bkey_s_c alloc_k, - struct bch_alloc_v4 a, + const struct bch_alloc_v4 *a, bool set) { struct bch_fs *c = trans->c; @@ -445,15 +473,14 @@ static int bch2_bucket_do_index(struct btree_trans *trans, struct btree_iter iter; struct bkey_s_c old; struct bkey_i *k; - enum bucket_state state = bucket_state(a); enum btree_id btree; enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted; enum bch_bkey_type new_type = set ? KEY_TYPE_set : KEY_TYPE_deleted; struct printbuf buf = PRINTBUF; int ret; - if (state != BUCKET_free && - state != BUCKET_need_discard) + if (a->data_type != BCH_DATA_free && + a->data_type != BCH_DATA_need_discard) return 0; k = bch2_trans_kmalloc(trans, sizeof(*k)); @@ -463,13 +490,13 @@ static int bch2_bucket_do_index(struct btree_trans *trans, bkey_init(&k->k); k->k.type = new_type; - switch (state) { - case BUCKET_free: + switch (a->data_type) { + case BCH_DATA_free: btree = BTREE_ID_freespace; - k->k.p = alloc_freespace_pos(alloc_k.k->p, a); + k->k.p = alloc_freespace_pos(alloc_k.k->p, *a); bch2_key_resize(&k->k, 1); break; - case BUCKET_need_discard: + case BCH_DATA_need_discard: btree = BTREE_ID_need_discard; k->k.p = alloc_k.k->p; break; @@ -523,6 +550,8 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, bch2_alloc_to_v4(old, &old_a); new_a = &bkey_i_to_alloc_v4(new)->v; + new_a->data_type = alloc_data_type(*new_a, new_a->data_type); + if (new_a->dirty_sectors > old_a.dirty_sectors || new_a->cached_sectors > old_a.cached_sectors) { new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); @@ -531,18 +560,18 @@ int 
bch2_trans_mark_alloc(struct btree_trans *trans, SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); } - if (old_a.data_type && !new_a->data_type && - old_a.gen == new_a->gen && + if (data_type_is_empty(new_a->data_type) && + BCH_ALLOC_V4_NEED_INC_GEN(new_a) && !bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) { new_a->gen++; SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); } - if (bucket_state(old_a) != bucket_state(*new_a) || - (bucket_state(*new_a) == BUCKET_free && + if (old_a.data_type != new_a->data_type || + (new_a->data_type == BCH_DATA_free && alloc_freespace_genbits(old_a) != alloc_freespace_genbits(*new_a))) { - ret = bch2_bucket_do_index(trans, old, old_a, false) ?: - bch2_bucket_do_index(trans, bkey_i_to_s_c(new), *new_a, true); + ret = bch2_bucket_do_index(trans, old, &old_a, false) ?: + bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true); if (ret) return ret; } @@ -594,9 +623,9 @@ static int bch2_check_alloc_key(struct btree_trans *trans, bch2_alloc_to_v4(alloc_k, &a); - discard_key_type = bucket_state(a) == BUCKET_need_discard + discard_key_type = a.data_type == BCH_DATA_need_discard ? KEY_TYPE_set : 0; - freespace_key_type = bucket_state(a) == BUCKET_free + freespace_key_type = a.data_type == BCH_DATA_free ? KEY_TYPE_set : 0; bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, @@ -678,9 +707,9 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans, u64 genbits; struct bpos pos; struct bkey_i *update; - enum bucket_state state = iter->btree_id == BTREE_ID_need_discard - ? BUCKET_need_discard - : BUCKET_free; + enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard + ? 
BCH_DATA_need_discard + : BCH_DATA_free; struct printbuf buf = PRINTBUF; int ret; @@ -711,13 +740,13 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans, bch2_alloc_to_v4(k, &a); - if (fsck_err_on(bucket_state(a) != state || - (state == BUCKET_free && + if (fsck_err_on(a.data_type != state || + (state == BCH_DATA_free && genbits != alloc_freespace_genbits(a)), c, "%s\n incorrectly set in %s index (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, k), buf.buf), bch2_btree_ids[iter->btree_id], - bucket_state(a) == state, + a.data_type == state, genbits >> 56, alloc_freespace_genbits(a) >> 56)) goto delete; out: @@ -818,7 +847,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, bch2_alloc_to_v4(alloc_k, &a); - if (bucket_state(a) != BUCKET_cached) + if (a.data_type != BCH_DATA_cached) return 0; bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru, @@ -928,10 +957,19 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, goto write; } - BUG_ON(a->v.journal_seq > c->journal.flushed_seq_ondisk); + if (bch2_fs_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, c, + "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" + "%s", + a->v.journal_seq, + c->journal.flushed_seq_ondisk, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = -EIO; + goto out; + } - if (bch2_fs_inconsistent_on(!BCH_ALLOC_V4_NEED_DISCARD(&a->v), c, - "%s\n incorrectly set in need_discard btree", + if (bch2_fs_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, c, + "bucket incorrectly set in need_discard btree\n" + "%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = -EIO; goto out; @@ -955,6 +993,7 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, } SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); + a->v.data_type = alloc_data_type(a->v, a->v.data_type); write: ret = bch2_trans_update(trans, &iter, &a->k_i, 0); out: @@ -1101,12 +1140,16 @@ 
static void bch2_do_invalidates_work(struct work_struct *work) bch2_trans_init(&trans, c, 0, 0); - for_each_member_device(ca, c, i) - while (!ret && should_invalidate_buckets(ca)) + for_each_member_device(ca, c, i) { + s64 nr_to_invalidate = + should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); + + while (!ret && nr_to_invalidate-- >= 0) ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOFAIL, invalidate_one_bucket(&trans, ca)); + } bch2_trans_exit(&trans); percpu_ref_put(&c->writes); @@ -1139,7 +1182,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca) bch2_alloc_to_v4(k, &a); ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW, - bch2_bucket_do_index(&trans, k, a, true)); + bch2_bucket_do_index(&trans, k, &a, true)); if (ret) break; } diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 11e0bca3e7f2..2bc622b305c2 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -28,32 +28,35 @@ static inline u8 alloc_gc_gen(struct bch_alloc_v4 a) return a.gen - a.oldest_gen; } -enum bucket_state { - BUCKET_free, - BUCKET_need_gc_gens, - BUCKET_need_discard, - BUCKET_cached, - BUCKET_dirty, -}; - -extern const char * const bch2_bucket_states[]; - -static inline enum bucket_state bucket_state(struct bch_alloc_v4 a) +static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors, + u32 cached_sectors, + u32 stripe, + struct bch_alloc_v4 a, + enum bch_data_type data_type) { - if (a.dirty_sectors || a.stripe) - return BUCKET_dirty; - if (a.cached_sectors) - return BUCKET_cached; + if (dirty_sectors) + return data_type; + if (stripe) + return BCH_DATA_stripe; + if (cached_sectors) + return BCH_DATA_cached; if (BCH_ALLOC_V4_NEED_DISCARD(&a)) - return BUCKET_need_discard; + return BCH_DATA_need_discard; if (alloc_gc_gen(a) >= BUCKET_GC_GEN_MAX) - return BUCKET_need_gc_gens; - return BUCKET_free; + return BCH_DATA_need_gc_gens; + return 
BCH_DATA_free; +} + +static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a, + enum bch_data_type data_type) +{ + return __alloc_data_type(a.dirty_sectors, a.cached_sectors, + a.stripe, a, data_type); } static inline u64 alloc_lru_idx(struct bch_alloc_v4 a) { - return bucket_state(a) == BUCKET_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; } static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a) @@ -128,13 +131,14 @@ int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_do_discards(struct bch_fs *); -static inline bool should_invalidate_buckets(struct bch_dev *ca) +static inline u64 should_invalidate_buckets(struct bch_dev *ca, + struct bch_dev_usage u) { - struct bch_dev_usage u = bch2_dev_usage_read(ca); + u64 free = u.d[BCH_DATA_free].buckets + + u.d[BCH_DATA_need_discard].buckets; - return u.d[BCH_DATA_cached].buckets && - u.buckets_unavailable + u.d[BCH_DATA_cached].buckets < - ca->mi.nbuckets >> 7; + return clamp_t(s64, (ca->mi.nbuckets >> 7) - free, + 0, u.d[BCH_DATA_cached].buckets); } void bch2_do_invalidates(struct bch_fs *); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 01abcf43341f..14162dd4d696 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -331,7 +331,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc } - if (a.data_type != BUCKET_free) { + if (a.data_type != BCH_DATA_free) { pr_buf(&buf, "non free bucket in freespace btree\n" " freespace key "); bch2_bkey_val_to_text(&buf, c, freespace_k); @@ -417,7 +417,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans, bch2_alloc_to_v4(k, &a); - if (bucket_state(a) != BUCKET_free) + if (a.data_type != BCH_DATA_free) continue; (*buckets_seen)++; @@ -517,27 +517,31 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct 
open_bucket *ob = NULL; - u64 avail = dev_buckets_available(ca, reserve); + struct bch_dev_usage usage; + u64 avail; u64 buckets_seen = 0; u64 skipped_open = 0; u64 skipped_need_journal_commit = 0; u64 skipped_nouse = 0; - - if (may_alloc_partial) { - ob = try_alloc_partial_bucket(c, ca, reserve); - if (ob) - return ob; - } + bool waiting = false; again: + usage = bch2_dev_usage_read(ca); + avail = __dev_buckets_available(ca, usage,reserve); + + if (usage.d[BCH_DATA_need_discard].buckets > avail) + bch2_do_discards(c); + + if (usage.d[BCH_DATA_need_gc_gens].buckets > avail) + bch2_do_gc_gens(c); + + if (should_invalidate_buckets(ca, usage)) + bch2_do_invalidates(c); + if (!avail) { - if (cl) { + if (cl && !waiting) { closure_wait(&c->freelist_wait, cl); - /* recheck after putting ourself on waitlist */ - avail = dev_buckets_available(ca, reserve); - if (avail) { - closure_wake_up(&c->freelist_wait); - goto again; - } + waiting = true; + goto again; } if (!c->blocked_allocate) @@ -547,6 +551,15 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, goto err; } + if (waiting) + closure_wake_up(&c->freelist_wait); + + if (may_alloc_partial) { + ob = try_alloc_partial_bucket(c, ca, reserve); + if (ob) + return ob; + } + ob = likely(ca->mi.freespace_initialized) ? 
bch2_bucket_alloc_freelist(trans, ca, reserve, &buckets_seen, diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index b3bef7074511..5eed5ce67c57 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -43,14 +43,14 @@ struct open_bucket { * the block in the stripe this open_bucket corresponds to: */ u8 ec_idx; - enum bch_data_type data_type:3; + enum bch_data_type data_type:8; unsigned valid:1; unsigned on_partial_list:1; - int alloc_reserve:3; + unsigned alloc_reserve:3; - unsigned sectors_free; u8 dev; u8 gen; + u32 sectors_free; u64 bucket; struct ec_stripe_new *ec; }; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 5faa42baeeba..a84a8e088953 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1222,13 +1222,16 @@ LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); /* BCH_SB_FIELD_replicas: */ #define BCH_DATA_TYPES() \ - x(none, 0) \ + x(free, 0) \ x(sb, 1) \ x(journal, 2) \ x(btree, 3) \ x(user, 4) \ x(cached, 5) \ - x(parity, 6) + x(parity, 6) \ + x(stripe, 7) \ + x(need_gc_gens, 8) \ + x(need_discard, 9) enum bch_data_type { #define x(t, n) BCH_DATA_##t, @@ -1237,6 +1240,29 @@ enum bch_data_type { BCH_DATA_NR }; +static inline bool data_type_is_empty(enum bch_data_type type) +{ + switch (type) { + case BCH_DATA_free: + case BCH_DATA_need_gc_gens: + case BCH_DATA_need_discard: + return true; + default: + return false; + } +} + +static inline bool data_type_is_hidden(enum bch_data_type type) +{ + switch (type) { + case BCH_DATA_sb: + case BCH_DATA_journal: + return true; + default: + return false; + } +} + struct bch_replicas_entry_v0 { __u8 data_type; __u8 nr_devs; @@ -1364,7 +1390,8 @@ struct bch_sb_field_journal_seq_blacklist { x(subvol_dirent, 17) \ x(inode_v2, 18) \ x(freespace, 19) \ - x(alloc_v4, 20) + x(alloc_v4, 20) \ + x(new_data_types, 21) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1822,7 +1849,7 @@ 
struct jset_entry_dev_usage { __u32 pad; __le64 buckets_ec; - __le64 buckets_unavailable; + __le64 _buckets_unavailable; /* No longer used */ struct jset_entry_dev_usage_type d[]; } __attribute__((packed)); diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 66ab3aea9767..5e0062c6ec5c 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -285,13 +285,14 @@ struct bch_ioctl_dev_usage { __u32 bucket_size; __u64 nr_buckets; - __u64 available_buckets; - __u64 buckets[BCH_DATA_NR]; - __u64 sectors[BCH_DATA_NR]; + __u64 buckets_ec; - __u64 ec_buckets; - __u64 ec_sectors; + struct bch_ioctl_dev_usage_type { + __u64 buckets; + __u64 sectors; + __u64 fragmented; + } d[BCH_DATA_NR]; }; /* diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 14b772cd8fe5..0b1717120cc3 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1216,7 +1216,6 @@ static int bch2_gc_done(struct bch_fs *c, dev_usage_u64s()); copy_dev_field(buckets_ec, "buckets_ec"); - copy_dev_field(buckets_unavailable, "buckets_unavailable"); for (i = 0; i < BCH_DATA_NR; i++) { copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]); @@ -1301,6 +1300,9 @@ static int bch2_gc_start(struct bch_fs *c, percpu_ref_put(&ca->ref); return -ENOMEM; } + + this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets, + ca->mi.nbuckets - ca->mi.first_bucket); } return 0; @@ -1325,10 +1327,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode); - struct bucket gc; + struct bucket gc, *b; struct bkey_s_c k; struct bkey_i_alloc_v4 *a; struct bch_alloc_v4 old, new; + enum bch_data_type type; int ret; k = bch2_btree_iter_peek_slot(iter); @@ -1340,7 +1343,29 @@ static int bch2_alloc_write_key(struct btree_trans *trans, new = old; percpu_down_read(&c->mark_lock); - gc = *gc_bucket(ca, iter->pos.offset); + b = gc_bucket(ca, iter->pos.offset); + + /* + * 
b->data_type doesn't yet include need_discard & need_gc_gen states - + * fix that here: + */ + type = __alloc_data_type(b->dirty_sectors, + b->cached_sectors, + b->stripe, + old, + b->data_type); + if (b->data_type != type) { + struct bch_dev_usage *u; + + preempt_disable(); + u = this_cpu_ptr(ca->usage_gc); + u->d[b->data_type].buckets--; + b->data_type = type; + u->d[b->data_type].buckets++; + preempt_enable(); + } + + gc = *b; percpu_up_read(&c->mark_lock); if (metadata_only && @@ -1926,6 +1951,7 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i return ret; a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; + a_mut->v.data_type = alloc_data_type(a_mut->v, a_mut->v.data_type); return bch2_trans_update(trans, iter, &a_mut->k_i, 0); } diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h index 0665f5941fcc..8de54005e4ea 100644 --- a/fs/bcachefs/btree_gc.h +++ b/fs/bcachefs/btree_gc.h @@ -102,4 +102,10 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos) return ret; } +static inline void bch2_do_gc_gens(struct bch_fs *c) +{ + atomic_inc(&c->kick_gc); + wake_up_process(c->gc_thread); +} + #endif /* _BCACHEFS_BTREE_GC_H */ diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 31720093de45..7fa76e737aa7 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -283,9 +283,9 @@ bch2_fs_usage_read_short(struct bch_fs *c) return ret; } -static inline int is_unavailable_bucket(struct bch_alloc_v4 a) +void bch2_dev_usage_init(struct bch_dev *ca) { - return a.dirty_sectors || a.stripe; + ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket; } static inline int bucket_sectors_fragmented(struct bch_dev *ca, @@ -296,24 +296,6 @@ static inline int bucket_sectors_fragmented(struct bch_dev *ca, : 0; } -static inline enum bch_data_type bucket_type(struct bch_alloc_v4 a) -{ - return a.cached_sectors && !a.dirty_sectors - ? 
BCH_DATA_cached - : a.data_type; -} - -static inline void account_bucket(struct bch_fs_usage *fs_usage, - struct bch_dev_usage *dev_usage, - enum bch_data_type type, - int nr, s64 size) -{ - if (type == BCH_DATA_sb || type == BCH_DATA_journal) - fs_usage->hidden += size; - - dev_usage->d[type].buckets += nr; -} - static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, struct bch_alloc_v4 old, struct bch_alloc_v4 new, @@ -324,23 +306,25 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, preempt_disable(); fs_usage = fs_usage_ptr(c, journal_seq, gc); + + if (data_type_is_hidden(old.data_type)) + fs_usage->hidden -= ca->mi.bucket_size; + if (data_type_is_hidden(new.data_type)) + fs_usage->hidden += ca->mi.bucket_size; + u = dev_usage_ptr(ca, journal_seq, gc); - if (bucket_type(old)) - account_bucket(fs_usage, u, bucket_type(old), - -1, -ca->mi.bucket_size); + u->d[old.data_type].buckets--; + u->d[new.data_type].buckets++; - if (bucket_type(new)) - account_bucket(fs_usage, u, bucket_type(new), - 1, ca->mi.bucket_size); - - u->buckets_unavailable += - is_unavailable_bucket(new) - is_unavailable_bucket(old); + u->buckets_ec -= (int) !!old.stripe; + u->buckets_ec += (int) !!new.stripe; u->d[old.data_type].sectors -= old.dirty_sectors; u->d[new.data_type].sectors += new.dirty_sectors; - u->d[BCH_DATA_cached].sectors += - (int) new.cached_sectors - (int) old.cached_sectors; + + u->d[BCH_DATA_cached].sectors += new.cached_sectors; + u->d[BCH_DATA_cached].sectors -= old.cached_sectors; u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old); u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new); @@ -531,7 +515,8 @@ int bch2_mark_alloc(struct btree_trans *trans, bch2_alloc_to_v4(new, &new_a); if ((flags & BTREE_TRIGGER_INSERT) && - !old_a.data_type != !new_a.data_type && + data_type_is_empty(old_a.data_type) != + data_type_is_empty(new_a.data_type) && new.k->type == KEY_TYPE_alloc_v4) { struct bch_alloc_v4 *v 
= (struct bch_alloc_v4 *) new.v; @@ -542,14 +527,16 @@ int bch2_mark_alloc(struct btree_trans *trans, * before the bucket became empty again, then the we don't have * to wait on a journal flush before we can reuse the bucket: */ - new_a.journal_seq = !new_a.data_type && + new_a.journal_seq = data_type_is_empty(new_a.data_type) && (journal_seq == v->journal_seq || bch2_journal_noflush_seq(&c->journal, v->journal_seq)) ? 0 : journal_seq; v->journal_seq = new_a.journal_seq; } - if (old_a.data_type && !new_a.data_type && new_a.journal_seq) { + if (!data_type_is_empty(old_a.data_type) && + data_type_is_empty(new_a.data_type) && + new_a.journal_seq) { ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, c->journal.flushed_seq_ondisk, new.k->p.inode, new.k->p.offset, @@ -561,24 +548,21 @@ int bch2_mark_alloc(struct btree_trans *trans, } } - if (!new_a.data_type && + if (new_a.data_type == BCH_DATA_free && (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) closure_wake_up(&c->freelist_wait); - if ((flags & BTREE_TRIGGER_INSERT) && - BCH_ALLOC_V4_NEED_DISCARD(&new_a) && - !new_a.journal_seq) + if (new_a.data_type == BCH_DATA_need_discard && + (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) bch2_do_discards(c); - if (!old_a.data_type && - new_a.data_type && - should_invalidate_buckets(ca)) + if (old_a.data_type != BCH_DATA_cached && + new_a.data_type == BCH_DATA_cached && + should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) bch2_do_invalidates(c); - if (bucket_state(new_a) == BUCKET_need_gc_gens) { - atomic_inc(&c->kick_gc); - wake_up_process(c->gc_thread); - } + if (new_a.data_type == BCH_DATA_need_gc_gens) + bch2_do_gc_gens(c); percpu_down_read(&c->mark_lock); if (!gc && new_a.gen != old_a.gen) @@ -704,6 +688,9 @@ static int check_bucket_ref(struct bch_fs *c, struct printbuf buf = PRINTBUF; int ret = 0; + if (bucket_data_type == BCH_DATA_cached) + bucket_data_type = BCH_DATA_user; + if 
(gen_after(ptr->gen, b_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" @@ -748,7 +735,8 @@ static int check_bucket_ref(struct bch_fs *c, goto err; } - if (bucket_data_type && ptr_data_type && + if (!data_type_is_empty(bucket_data_type) && + ptr_data_type && bucket_data_type != ptr_data_type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" @@ -1401,14 +1389,8 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type, a->v.gen, &a->v.data_type, - &a->v.dirty_sectors, &a->v.cached_sectors); - if (ret) - goto out; - - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - if (ret) - goto out; -out: + &a->v.dirty_sectors, &a->v.cached_sectors) ?: + bch2_trans_update(trans, &iter, &a->k_i, 0); bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 053b6dc215b3..518f5104a2f7 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -121,12 +121,10 @@ static inline u8 ptr_stale(struct bch_dev *ca, /* Device usage: */ struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *); +void bch2_dev_usage_init(struct bch_dev *); -static inline u64 __dev_buckets_available(struct bch_dev *ca, - struct bch_dev_usage stats, - enum alloc_reserve reserve) +static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reserve reserve) { - s64 total = ca->mi.nbuckets - ca->mi.first_bucket; s64 reserved = 0; switch (reserve) { @@ -141,20 +139,19 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca, fallthrough; case RESERVE_btree_movinggc: break; - default: - BUG(); } - if (WARN_ONCE(stats.buckets_unavailable > total, - "buckets_unavailable overflow (%llu > %llu)\n", - stats.buckets_unavailable, total)) - return 0; + return reserved; +} +static inline u64 __dev_buckets_available(struct 
bch_dev *ca, + struct bch_dev_usage usage, + enum alloc_reserve reserve) +{ return max_t(s64, 0, - total - - stats.buckets_unavailable - + usage.d[BCH_DATA_free].buckets - ca->nr_open_buckets - - reserved); + bch2_dev_buckets_reserved(ca, reserve)); } static inline u64 dev_buckets_available(struct bch_dev *ca, diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index e79a33795bf9..0a9dd5af3524 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -34,7 +34,6 @@ struct bucket_gens { struct bch_dev_usage { u64 buckets_ec; - u64 buckets_unavailable; struct { u64 buckets; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 6cd0a2739ce5..7b448b9551b6 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -501,13 +501,12 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - arg.available_buckets = arg.nr_buckets - src.buckets_unavailable; - arg.ec_buckets = src.buckets_ec; - arg.ec_sectors = 0; + arg.buckets_ec = src.buckets_ec; for (i = 0; i < BCH_DATA_NR; i++) { - arg.buckets[i] = src.d[i].buckets; - arg.sectors[i] = src.d[i].sectors; + arg.d[i].buckets = src.d[i].buckets; + arg.d[i].sectors = src.d[i].sectors; + arg.d[i].fragmented = src.d[i].fragmented; } percpu_ref_put(&ca->ref); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 5ea685fd15e7..fad142196daa 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -585,9 +585,7 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs le64_to_cpu(u->d[i].fragmented)); } - pr_buf(out, " buckets_ec: %llu buckets_unavailable: %llu", - le64_to_cpu(u->buckets_ec), - le64_to_cpu(u->buckets_unavailable)); + pr_buf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec)); } static int journal_entry_log_validate(struct bch_fs *c, diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 
c6f433153286..267f2f8fb13b 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -155,7 +155,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, bch2_alloc_to_v4(k, &a); - if (fsck_err_on(bucket_state(a) != BUCKET_cached || + if (fsck_err_on(a.data_type != BCH_DATA_cached || a.io_time[READ] != lru_k.k->p.offset, c, "incorrect lru entry %s\n" " for %s", diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index cd7a9d81dfe8..6209cb51efcb 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -235,8 +235,15 @@ static int bch2_copygc(struct bch_fs *c) } for_each_rw_member(ca, c, dev_idx) { - s64 avail = min(dev_buckets_available(ca, RESERVE_movinggc), - ca->mi.nbuckets >> 6); + struct bch_dev_usage usage = bch2_dev_usage_read(ca); + + u64 avail = max_t(s64, 0, + usage.d[BCH_DATA_free].buckets + + usage.d[BCH_DATA_need_discard].buckets - + ca->nr_open_buckets - + bch2_dev_buckets_reserved(ca, RESERVE_movinggc)); + + avail = min(avail, ca->mi.nbuckets >> 6); sectors_reserved += avail * ca->mi.bucket_size; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1fe3e81eaa3d..fd0c2a203619 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -713,7 +713,6 @@ static int journal_replay_entry_early(struct bch_fs *c, unsigned i, nr_types = jset_entry_dev_usage_nr_types(u); ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec); - ca->usage_base->buckets_unavailable = le64_to_cpu(u->buckets_unavailable); for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) { ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets); @@ -1080,18 +1079,11 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!c->opts.nochanges) { - if (c->sb.version < bcachefs_metadata_version_inode_backpointers) { - bch_info(c, "version prior to inode backpointers, upgrade and fsck required"); + if (c->sb.version < bcachefs_metadata_version_new_data_types) { + bch_info(c, "version prior to new_data_types, upgrade and fsck required"); 
c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; - } else if (c->sb.version < bcachefs_metadata_version_subvol_dirent) { - bch_info(c, "filesystem version is prior to subvol_dirent - upgrading"); - c->opts.version_upgrade = true; - c->opts.fsck = true; - } else if (c->sb.version < bcachefs_metadata_version_alloc_v4) { - bch_info(c, "filesystem version is prior to alloc_v4 - upgrading"); - c->opts.version_upgrade = true; } } @@ -1436,6 +1428,9 @@ int bch2_fs_initialize(struct bch_fs *c) for (i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); + for_each_online_member(ca, c, i) + bch2_dev_usage_init(ca); + err = "unable to allocate journal buckets"; for_each_online_member(ca, c, i) { ret = bch2_dev_journal_alloc(ca); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 7e885b51349e..c3c7043d7426 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1275,7 +1275,6 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c, u->entry.type = BCH_JSET_ENTRY_dev_usage; u->dev = cpu_to_le32(dev); u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec); - u->buckets_unavailable = cpu_to_le64(ca->usage_base->buckets_unavailable); for (i = 0; i < BCH_DATA_NR; i++) { u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 3183f49a488f..2c3d0546f2b6 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1566,6 +1566,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err; } + bch2_dev_usage_init(ca); + ret = __bch2_dev_attach_bdev(ca, &sb); if (ret) { bch2_dev_free(ca); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 872d7bed7b6b..c0cc6e9a3e05 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -724,18 +724,17 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) nr[c->open_buckets[i].data_type]++; pr_buf(out, - "\t\t buckets\t sectors fragmented\n" - 
"capacity%16llu\n", + "\t\t\t buckets\t sectors fragmented\n" + "capacity\t%16llu\n", ca->mi.nbuckets - ca->mi.first_bucket); - for (i = 1; i < BCH_DATA_NR; i++) - pr_buf(out, "%-8s%16llu%16llu%16llu\n", + for (i = 0; i < BCH_DATA_NR; i++) + pr_buf(out, "%-16s%16llu%16llu%16llu\n", bch2_data_types[i], stats.d[i].buckets, stats.d[i].sectors, stats.d[i].fragmented); pr_buf(out, - "ec\t%16llu\n" - "available%15llu\n" + "ec\t\t%16llu\n" "\n" "freelist_wait\t\t%s\n" "open buckets allocated\t%u\n" @@ -746,7 +745,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) "open_buckets_user\t%u\n" "btree reserve cache\t%u\n", stats.buckets_ec, - __dev_buckets_available(ca, stats, RESERVE_none), c->freelist_wait.list.first ? "waiting" : "empty", OPEN_BUCKETS_COUNT - c->open_buckets_nr_free, ca->nr_open_buckets,