More bcachefs updates for 6.7-rc1

- Assorted prep work for disk space accounting rewrite
- BTREE_TRIGGER_ATOMIC: after combining our trigger callbacks, this makes
  our trigger context more explicit
- A few fixes to avoid excessive transaction restarts on multithreaded
  workloads: fstests (in addition to ktest tests) are now checking slowpath
  counters, and that's shaking out a few bugs
- Assorted tracepoint improvements
- Starting to break up bcachefs_format.h and move on disk types so they're
  with the code they belong to; this will make room to start documenting the
  on disk format better.
- A few minor fixes

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmWtjOsACgkQE6szbY3K
bnbyXRAAsx+yM81TFqsLzRRqf8oocRwf2dj5XzExz9Ig/lYQS5LIVROS2OxwDsAc
DeaYQSTcph9dkOswCrNR96bBnEgmmZ1ClfVI6WRXvm6vs4rjhSMNbNaVyySrMUVn
5p/Lsn1/RKl0lWMYlHrdryo+106zRcr6z1Hiv9QCXkXhzdkV8wFYDkfbMveShUsu
KobC29wvd2EfZr04nqsIXS/y/iRIXhtZqJmFCiAguN70UWrwUwArpELHI5Ve+WPZ
9VjgFXW6Ka3QxJs/20tX+t24DrC+eDXR44DzQmxwG5mPBBpXkcSk5UgRw/EUag5U
5+mDZQ5Ei3gvZvUwrilMosVy3pIw0IuvqeqwDGFoFXs1cce01QCMN+NG/dBTQw9i
KGGxJw5sOrZ8fIiFnypk1M+r9NVtA8MjriLNR5bJjCWPSpWqzkT2HzxFXc6HmTZu
vsE/AxwC1RLA6B2HZlDEqLOdHE3cofkDiIzWM5ABvb4p118iyk9hE6HhAufk5UdE
HaG646kGB8pUY/sCxBIOD6K2pgthDFv+fftTM7X+uIazD3bovvPQCEInu48/KAHn
/KmslSPO0txyjnRFMbXFJvd4Fgfo44GcBCeqGpy3B79aEJ3nroyRZ0qNnnsqj0Gl
picUWjTn4W561Q1zBXuE/6cLWEp+sfaqYQcM8L3CCitRTVDPaCQ=
=yd+F
-----END PGP SIGNATURE-----

Merge tag 'bcachefs-2024-01-21' of https://evilpiepirate.org/git/bcachefs

Pull more bcachefs updates from Kent Overstreet:
 "Some fixes, some refactoring, some minor features:

  - Assorted prep work for disk space accounting rewrite

  - BTREE_TRIGGER_ATOMIC: after combining our trigger callbacks, this
    makes our trigger context more explicit

  - A few fixes to avoid excessive transaction restarts on
    multithreaded workloads: fstests (in addition to ktest tests) are
    now checking slowpath counters, and that's shaking out a few bugs

  - Assorted tracepoint improvements

  - Starting to break up bcachefs_format.h and move on disk types so
    they're with the code they belong to; this will make room to start
    documenting the on disk format better.

  - A few minor fixes"

* tag 'bcachefs-2024-01-21' of https://evilpiepirate.org/git/bcachefs: (46 commits)
  bcachefs: Improve inode_to_text()
  bcachefs: logged_ops_format.h
  bcachefs: reflink_format.h
  bcachefs; extents_format.h
  bcachefs: ec_format.h
  bcachefs: subvolume_format.h
  bcachefs: snapshot_format.h
  bcachefs: alloc_background_format.h
  bcachefs: xattr_format.h
  bcachefs: dirent_format.h
  bcachefs: inode_format.h
  bcachefs; quota_format.h
  bcachefs: sb-counters_format.h
  bcachefs: counters.c -> sb-counters.c
  bcachefs: comment bch_subvolume
  bcachefs: bch_snapshot::btime
  bcachefs: add missing __GFP_NOWARN
  bcachefs: opts->compression can now also be applied in the background
  bcachefs: Prep work for variable size btree node buffers
  bcachefs: grab s_umount only if snapshotting
  ...
commit 35a4474b5c
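For context on the BTREE_TRIGGER_ATOMIC item above: the alloc trigger hunk later in this diff replaces the old "not transactional" test with an explicit flag. The following standalone sketch only models that flag scheme; the enum values and defines mirror the btree_update.h hunk in this diff, while the `should_run` helpers and the `main()` harness are illustrative and not part of bcachefs:

```c
#include <stdio.h>

/* Flag bit positions, mirroring enum btree_update_flags in the diff below */
enum {
	__BTREE_TRIGGER_NORUN,
	__BTREE_TRIGGER_TRANSACTIONAL,
	__BTREE_TRIGGER_ATOMIC,
	__BTREE_TRIGGER_GC,
	__BTREE_TRIGGER_INSERT,
	__BTREE_TRIGGER_OVERWRITE,
	__BTREE_TRIGGER_BUCKET_INVALIDATE,
};

#define BTREE_TRIGGER_TRANSACTIONAL	(1U << __BTREE_TRIGGER_TRANSACTIONAL)
#define BTREE_TRIGGER_ATOMIC		(1U << __BTREE_TRIGGER_ATOMIC)
#define BTREE_TRIGGER_INSERT		(1U << __BTREE_TRIGGER_INSERT)

/* Old style: "atomic" context was implied by the absence of TRANSACTIONAL */
static int old_style_should_run(unsigned flags)
{
	return !(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT);
}

/* New style: the caller states the atomic context explicitly */
static int new_style_should_run(unsigned flags)
{
	return (flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT);
}

int main(void)
{
	unsigned flags = BTREE_TRIGGER_ATOMIC | BTREE_TRIGGER_INSERT;

	printf("old: %d new: %d\n",
	       old_style_should_run(flags), new_style_should_run(flags));
	return 0;
}
```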
@@ -27,7 +27,6 @@ bcachefs-y := \
	checksum.o \
	clock.o \
	compress.o \
	counters.o \
	darray.o \
	debug.o \
	dirent.o \
@@ -71,6 +70,7 @@ bcachefs-y := \
	reflink.o \
	replicas.o \
	sb-clean.o \
	sb-counters.o \
	sb-downgrade.o \
	sb-errors.o \
	sb-members.o \
@@ -273,7 +273,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
		bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
				 c, err, alloc_key_dirty_sectors_0,
				 "data_type %s but dirty_sectors==0",
				 bch2_data_types[a.v->data_type]);
				 bch2_data_type_str(a.v->data_type));
		break;
	case BCH_DATA_cached:
		bkey_fsck_err_on(!a.v->cached_sectors ||
@@ -321,16 +321,12 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
{
	struct bch_alloc_v4 _a;
	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
	unsigned i;

	prt_newline(out);
	printbuf_indent_add(out, 2);

	prt_printf(out, "gen %u oldest_gen %u data_type %s",
		   a->gen, a->oldest_gen,
		   a->data_type < BCH_DATA_NR
		   ? bch2_data_types[a->data_type]
		   : "(invalid data type)");
	prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
	bch2_prt_data_type(out, a->data_type);
	prt_newline(out);
	prt_printf(out, "journal_seq %llu", a->journal_seq);
	prt_newline(out);
@@ -353,23 +349,6 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
	prt_printf(out, "fragmentation %llu", a->fragmentation_lru);
	prt_newline(out);
	prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a));
	prt_newline(out);

	if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) {
		struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k);
		const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v);

		prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v));
		printbuf_indent_add(out, 2);

		for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) {
			prt_newline(out);
			bch2_backpointer_to_text(out, &bps[i]);
		}

		printbuf_indent_sub(out, 2);
	}

	printbuf_indent_sub(out, 2);
}

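The repeated bch2_data_types[...] to bch2_data_type_str(...) conversions in this series centralize the bounds check that call sites like the one above open-coded (`a->data_type < BCH_DATA_NR ? ... : "(invalid data type)"`). A minimal sketch of that kind of guarded lookup helper, with simplified, invented names rather than the real bcachefs enum:

```c
#include <stdio.h>

enum data_type { DATA_free, DATA_sb, DATA_journal, DATA_btree, DATA_user, DATA_NR };

static const char * const data_type_names[] = {
	"free", "sb", "journal", "btree", "user",
};

/* Guarded lookup: out-of-range values never index past the table */
static const char *data_type_str(unsigned t)
{
	return t < DATA_NR ? data_type_names[t] : "(invalid data type)";
}

int main(void)
{
	/* prints "btree (invalid data type)" */
	printf("%s %s\n", data_type_str(DATA_btree), data_type_str(42));
	return 0;
}
```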
@@ -839,7 +818,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
		}
	}

	if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
	if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) {
		struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
		u64 journal_seq = trans->journal_res.seq;
		u64 bucket_journal_seq = new_a->journal_seq;
@@ -1625,13 +1604,36 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
	return ret;
}

struct discard_buckets_state {
	u64		seen;
	u64		open;
	u64		need_journal_commit;
	u64		discarded;
	struct bch_dev	*ca;
	u64		need_journal_commit_this_dev;
};

static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
{
	if (s->ca == ca)
		return;

	if (s->ca && s->need_journal_commit_this_dev >
	    bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
		bch2_journal_flush_async(&c->journal, NULL);

	if (s->ca)
		percpu_ref_put(&s->ca->ref);
	if (ca)
		percpu_ref_get(&ca->ref);
	s->ca = ca;
	s->need_journal_commit_this_dev = 0;
}

static int bch2_discard_one_bucket(struct btree_trans *trans,
				   struct btree_iter *need_discard_iter,
				   struct bpos *discard_pos_done,
				   u64 *seen,
				   u64 *open,
				   u64 *need_journal_commit,
				   u64 *discarded)
				   struct discard_buckets_state *s)
{
	struct bch_fs *c = trans->c;
	struct bpos pos = need_discard_iter->pos;
@@ -1643,20 +1645,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
	int ret = 0;

	ca = bch_dev_bkey_exists(c, pos.inode);

	if (!percpu_ref_tryget(&ca->io_ref)) {
		bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
		return 0;
	}

	discard_buckets_next_dev(c, s, ca);

	if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
		(*open)++;
		s->open++;
		goto out;
	}

	if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
			c->journal.flushed_seq_ondisk,
			pos.inode, pos.offset)) {
		(*need_journal_commit)++;
		s->need_journal_commit++;
		s->need_journal_commit_this_dev++;
		goto out;
	}

@@ -1732,9 +1738,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
		goto out;

	count_event(c, bucket_discard);
	(*discarded)++;
	s->discarded++;
out:
	(*seen)++;
	s->seen++;
	bch2_trans_iter_exit(trans, &iter);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
@@ -1744,7 +1750,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
static void bch2_do_discards_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
	u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
	struct discard_buckets_state s = {};
	struct bpos discard_pos_done = POS_MAX;
	int ret;

@@ -1756,19 +1762,14 @@ static void bch2_do_discards_work(struct work_struct *work)
	ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter,
				   BTREE_ID_need_discard, POS_MIN, 0, k,
			bch2_discard_one_bucket(trans, &iter, &discard_pos_done,
						&seen,
						&open,
						&need_journal_commit,
						&discarded)));
			bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));

	if (need_journal_commit * 2 > seen)
		bch2_journal_flush_async(&c->journal, NULL);
	discard_buckets_next_dev(c, &s, NULL);

	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
			      bch2_err_str(ret));

	bch2_write_ref_put(c, BCH_WRITE_REF_discard);

	trace_discard_buckets(c, seen, open, need_journal_commit, discarded,
			      bch2_err_str(ret));
}

void bch2_do_discards(struct bch_fs *c)
fs/bcachefs/alloc_background_format.h (new file, +92)
@@ -0,0 +1,92 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H
#define _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H

struct bch_alloc {
	struct bch_val	v;
	__u8		fields;
	__u8		gen;
	__u8		data[];
} __packed __aligned(8);

#define BCH_ALLOC_FIELDS_V1()		\
	x(read_time,		16)	\
	x(write_time,		16)	\
	x(data_type,		8)	\
	x(dirty_sectors,	16)	\
	x(cached_sectors,	16)	\
	x(oldest_gen,		8)	\
	x(stripe,		32)	\
	x(stripe_redundancy,	8)

enum {
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
	BCH_ALLOC_FIELDS_V1()
#undef x
};

struct bch_alloc_v2 {
	struct bch_val	v;
	__u8		nr_fields;
	__u8		gen;
	__u8		oldest_gen;
	__u8		data_type;
	__u8		data[];
} __packed __aligned(8);

#define BCH_ALLOC_FIELDS_V2()		\
	x(read_time,		64)	\
	x(write_time,		64)	\
	x(dirty_sectors,	32)	\
	x(cached_sectors,	32)	\
	x(stripe,		32)	\
	x(stripe_redundancy,	8)

struct bch_alloc_v3 {
	struct bch_val	v;
	__le64		journal_seq;
	__le32		flags;
	__u8		nr_fields;
	__u8		gen;
	__u8		oldest_gen;
	__u8		data_type;
	__u8		data[];
} __packed __aligned(8);

LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)

struct bch_alloc_v4 {
	struct bch_val	v;
	__u64		journal_seq;
	__u32		flags;
	__u8		gen;
	__u8		oldest_gen;
	__u8		data_type;
	__u8		stripe_redundancy;
	__u32		dirty_sectors;
	__u32		cached_sectors;
	__u64		io_time[2];
	__u32		stripe;
	__u32		nr_external_backpointers;
	__u64		fragmentation_lru;
} __packed __aligned(8);

#define BCH_ALLOC_V4_U64s_V0	6
#define BCH_ALLOC_V4_U64s	(sizeof(struct bch_alloc_v4) / sizeof(__u64))

BITMASK(BCH_ALLOC_V4_NEED_DISCARD,	struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN,	struct bch_alloc_v4, flags, 1, 2)
BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS,	struct bch_alloc_v4, flags, 8, 14)

#define KEY_TYPE_BUCKET_GENS_BITS	8
#define KEY_TYPE_BUCKET_GENS_NR		(1U << KEY_TYPE_BUCKET_GENS_BITS)
#define KEY_TYPE_BUCKET_GENS_MASK	(KEY_TYPE_BUCKET_GENS_NR - 1)

struct bch_bucket_gens {
	struct bch_val	v;
	u8		gens[KEY_TYPE_BUCKET_GENS_NR];
} __packed __aligned(8);

#endif /* _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H */
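The BITMASK()/LE32_BITMASK() lines in the new header carve named bit ranges out of the flags word; elsewhere in this diff they are used as function-style getters (for example BCH_ALLOC_V4_NR_BACKPOINTERS(a) in bch2_alloc_to_text()). A rough standalone sketch of that pattern, assuming the macro expands to a getter/setter pair as the call sites here suggest; the expansion, struct, and field names below are illustrative, not the exact kernel definitions:

```c
#include <stdio.h>

struct bch_alloc_v4_like {
	unsigned int flags;	/* stand-in for struct bch_alloc_v4::flags */
};

/*
 * Hypothetical expansion of BITMASK(name, type, field, offset, end):
 * a getter returning bits [offset, end) of 'field' and a setter writing them.
 */
#define BITMASK(name, type, field, offset, end)				\
static inline unsigned long long name(const type *k)			\
{									\
	return (k->field >> offset) & ~(~0ULL << (end - offset));	\
}									\
									\
static inline void SET_##name(type *k, unsigned long long v)		\
{									\
	k->field &= ~(~(~0ULL << (end - offset)) << offset);		\
	k->field |= (v & ~(~0ULL << (end - offset))) << offset;	\
}

BITMASK(ALLOC_NEED_DISCARD,	struct bch_alloc_v4_like, flags, 0, 1)
BITMASK(ALLOC_NR_BACKPOINTERS,	struct bch_alloc_v4_like, flags, 8, 14)

int main(void)
{
	struct bch_alloc_v4_like a = { 0 };

	SET_ALLOC_NEED_DISCARD(&a, 1);
	SET_ALLOC_NR_BACKPOINTERS(&a, 5);
	/* prints "need_discard=1 nr_backpointers=5" */
	printf("need_discard=%llu nr_backpointers=%llu\n",
	       ALLOC_NEED_DISCARD(&a), ALLOC_NR_BACKPOINTERS(&a));
	return 0;
}
```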
@ -1525,10 +1525,11 @@ static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, str
|
||||
unsigned data_type = ob->data_type;
|
||||
barrier(); /* READ_ONCE() doesn't work on bitfields */
|
||||
|
||||
prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u",
|
||||
prt_printf(out, "%zu ref %u ",
|
||||
ob - c->open_buckets,
|
||||
atomic_read(&ob->pin),
|
||||
data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
|
||||
atomic_read(&ob->pin));
|
||||
bch2_prt_data_type(out, data_type);
|
||||
prt_printf(out, " %u:%llu gen %u allocated %u/%u",
|
||||
ob->dev, ob->bucket, ob->gen,
|
||||
ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size);
|
||||
if (ob->ec)
|
||||
|
@ -400,13 +400,24 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
|
||||
{
|
||||
return bpos_eq(l.k->p, r.k->p) &&
|
||||
bkey_bytes(l.k) == bkey_bytes(r.k) &&
|
||||
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
|
||||
}
|
||||
|
||||
struct extents_to_bp_state {
|
||||
struct bpos bucket_start;
|
||||
struct bpos bucket_end;
|
||||
struct bkey_buf last_flushed;
|
||||
};
|
||||
|
||||
static int check_bp_exists(struct btree_trans *trans,
|
||||
struct extents_to_bp_state *s,
|
||||
struct bpos bucket,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c orig_k,
|
||||
struct bpos bucket_start,
|
||||
struct bpos bucket_end,
|
||||
struct bkey_buf *last_flushed)
|
||||
struct bkey_s_c orig_k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter bp_iter = { NULL };
|
||||
@ -417,8 +428,8 @@ static int check_bp_exists(struct btree_trans *trans,
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
if (bpos_lt(bucket, bucket_start) ||
|
||||
bpos_gt(bucket, bucket_end))
|
||||
if (bpos_lt(bucket, s->bucket_start) ||
|
||||
bpos_gt(bucket, s->bucket_end))
|
||||
return 0;
|
||||
|
||||
if (!bch2_dev_bucket_exists(c, bucket))
|
||||
@ -433,11 +444,9 @@ static int check_bp_exists(struct btree_trans *trans,
|
||||
|
||||
if (bp_k.k->type != KEY_TYPE_backpointer ||
|
||||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
|
||||
if (!bpos_eq(orig_k.k->p, last_flushed->k->k.p) ||
|
||||
bkey_bytes(orig_k.k) != bkey_bytes(&last_flushed->k->k) ||
|
||||
memcmp(orig_k.v, &last_flushed->k->v, bkey_val_bytes(orig_k.k))) {
|
||||
bch2_bkey_buf_reassemble(&tmp, c, orig_k);
|
||||
bch2_bkey_buf_reassemble(&tmp, c, orig_k);
|
||||
|
||||
if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
|
||||
if (bp.level) {
|
||||
bch2_trans_unlock(trans);
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
@ -447,7 +456,7 @@ static int check_bp_exists(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_bkey_buf_copy(last_flushed, c, tmp.k);
|
||||
bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
|
||||
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
|
||||
goto out;
|
||||
}
|
||||
@ -475,10 +484,8 @@ static int check_bp_exists(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
static int check_extent_to_backpointers(struct btree_trans *trans,
|
||||
struct extents_to_bp_state *s,
|
||||
enum btree_id btree, unsigned level,
|
||||
struct bpos bucket_start,
|
||||
struct bpos bucket_end,
|
||||
struct bkey_buf *last_flushed,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -498,9 +505,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
|
||||
bch2_extent_ptr_to_bp(c, btree, level,
|
||||
k, p, &bucket_pos, &bp);
|
||||
|
||||
ret = check_bp_exists(trans, bucket_pos, bp, k,
|
||||
bucket_start, bucket_end,
|
||||
last_flushed);
|
||||
ret = check_bp_exists(trans, s, bucket_pos, bp, k);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -509,10 +514,8 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
static int check_btree_root_to_backpointers(struct btree_trans *trans,
|
||||
struct extents_to_bp_state *s,
|
||||
enum btree_id btree_id,
|
||||
struct bpos bucket_start,
|
||||
struct bpos bucket_end,
|
||||
struct bkey_buf *last_flushed,
|
||||
int *level)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -536,9 +539,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
|
||||
*level = b->c.level;
|
||||
|
||||
k = bkey_i_to_s_c(&b->key);
|
||||
ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1,
|
||||
bucket_start, bucket_end,
|
||||
last_flushed, k);
|
||||
ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
@ -559,7 +560,7 @@ static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
|
||||
|
||||
si_meminfo(&i);
|
||||
mem_bytes = i.totalram * i.mem_unit;
|
||||
return div_u64(mem_bytes >> 1, btree_bytes(c));
|
||||
return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
|
||||
}
|
||||
|
||||
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
||||
@ -610,43 +611,35 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
struct bpos bucket_start,
|
||||
struct bpos bucket_end)
|
||||
struct extents_to_bp_state *s)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
enum btree_id btree_id;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf last_flushed;
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
|
||||
for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
|
||||
for (enum btree_id btree_id = 0;
|
||||
btree_id < btree_id_nr_alive(c);
|
||||
btree_id++) {
|
||||
int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
|
||||
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
check_btree_root_to_backpointers(trans, btree_id,
|
||||
bucket_start, bucket_end,
|
||||
&last_flushed, &level));
|
||||
check_btree_root_to_backpointers(trans, s, btree_id, &level));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (level >= depth) {
|
||||
struct btree_iter iter;
|
||||
bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
|
||||
level,
|
||||
BTREE_ITER_PREFETCH);
|
||||
while (1) {
|
||||
bch2_trans_begin(trans);
|
||||
k = bch2_btree_iter_peek(&iter);
|
||||
|
||||
struct bkey_s_c k = bch2_btree_iter_peek(&iter);
|
||||
if (!k.k)
|
||||
break;
|
||||
ret = bkey_err(k) ?:
|
||||
check_extent_to_backpointers(trans, btree_id, level,
|
||||
bucket_start, bucket_end,
|
||||
&last_flushed, k) ?:
|
||||
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
|
||||
@ -668,7 +661,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -731,37 +723,43 @@ static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
|
||||
int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct bpos start = POS_MIN, end;
|
||||
struct extents_to_bp_state s = { .bucket_start = POS_MIN };
|
||||
int ret;
|
||||
|
||||
bch2_bkey_buf_init(&s.last_flushed);
|
||||
bkey_init(&s.last_flushed.k->k);
|
||||
|
||||
while (1) {
|
||||
ret = bch2_get_alloc_in_memory_pos(trans, start, &end);
|
||||
ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX))
|
||||
if ( bpos_eq(s.bucket_start, POS_MIN) &&
|
||||
!bpos_eq(s.bucket_end, SPOS_MAX))
|
||||
bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
|
||||
__func__, btree_nodes_fit_in_ram(c));
|
||||
|
||||
if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) {
|
||||
if (!bpos_eq(s.bucket_start, POS_MIN) ||
|
||||
!bpos_eq(s.bucket_end, SPOS_MAX)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_str(&buf, "check_extents_to_backpointers(): ");
|
||||
bch2_bpos_to_text(&buf, start);
|
||||
bch2_bpos_to_text(&buf, s.bucket_start);
|
||||
prt_str(&buf, "-");
|
||||
bch2_bpos_to_text(&buf, end);
|
||||
bch2_bpos_to_text(&buf, s.bucket_end);
|
||||
|
||||
bch_verbose(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
ret = bch2_check_extents_to_backpointers_pass(trans, start, end);
|
||||
if (ret || bpos_eq(end, SPOS_MAX))
|
||||
ret = bch2_check_extents_to_backpointers_pass(trans, &s);
|
||||
if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
|
||||
break;
|
||||
|
||||
start = bpos_successor(end);
|
||||
s.bucket_start = bpos_successor(s.bucket_end);
|
||||
}
|
||||
bch2_trans_put(trans);
|
||||
bch2_bkey_buf_exit(&s.last_flushed, c);
|
||||
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
|
@ -2,6 +2,7 @@
|
||||
#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
||||
#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
||||
|
||||
#include "btree_cache.h"
|
||||
#include "btree_iter.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
|
@ -1204,11 +1204,6 @@ static inline unsigned block_sectors(const struct bch_fs *c)
|
||||
return c->opts.block_size >> 9;
|
||||
}
|
||||
|
||||
static inline size_t btree_sectors(const struct bch_fs *c)
|
||||
{
|
||||
return c->opts.btree_node_size >> 9;
|
||||
}
|
||||
|
||||
static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
|
||||
{
|
||||
return c->btree_key_cache_btrees & (1U << btree);
|
||||
|
@ -417,600 +417,12 @@ struct bch_set {
|
||||
struct bch_val v;
|
||||
};
|
||||
|
||||
/* Extents */
|
||||
|
||||
/*
|
||||
* In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
|
||||
* preceded by checksum/compression information (bch_extent_crc32 or
|
||||
* bch_extent_crc64).
|
||||
*
|
||||
* One major determining factor in the format of extents is how we handle and
|
||||
* represent extents that have been partially overwritten and thus trimmed:
|
||||
*
|
||||
* If an extent is not checksummed or compressed, when the extent is trimmed we
|
||||
* don't have to remember the extent we originally allocated and wrote: we can
|
||||
* merely adjust ptr->offset to point to the start of the data that is currently
|
||||
* live. The size field in struct bkey records the current (live) size of the
|
||||
* extent, and is also used to mean "size of region on disk that we point to" in
|
||||
* this case.
|
||||
*
|
||||
* Thus an extent that is not checksummed or compressed will consist only of a
|
||||
* list of bch_extent_ptrs, with none of the fields in
|
||||
* bch_extent_crc32/bch_extent_crc64.
|
||||
*
|
||||
* When an extent is checksummed or compressed, it's not possible to read only
|
||||
* the data that is currently live: we have to read the entire extent that was
|
||||
* originally written, and then return only the part of the extent that is
|
||||
* currently live.
|
||||
*
|
||||
* Thus, in addition to the current size of the extent in struct bkey, we need
|
||||
* to store the size of the originally allocated space - this is the
|
||||
* compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
|
||||
* when the extent is trimmed, instead of modifying the offset field of the
|
||||
* pointer, we keep a second smaller offset field - "offset into the original
|
||||
* extent of the currently live region".
|
||||
*
|
||||
* The other major determining factor is replication and data migration:
|
||||
*
|
||||
* Each pointer may have its own bch_extent_crc32/64. When doing a replicated
|
||||
* write, we will initially write all the replicas in the same format, with the
|
||||
* same checksum type and compression format - however, when copygc runs later (or
|
||||
* tiering/cache promotion, anything that moves data), it is not in general
|
||||
* going to rewrite all the pointers at once - one of the replicas may be in a
|
||||
* bucket on one device that has very little fragmentation while another lives
|
||||
* in a bucket that has become heavily fragmented, and thus is being rewritten
|
||||
* sooner than the rest.
|
||||
*
|
||||
* Thus it will only move a subset of the pointers (or in the case of
|
||||
* tiering/cache promotion perhaps add a single pointer without dropping any
|
||||
* current pointers), and if the extent has been partially overwritten it must
|
||||
* write only the currently live portion (or copygc would not be able to reduce
|
||||
* fragmentation!) - which necessitates a different bch_extent_crc format for
|
||||
* the new pointer.
|
||||
*
|
||||
* But in the interests of space efficiency, we don't want to store one
|
||||
* bch_extent_crc for each pointer if we don't have to.
|
||||
*
|
||||
* Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
|
||||
* bch_extent_ptrs appended arbitrarily one after the other. We determine the
|
||||
* type of a given entry with a scheme similar to utf8 (except we're encoding a
|
||||
* type, not a size), encoding the type in the position of the first set bit:
|
||||
*
|
||||
* bch_extent_crc32 - 0b1
|
||||
* bch_extent_ptr - 0b10
|
||||
* bch_extent_crc64 - 0b100
|
||||
*
|
||||
* We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
|
||||
* bch_extent_crc64 is the least constrained).
|
||||
*
|
||||
* Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
|
||||
* until the next bch_extent_crc32/64.
|
||||
*
|
||||
* If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
|
||||
* is neither checksummed nor compressed.
|
||||
*/
|
||||
|
||||
/* 128 bits, sufficient for cryptographic MACs: */
|
||||
struct bch_csum {
|
||||
__le64 lo;
|
||||
__le64 hi;
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define BCH_EXTENT_ENTRY_TYPES() \
|
||||
x(ptr, 0) \
|
||||
x(crc32, 1) \
|
||||
x(crc64, 2) \
|
||||
x(crc128, 3) \
|
||||
x(stripe_ptr, 4) \
|
||||
x(rebalance, 5)
|
||||
#define BCH_EXTENT_ENTRY_MAX 6
|
||||
|
||||
enum bch_extent_entry_type {
|
||||
#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
|
||||
BCH_EXTENT_ENTRY_TYPES()
|
||||
#undef x
|
||||
};
|
||||
|
||||
/* Compressed/uncompressed size are stored biased by 1: */
|
||||
struct bch_extent_crc32 {
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
__u32 type:2,
|
||||
_compressed_size:7,
|
||||
_uncompressed_size:7,
|
||||
offset:7,
|
||||
_unused:1,
|
||||
csum_type:4,
|
||||
compression_type:4;
|
||||
__u32 csum;
|
||||
#elif defined (__BIG_ENDIAN_BITFIELD)
|
||||
__u32 csum;
|
||||
__u32 compression_type:4,
|
||||
csum_type:4,
|
||||
_unused:1,
|
||||
offset:7,
|
||||
_uncompressed_size:7,
|
||||
_compressed_size:7,
|
||||
type:2;
|
||||
#endif
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define CRC32_SIZE_MAX (1U << 7)
|
||||
#define CRC32_NONCE_MAX 0
|
||||
|
||||
struct bch_extent_crc64 {
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
__u64 type:3,
|
||||
_compressed_size:9,
|
||||
_uncompressed_size:9,
|
||||
offset:9,
|
||||
nonce:10,
|
||||
csum_type:4,
|
||||
compression_type:4,
|
||||
csum_hi:16;
|
||||
#elif defined (__BIG_ENDIAN_BITFIELD)
|
||||
__u64 csum_hi:16,
|
||||
compression_type:4,
|
||||
csum_type:4,
|
||||
nonce:10,
|
||||
offset:9,
|
||||
_uncompressed_size:9,
|
||||
_compressed_size:9,
|
||||
type:3;
|
||||
#endif
|
||||
__u64 csum_lo;
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define CRC64_SIZE_MAX (1U << 9)
|
||||
#define CRC64_NONCE_MAX ((1U << 10) - 1)
|
||||
|
||||
struct bch_extent_crc128 {
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
__u64 type:4,
|
||||
_compressed_size:13,
|
||||
_uncompressed_size:13,
|
||||
offset:13,
|
||||
nonce:13,
|
||||
csum_type:4,
|
||||
compression_type:4;
|
||||
#elif defined (__BIG_ENDIAN_BITFIELD)
|
||||
__u64 compression_type:4,
|
||||
csum_type:4,
|
||||
nonce:13,
|
||||
offset:13,
|
||||
_uncompressed_size:13,
|
||||
_compressed_size:13,
|
||||
type:4;
|
||||
#endif
|
||||
struct bch_csum csum;
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define CRC128_SIZE_MAX (1U << 13)
|
||||
#define CRC128_NONCE_MAX ((1U << 13) - 1)
|
||||
|
||||
/*
|
||||
* @reservation - pointer hasn't been written to, just reserved
|
||||
*/
|
||||
struct bch_extent_ptr {
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
__u64 type:1,
|
||||
cached:1,
|
||||
unused:1,
|
||||
unwritten:1,
|
||||
offset:44, /* 8 petabytes */
|
||||
dev:8,
|
||||
gen:8;
|
||||
#elif defined (__BIG_ENDIAN_BITFIELD)
|
||||
__u64 gen:8,
|
||||
dev:8,
|
||||
offset:44,
|
||||
unwritten:1,
|
||||
unused:1,
|
||||
cached:1,
|
||||
type:1;
|
||||
#endif
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_extent_stripe_ptr {
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
__u64 type:5,
|
||||
block:8,
|
||||
redundancy:4,
|
||||
idx:47;
|
||||
#elif defined (__BIG_ENDIAN_BITFIELD)
|
||||
__u64 idx:47,
|
||||
redundancy:4,
|
||||
block:8,
|
||||
type:5;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct bch_extent_rebalance {
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
__u64 type:6,
|
||||
unused:34,
|
||||
compression:8, /* enum bch_compression_opt */
|
||||
target:16;
|
||||
#elif defined (__BIG_ENDIAN_BITFIELD)
|
||||
__u64 target:16,
|
||||
compression:8,
|
||||
unused:34,
|
||||
type:6;
|
||||
#endif
|
||||
};
|
||||
|
||||
union bch_extent_entry {
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
|
||||
unsigned long type;
|
||||
#elif __BITS_PER_LONG == 32
|
||||
struct {
|
||||
unsigned long pad;
|
||||
unsigned long type;
|
||||
};
|
||||
#else
|
||||
#error edit for your odd byteorder.
|
||||
#endif
|
||||
|
||||
#define x(f, n) struct bch_extent_##f f;
|
||||
BCH_EXTENT_ENTRY_TYPES()
|
||||
#undef x
|
||||
};
|
||||
|
||||
struct bch_btree_ptr {
|
||||
struct bch_val v;
|
||||
|
||||
__u64 _data[0];
|
||||
struct bch_extent_ptr start[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_btree_ptr_v2 {
|
||||
struct bch_val v;
|
||||
|
||||
__u64 mem_ptr;
|
||||
__le64 seq;
|
||||
__le16 sectors_written;
|
||||
__le16 flags;
|
||||
struct bpos min_key;
|
||||
__u64 _data[0];
|
||||
struct bch_extent_ptr start[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1);
|
||||
|
||||
struct bch_extent {
|
||||
struct bch_val v;
|
||||
|
||||
__u64 _data[0];
|
||||
union bch_extent_entry start[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_reservation {
|
||||
struct bch_val v;
|
||||
|
||||
__le32 generation;
|
||||
__u8 nr_replicas;
|
||||
__u8 pad[3];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Maximum size (in u64s) a single pointer could be: */
|
||||
#define BKEY_EXTENT_PTR_U64s_MAX\
|
||||
((sizeof(struct bch_extent_crc128) + \
|
||||
sizeof(struct bch_extent_ptr)) / sizeof(__u64))
|
||||
|
||||
/* Maximum possible size of an entire extent value: */
|
||||
#define BKEY_EXTENT_VAL_U64s_MAX \
|
||||
(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
|
||||
|
||||
/* * Maximum possible size of an entire extent, key + value: */
|
||||
#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
|
||||
|
||||
/* Btree pointers don't carry around checksums: */
|
||||
#define BKEY_BTREE_PTR_VAL_U64s_MAX \
|
||||
((sizeof(struct bch_btree_ptr_v2) + \
|
||||
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
|
||||
#define BKEY_BTREE_PTR_U64s_MAX \
|
||||
(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
|
||||
|
||||
/* Inodes */
|
||||
|
||||
#define BLOCKDEV_INODE_MAX 4096
|
||||
|
||||
#define BCACHEFS_ROOT_INO 4096
|
||||
|
||||
struct bch_inode {
|
||||
struct bch_val v;
|
||||
|
||||
__le64 bi_hash_seed;
|
||||
__le32 bi_flags;
|
||||
__le16 bi_mode;
|
||||
__u8 fields[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_inode_v2 {
|
||||
struct bch_val v;
|
||||
|
||||
__le64 bi_journal_seq;
|
||||
__le64 bi_hash_seed;
|
||||
__le64 bi_flags;
|
||||
__le16 bi_mode;
|
||||
__u8 fields[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_inode_v3 {
|
||||
struct bch_val v;
|
||||
|
||||
__le64 bi_journal_seq;
|
||||
__le64 bi_hash_seed;
|
||||
__le64 bi_flags;
|
||||
__le64 bi_sectors;
|
||||
__le64 bi_size;
|
||||
__le64 bi_version;
|
||||
__u8 fields[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define INODEv3_FIELDS_START_INITIAL 6
|
||||
#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
|
||||
|
||||
struct bch_inode_generation {
|
||||
struct bch_val v;
|
||||
|
||||
__le32 bi_generation;
|
||||
__le32 pad;
|
||||
} __packed __aligned(8);
|
||||
|
||||
/*
|
||||
* bi_subvol and bi_parent_subvol are only set for subvolume roots:
|
||||
*/
|
||||
|
||||
#define BCH_INODE_FIELDS_v2() \
|
||||
x(bi_atime, 96) \
|
||||
x(bi_ctime, 96) \
|
||||
x(bi_mtime, 96) \
|
||||
x(bi_otime, 96) \
|
||||
x(bi_size, 64) \
|
||||
x(bi_sectors, 64) \
|
||||
x(bi_uid, 32) \
|
||||
x(bi_gid, 32) \
|
||||
x(bi_nlink, 32) \
|
||||
x(bi_generation, 32) \
|
||||
x(bi_dev, 32) \
|
||||
x(bi_data_checksum, 8) \
|
||||
x(bi_compression, 8) \
|
||||
x(bi_project, 32) \
|
||||
x(bi_background_compression, 8) \
|
||||
x(bi_data_replicas, 8) \
|
||||
x(bi_promote_target, 16) \
|
||||
x(bi_foreground_target, 16) \
|
||||
x(bi_background_target, 16) \
|
||||
x(bi_erasure_code, 16) \
|
||||
x(bi_fields_set, 16) \
|
||||
x(bi_dir, 64) \
|
||||
x(bi_dir_offset, 64) \
|
||||
x(bi_subvol, 32) \
|
||||
x(bi_parent_subvol, 32)
|
||||
|
||||
#define BCH_INODE_FIELDS_v3() \
|
||||
x(bi_atime, 96) \
|
||||
x(bi_ctime, 96) \
|
||||
x(bi_mtime, 96) \
|
||||
x(bi_otime, 96) \
|
||||
x(bi_uid, 32) \
|
||||
x(bi_gid, 32) \
|
||||
x(bi_nlink, 32) \
|
||||
x(bi_generation, 32) \
|
||||
x(bi_dev, 32) \
|
||||
x(bi_data_checksum, 8) \
|
||||
x(bi_compression, 8) \
|
||||
x(bi_project, 32) \
|
||||
x(bi_background_compression, 8) \
|
||||
x(bi_data_replicas, 8) \
|
||||
x(bi_promote_target, 16) \
|
||||
x(bi_foreground_target, 16) \
|
||||
x(bi_background_target, 16) \
|
||||
x(bi_erasure_code, 16) \
|
||||
x(bi_fields_set, 16) \
|
||||
x(bi_dir, 64) \
|
||||
x(bi_dir_offset, 64) \
|
||||
x(bi_subvol, 32) \
|
||||
x(bi_parent_subvol, 32) \
|
||||
x(bi_nocow, 8)
|
||||
|
||||
/* subset of BCH_INODE_FIELDS */
|
||||
#define BCH_INODE_OPTS() \
|
||||
x(data_checksum, 8) \
|
||||
x(compression, 8) \
|
||||
x(project, 32) \
|
||||
x(background_compression, 8) \
|
||||
x(data_replicas, 8) \
|
||||
x(promote_target, 16) \
|
||||
x(foreground_target, 16) \
|
||||
x(background_target, 16) \
|
||||
x(erasure_code, 16) \
|
||||
x(nocow, 8)
|
||||
|
||||
enum inode_opt_id {
|
||||
#define x(name, ...) \
|
||||
Inode_opt_##name,
|
||||
BCH_INODE_OPTS()
|
||||
#undef x
|
||||
Inode_opt_nr,
|
||||
};
|
||||
|
||||
#define BCH_INODE_FLAGS() \
|
||||
x(sync, 0) \
|
||||
x(immutable, 1) \
|
||||
x(append, 2) \
|
||||
x(nodump, 3) \
|
||||
x(noatime, 4) \
|
||||
x(i_size_dirty, 5) \
|
||||
x(i_sectors_dirty, 6) \
|
||||
x(unlinked, 7) \
|
||||
x(backptr_untrusted, 8)
|
||||
|
||||
/* bits 20+ reserved for packed fields below: */
|
||||
|
||||
enum bch_inode_flags {
|
||||
#define x(t, n) BCH_INODE_##t = 1U << n,
|
||||
BCH_INODE_FLAGS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
enum __bch_inode_flags {
|
||||
#define x(t, n) __BCH_INODE_##t = n,
|
||||
BCH_INODE_FLAGS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
|
||||
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
|
||||
LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
|
||||
|
||||
LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
|
||||
LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
|
||||
|
||||
LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
|
||||
LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31);
|
||||
|
||||
LE64_BITMASK(INODEv3_FIELDS_START,
|
||||
struct bch_inode_v3, bi_flags, 31, 36);
|
||||
LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52);
|
||||
|
||||
/* Dirents */
|
||||
|
||||
/*
|
||||
* Dirents (and xattrs) have to implement string lookups; since our b-tree
|
||||
* doesn't support arbitrary length strings for the key, we instead index by a
|
||||
* 64 bit hash (currently truncated sha1) of the string, stored in the offset
|
||||
* field of the key - using linear probing to resolve hash collisions. This also
|
||||
* provides us with the readdir cookie posix requires.
|
||||
*
|
||||
* Linear probing requires us to use whiteouts for deletions, in the event of a
|
||||
* collision:
|
||||
*/
|
||||
|
||||
struct bch_dirent {
|
||||
struct bch_val v;
|
||||
|
||||
/* Target inode number: */
|
||||
union {
|
||||
__le64 d_inum;
|
||||
struct { /* DT_SUBVOL */
|
||||
__le32 d_child_subvol;
|
||||
__le32 d_parent_subvol;
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* Copy of mode bits 12-15 from the target inode - so userspace can get
|
||||
* the filetype without having to do a stat()
|
||||
*/
|
||||
__u8 d_type;
|
||||
|
||||
__u8 d_name[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define DT_SUBVOL 16
|
||||
#define BCH_DT_MAX 17
|
||||
|
||||
#define BCH_NAME_MAX 512
|
||||
|
||||
/* Xattrs */
|
||||
|
||||
#define KEY_TYPE_XATTR_INDEX_USER 0
|
||||
#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1
|
||||
#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2
|
||||
#define KEY_TYPE_XATTR_INDEX_TRUSTED 3
|
||||
#define KEY_TYPE_XATTR_INDEX_SECURITY 4
|
||||
|
||||
struct bch_xattr {
|
||||
struct bch_val v;
|
||||
__u8 x_type;
|
||||
__u8 x_name_len;
|
||||
__le16 x_val_len;
|
||||
__u8 x_name[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Bucket/allocation information: */
|
||||
|
||||
struct bch_alloc {
|
||||
struct bch_val v;
|
||||
__u8 fields;
|
||||
__u8 gen;
|
||||
__u8 data[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define BCH_ALLOC_FIELDS_V1() \
|
||||
x(read_time, 16) \
|
||||
x(write_time, 16) \
|
||||
x(data_type, 8) \
|
||||
x(dirty_sectors, 16) \
|
||||
x(cached_sectors, 16) \
|
||||
x(oldest_gen, 8) \
|
||||
x(stripe, 32) \
|
||||
x(stripe_redundancy, 8)
|
||||
|
||||
enum {
|
||||
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
|
||||
BCH_ALLOC_FIELDS_V1()
|
||||
#undef x
|
||||
};
|
||||
|
||||
struct bch_alloc_v2 {
|
||||
struct bch_val v;
|
||||
__u8 nr_fields;
|
||||
__u8 gen;
|
||||
__u8 oldest_gen;
|
||||
__u8 data_type;
|
||||
__u8 data[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define BCH_ALLOC_FIELDS_V2() \
|
||||
x(read_time, 64) \
|
||||
x(write_time, 64) \
|
||||
x(dirty_sectors, 32) \
|
||||
x(cached_sectors, 32) \
|
||||
x(stripe, 32) \
|
||||
x(stripe_redundancy, 8)
|
||||
|
||||
struct bch_alloc_v3 {
|
||||
struct bch_val v;
|
||||
__le64 journal_seq;
|
||||
__le32 flags;
|
||||
__u8 nr_fields;
|
||||
__u8 gen;
|
||||
__u8 oldest_gen;
|
||||
__u8 data_type;
|
||||
__u8 data[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
|
||||
LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
|
||||
|
||||
struct bch_alloc_v4 {
|
||||
struct bch_val v;
|
||||
__u64 journal_seq;
|
||||
__u32 flags;
|
||||
__u8 gen;
|
||||
__u8 oldest_gen;
|
||||
__u8 data_type;
|
||||
__u8 stripe_redundancy;
|
||||
__u32 dirty_sectors;
|
||||
__u32 cached_sectors;
|
||||
__u64 io_time[2];
|
||||
__u32 stripe;
|
||||
__u32 nr_external_backpointers;
|
||||
__u64 fragmentation_lru;
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define BCH_ALLOC_V4_U64s_V0 6
|
||||
#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64))
|
||||
|
||||
BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
|
||||
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
|
||||
BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
|
||||
BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)
|
||||
|
||||
#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40
|
||||
|
||||
struct bch_backpointer {
|
||||
struct bch_val v;
|
||||
__u8 btree_id;
|
||||
@ -1021,154 +433,6 @@ struct bch_backpointer {
|
||||
struct bpos pos;
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define KEY_TYPE_BUCKET_GENS_BITS 8
|
||||
#define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS)
|
||||
#define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1)
|
||||
|
||||
struct bch_bucket_gens {
|
||||
struct bch_val v;
|
||||
u8 gens[KEY_TYPE_BUCKET_GENS_NR];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Quotas: */
|
||||
|
||||
enum quota_types {
|
||||
QTYP_USR = 0,
|
||||
QTYP_GRP = 1,
|
||||
QTYP_PRJ = 2,
|
||||
QTYP_NR = 3,
|
||||
};
|
||||
|
||||
enum quota_counters {
|
||||
Q_SPC = 0,
|
||||
Q_INO = 1,
|
||||
Q_COUNTERS = 2,
|
||||
};
|
||||
|
||||
struct bch_quota_counter {
|
||||
__le64 hardlimit;
|
||||
__le64 softlimit;
|
||||
};
|
||||
|
||||
struct bch_quota {
|
||||
struct bch_val v;
|
||||
struct bch_quota_counter c[Q_COUNTERS];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Erasure coding */
|
||||
|
||||
struct bch_stripe {
|
||||
struct bch_val v;
|
||||
__le16 sectors;
|
||||
__u8 algorithm;
|
||||
__u8 nr_blocks;
|
||||
__u8 nr_redundant;
|
||||
|
||||
__u8 csum_granularity_bits;
|
||||
__u8 csum_type;
|
||||
__u8 pad;
|
||||
|
||||
struct bch_extent_ptr ptrs[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Reflink: */
|
||||
|
||||
struct bch_reflink_p {
|
||||
struct bch_val v;
|
||||
__le64 idx;
|
||||
/*
|
||||
* A reflink pointer might point to an indirect extent which is then
|
||||
* later split (by copygc or rebalance). If we only pointed to part of
|
||||
* the original indirect extent, and then one of the fragments is
|
||||
* outside the range we point to, we'd leak a refcount: so when creating
|
||||
* reflink pointers, we need to store pad values to remember the full
|
||||
* range we were taking a reference on.
|
||||
*/
|
||||
__le32 front_pad;
|
||||
__le32 back_pad;
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_reflink_v {
|
||||
struct bch_val v;
|
||||
__le64 refcount;
|
||||
union bch_extent_entry start[0];
|
||||
__u64 _data[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_indirect_inline_data {
|
||||
struct bch_val v;
|
||||
__le64 refcount;
|
||||
u8 data[];
|
||||
};
|
||||
|
||||
/* Inline data */
|
||||
|
||||
struct bch_inline_data {
|
||||
struct bch_val v;
|
||||
u8 data[];
|
||||
};
|
||||
|
||||
/* Subvolumes: */
|
||||
|
||||
#define SUBVOL_POS_MIN POS(0, 1)
|
||||
#define SUBVOL_POS_MAX POS(0, S32_MAX)
|
||||
#define BCACHEFS_ROOT_SUBVOL 1
|
||||
|
||||
struct bch_subvolume {
|
||||
struct bch_val v;
|
||||
__le32 flags;
|
||||
__le32 snapshot;
|
||||
__le64 inode;
|
||||
/*
|
||||
* Snapshot subvolumes form a tree, separate from the snapshot nodes
|
||||
* tree - if this subvolume is a snapshot, this is the ID of the
|
||||
* subvolume it was created from:
|
||||
*/
|
||||
__le32 parent;
|
||||
__le32 pad;
|
||||
bch_le128 otime;
|
||||
};
|
||||
|
||||
LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1)
|
||||
/*
|
||||
* We need to know whether a subvolume is a snapshot so we can know whether we
|
||||
* can delete it (or whether it should just be rm -rf'd)
|
||||
*/
|
||||
LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2)
|
||||
LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3)
|
||||
|
||||
/* Snapshots */
|
||||
|
||||
struct bch_snapshot {
|
||||
struct bch_val v;
|
||||
__le32 flags;
|
||||
__le32 parent;
|
||||
__le32 children[2];
|
||||
__le32 subvol;
|
||||
/* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */
|
||||
__le32 tree;
|
||||
__le32 depth;
|
||||
__le32 skip[3];
|
||||
};
|
||||
|
||||
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
|
||||
|
||||
/* True if a subvolume points to this snapshot node: */
|
||||
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
|
||||
|
||||
/*
|
||||
* Snapshot trees:
|
||||
*
|
||||
* The snapshot_trees btree gives us persistent indentifier for each tree of
|
||||
* bch_snapshot nodes, and allow us to record and easily find the root/master
|
||||
* subvolume that other snapshots were created from:
|
||||
*/
|
||||
struct bch_snapshot_tree {
|
||||
struct bch_val v;
|
||||
__le32 master_subvol;
|
||||
__le32 root_snapshot;
|
||||
};
|
||||
|
||||
/* LRU btree: */
|
||||
|
||||
struct bch_lru {
|
||||
@ -1178,33 +442,6 @@ struct bch_lru {
|
||||
|
||||
#define LRU_ID_STRIPES (1U << 16)
|
||||
|
||||
/* Logged operations btree: */
|
||||
|
||||
struct bch_logged_op_truncate {
|
||||
struct bch_val v;
|
||||
__le32 subvol;
|
||||
__le32 pad;
|
||||
__le64 inum;
|
||||
__le64 new_i_size;
|
||||
};
|
||||
|
||||
enum logged_op_finsert_state {
|
||||
LOGGED_OP_FINSERT_start,
|
||||
LOGGED_OP_FINSERT_shift_extents,
|
||||
LOGGED_OP_FINSERT_finish,
|
||||
};
|
||||
|
||||
struct bch_logged_op_finsert {
|
||||
struct bch_val v;
|
||||
__u8 state;
|
||||
__u8 pad[3];
|
||||
__le32 subvol;
|
||||
__le64 inum;
|
||||
__le64 dst_offset;
|
||||
__le64 src_offset;
|
||||
__le64 pos;
|
||||
};
|
||||
|
||||
/* Optional/variable size superblock sections: */
|
||||
|
||||
struct bch_sb_field {
|
||||
@ -1230,6 +467,19 @@ struct bch_sb_field {
|
||||
x(ext, 13) \
|
||||
x(downgrade, 14)
|
||||
|
||||
#include "alloc_background_format.h"
|
||||
#include "extents_format.h"
|
||||
#include "reflink_format.h"
|
||||
#include "ec_format.h"
|
||||
#include "inode_format.h"
|
||||
#include "dirent_format.h"
|
||||
#include "xattr_format.h"
|
||||
#include "quota_format.h"
|
||||
#include "logged_ops_format.h"
|
||||
#include "snapshot_format.h"
|
||||
#include "subvolume_format.h"
|
||||
#include "sb-counters_format.h"
|
||||
|
||||
enum bch_sb_field_type {
|
||||
#define x(f, nr) BCH_SB_FIELD_##f = nr,
|
||||
BCH_SB_FIELDS()
|
||||
@ -1465,23 +715,6 @@ struct bch_sb_field_replicas {
|
||||
struct bch_replicas_entry_v1 entries[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* BCH_SB_FIELD_quota: */
|
||||
|
||||
struct bch_sb_quota_counter {
|
||||
__le32 timelimit;
|
||||
__le32 warnlimit;
|
||||
};
|
||||
|
||||
struct bch_sb_quota_type {
|
||||
__le64 flags;
|
||||
struct bch_sb_quota_counter c[Q_COUNTERS];
|
||||
};
|
||||
|
||||
struct bch_sb_field_quota {
|
||||
struct bch_sb_field field;
|
||||
struct bch_sb_quota_type q[QTYP_NR];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* BCH_SB_FIELD_disk_groups: */
|
||||
|
||||
#define BCH_SB_LABEL_SIZE 32
|
||||
@ -1500,101 +733,6 @@ struct bch_sb_field_disk_groups {
|
||||
struct bch_disk_group entries[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* BCH_SB_FIELD_counters */
|
||||
|
||||
#define BCH_PERSISTENT_COUNTERS() \
|
||||
x(io_read, 0) \
|
||||
x(io_write, 1) \
|
||||
x(io_move, 2) \
|
||||
x(bucket_invalidate, 3) \
|
||||
x(bucket_discard, 4) \
|
||||
x(bucket_alloc, 5) \
|
||||
x(bucket_alloc_fail, 6) \
|
||||
x(btree_cache_scan, 7) \
|
||||
x(btree_cache_reap, 8) \
|
||||
x(btree_cache_cannibalize, 9) \
|
||||
x(btree_cache_cannibalize_lock, 10) \
|
||||
x(btree_cache_cannibalize_lock_fail, 11) \
|
||||
x(btree_cache_cannibalize_unlock, 12) \
|
||||
x(btree_node_write, 13) \
|
||||
x(btree_node_read, 14) \
|
||||
x(btree_node_compact, 15) \
|
||||
x(btree_node_merge, 16) \
|
||||
x(btree_node_split, 17) \
|
||||
x(btree_node_rewrite, 18) \
|
||||
x(btree_node_alloc, 19) \
|
||||
x(btree_node_free, 20) \
|
||||
x(btree_node_set_root, 21) \
|
||||
x(btree_path_relock_fail, 22) \
|
||||
x(btree_path_upgrade_fail, 23) \
|
||||
x(btree_reserve_get_fail, 24) \
|
||||
x(journal_entry_full, 25) \
|
||||
x(journal_full, 26) \
|
||||
x(journal_reclaim_finish, 27) \
|
||||
x(journal_reclaim_start, 28) \
|
||||
x(journal_write, 29) \
|
||||
x(read_promote, 30) \
|
||||
x(read_bounce, 31) \
|
||||
x(read_split, 33) \
|
||||
x(read_retry, 32) \
|
||||
x(read_reuse_race, 34) \
|
||||
x(move_extent_read, 35) \
|
||||
x(move_extent_write, 36) \
|
||||
x(move_extent_finish, 37) \
|
||||
x(move_extent_fail, 38) \
|
||||
x(move_extent_start_fail, 39) \
|
||||
x(copygc, 40) \
|
||||
x(copygc_wait, 41) \
|
||||
x(gc_gens_end, 42) \
|
||||
x(gc_gens_start, 43) \
|
||||
x(trans_blocked_journal_reclaim, 44) \
|
||||
x(trans_restart_btree_node_reused, 45) \
|
||||
x(trans_restart_btree_node_split, 46) \
|
||||
x(trans_restart_fault_inject, 47) \
|
||||
x(trans_restart_iter_upgrade, 48) \
|
||||
x(trans_restart_journal_preres_get, 49) \
|
||||
x(trans_restart_journal_reclaim, 50) \
|
||||
x(trans_restart_journal_res_get, 51) \
|
||||
x(trans_restart_key_cache_key_realloced, 52) \
|
||||
x(trans_restart_key_cache_raced, 53) \
|
||||
x(trans_restart_mark_replicas, 54) \
|
||||
x(trans_restart_mem_realloced, 55) \
|
||||
x(trans_restart_memory_allocation_failure, 56) \
|
||||
x(trans_restart_relock, 57) \
|
||||
x(trans_restart_relock_after_fill, 58) \
|
||||
x(trans_restart_relock_key_cache_fill, 59) \
|
||||
x(trans_restart_relock_next_node, 60) \
|
||||
x(trans_restart_relock_parent_for_fill, 61) \
|
||||
x(trans_restart_relock_path, 62) \
|
||||
x(trans_restart_relock_path_intent, 63) \
|
||||
x(trans_restart_too_many_iters, 64) \
|
||||
x(trans_restart_traverse, 65) \
|
||||
x(trans_restart_upgrade, 66) \
|
||||
x(trans_restart_would_deadlock, 67) \
|
||||
x(trans_restart_would_deadlock_write, 68) \
|
||||
x(trans_restart_injected, 69) \
|
||||
x(trans_restart_key_cache_upgrade, 70) \
|
||||
x(trans_traverse_all, 71) \
|
||||
x(transaction_commit, 72) \
|
||||
x(write_super, 73) \
|
||||
x(trans_restart_would_deadlock_recursion_limit, 74) \
|
||||
x(trans_restart_write_buffer_flush, 75) \
|
||||
x(trans_restart_split_race, 76) \
|
||||
x(write_buffer_flush_slowpath, 77) \
|
||||
x(write_buffer_flush_sync, 78)
|
||||
|
||||
enum bch_persistent_counters {
|
||||
#define x(t, n, ...) BCH_COUNTER_##t,
|
||||
BCH_PERSISTENT_COUNTERS()
|
||||
#undef x
|
||||
BCH_COUNTER_NR
|
||||
};
|
||||
|
||||
struct bch_sb_field_counters {
|
||||
struct bch_sb_field field;
|
||||
__le64 d[];
|
||||
};
|
||||
|
||||
/*
|
||||
* On clean shutdown, store btree roots and current journal sequence number in
|
||||
* the superblock:
|
||||
|
@@ -33,7 +33,7 @@ void bch2_bkey_packed_to_binary_text(struct printbuf *out,
		next_key_bits -= 64;
	}

	bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits));
	bch2_prt_u64_base2_nbits(out, v, min(word_bits, nr_key_bits));

	if (!next_key_bits)
		break;
@@ -63,8 +63,17 @@ static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k,
	return 0;
}

static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c,
				    struct bkey_s_c k)
{
	struct bkey_s_c_cookie ck = bkey_s_c_to_cookie(k);

	prt_printf(out, "%llu", le64_to_cpu(ck.v->cookie));
}

#define bch2_bkey_ops_cookie ((struct bkey_ops) {	\
	.key_invalid	= key_type_cookie_invalid,	\
	.val_to_text	= key_type_cookie_to_text,	\
	.min_val_size	= 8,				\
})

@@ -83,9 +83,10 @@ enum btree_update_flags {

	__BTREE_TRIGGER_NORUN,
	__BTREE_TRIGGER_TRANSACTIONAL,
	__BTREE_TRIGGER_ATOMIC,
	__BTREE_TRIGGER_GC,
	__BTREE_TRIGGER_INSERT,
	__BTREE_TRIGGER_OVERWRITE,
	__BTREE_TRIGGER_GC,
	__BTREE_TRIGGER_BUCKET_INVALIDATE,
};

@@ -107,6 +108,10 @@ enum btree_update_flags {
 * causing us to go emergency read-only)
 */
#define BTREE_TRIGGER_TRANSACTIONAL	(1U << __BTREE_TRIGGER_TRANSACTIONAL)
#define BTREE_TRIGGER_ATOMIC		(1U << __BTREE_TRIGGER_ATOMIC)

/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */
#define BTREE_TRIGGER_GC		(1U << __BTREE_TRIGGER_GC)

/* @new is entering the btree */
#define BTREE_TRIGGER_INSERT		(1U << __BTREE_TRIGGER_INSERT)
@@ -114,9 +119,6 @@ enum btree_update_flags {
/* @old is leaving the btree */
#define BTREE_TRIGGER_OVERWRITE		(1U << __BTREE_TRIGGER_OVERWRITE)

/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */
#define BTREE_TRIGGER_GC		(1U << __BTREE_TRIGGER_GC)

/* signal from bucket invalidate path to alloc trigger */
#define BTREE_TRIGGER_BUCKET_INVALIDATE	(1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)

@@ -720,7 +720,7 @@ static noinline void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
{
struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t);
struct bkey_i min_key, max_key;
unsigned j, cacheline = 1;
unsigned cacheline = 1;

t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)),
bset_ro_tree_capacity(b, t));
@@ -823,13 +823,12 @@ void bch2_bset_init_first(struct btree *b, struct bset *i)
set_btree_bset(b, t, i);
}

void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
struct btree_node_entry *bne)
void bch2_bset_init_next(struct btree *b, struct btree_node_entry *bne)
{
struct bset *i = &bne->keys;
struct bset_tree *t;

BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c));
BUG_ON(bset_byte_offset(b, bne) >= btree_buf_bytes(b));
BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b)));
BUG_ON(b->nsets >= MAX_BSETS);

@@ -264,8 +264,7 @@ static inline struct bset *bset_next_set(struct btree *b,
void bch2_btree_keys_init(struct btree *);

void bch2_bset_init_first(struct btree *, struct bset *);
void bch2_bset_init_next(struct bch_fs *, struct btree *,
struct btree_node_entry *);
void bch2_bset_init_next(struct btree *, struct btree_node_entry *);
void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);

void bch2_bset_insert(struct btree *, struct btree_node_iter *,

@@ -60,7 +60,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)

clear_btree_node_just_written(b);

kvpfree(b->data, btree_bytes(c));
kvpfree(b->data, btree_buf_bytes(b));
b->data = NULL;
#ifdef __KERNEL__
kvfree(b->aux_data);
@@ -94,7 +94,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
BUG_ON(b->data || b->aux_data);

b->data = kvpmalloc(btree_bytes(c), gfp);
b->data = kvpmalloc(btree_buf_bytes(b), gfp);
if (!b->data)
return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
#ifdef __KERNEL__
@@ -107,7 +107,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
b->aux_data = NULL;
#endif
if (!b->aux_data) {
kvpfree(b->data, btree_bytes(c));
kvpfree(b->data, btree_buf_bytes(b));
b->data = NULL;
return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
}
@@ -126,7 +126,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
bkey_btree_ptr_init(&b->key);
INIT_LIST_HEAD(&b->list);
INIT_LIST_HEAD(&b->write_blocked);
b->byte_order = ilog2(btree_bytes(c));
b->byte_order = ilog2(c->opts.btree_node_size);
return b;
}

@@ -408,7 +408,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
if (c->verify_data)
list_move(&c->verify_data->list, &bc->live);

kvpfree(c->verify_ondisk, btree_bytes(c));
kvpfree(c->verify_ondisk, c->opts.btree_node_size);

for (i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
@@ -1192,7 +1192,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struc
" failed unpacked %zu\n",
b->unpack_fn_len,
b->nr.live_u64s * sizeof(u64),
btree_bytes(c) - sizeof(struct btree_node),
btree_buf_bytes(b) - sizeof(struct btree_node),
b->nr.live_u64s * 100 / btree_max_u64s(c),
b->sib_u64s[0],
b->sib_u64s[1],

@@ -74,22 +74,27 @@ static inline bool btree_node_hashed(struct btree *b)
_iter = 0; _iter < (_tbl)->size; _iter++) \
rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)

static inline size_t btree_bytes(struct bch_fs *c)
static inline size_t btree_buf_bytes(const struct btree *b)
{
return c->opts.btree_node_size;
return 1UL << b->byte_order;
}

static inline size_t btree_max_u64s(struct bch_fs *c)
static inline size_t btree_buf_max_u64s(const struct btree *b)
{
return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
return (btree_buf_bytes(b) - sizeof(struct btree_node)) / sizeof(u64);
}

static inline size_t btree_pages(struct bch_fs *c)
static inline size_t btree_max_u64s(const struct bch_fs *c)
{
return btree_bytes(c) / PAGE_SIZE;
return (c->opts.btree_node_size - sizeof(struct btree_node)) / sizeof(u64);
}

static inline unsigned btree_blocks(struct bch_fs *c)
static inline size_t btree_sectors(const struct bch_fs *c)
{
return c->opts.btree_node_size >> SECTOR_SHIFT;
}

static inline unsigned btree_blocks(const struct bch_fs *c)
{
return btree_sectors(c) >> c->block_bits;
}
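
The helpers above replace the filesystem-wide btree_bytes(c), derived from c->opts.btree_node_size, with a per-node btree_buf_bytes(b) computed from b->byte_order. A minimal, purely illustrative userspace sketch of that relationship (the 256KiB node size is an assumed example value, not taken from the patch):

/* sketch: byte_order is ilog2() of the buffer size, so 1UL << byte_order recovers it */
#include <stdio.h>

int main(void)
{
	unsigned long btree_node_size = 256 * 1024;                  /* assumed option value */
	unsigned byte_order = 63 - __builtin_clzl(btree_node_size);  /* ilog2() */
	unsigned long buf_bytes = 1UL << byte_order;

	printf("byte_order=%u buf_bytes=%lu\n", byte_order, buf_bytes);
	return 0;
}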

@@ -597,7 +597,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
"bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
p.ptr.gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
@@ -615,7 +615,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
"bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
p.ptr.gen, g->gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
@@ -637,7 +637,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
p.ptr.gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
@@ -649,7 +649,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
"bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
p.ptr.gen, g->gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
@@ -664,8 +664,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
"bucket %u:%zu different types of data in same bucket: %s, %s\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
bch2_data_types[g->data_type],
bch2_data_types[data_type],
bch2_data_type_str(g->data_type),
bch2_data_type_str(data_type),
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
if (data_type == BCH_DATA_btree) {
@@ -1238,11 +1238,11 @@ static int bch2_gc_done(struct bch_fs *c,

for (i = 0; i < BCH_DATA_NR; i++) {
copy_dev_field(dev_usage_buckets_wrong,
d[i].buckets, "%s buckets", bch2_data_types[i]);
d[i].buckets, "%s buckets", bch2_data_type_str(i));
copy_dev_field(dev_usage_sectors_wrong,
d[i].sectors, "%s sectors", bch2_data_types[i]);
d[i].sectors, "%s sectors", bch2_data_type_str(i));
copy_dev_field(dev_usage_fragmented_wrong,
d[i].fragmented, "%s fragmented", bch2_data_types[i]);
d[i].fragmented, "%s fragmented", bch2_data_type_str(i));
}
}

@@ -1253,19 +1253,19 @@ static int bch2_gc_done(struct bch_fs *c,
bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);

copy_fs_field(fs_usage_hidden_wrong,
hidden, "hidden");
b.hidden, "hidden");
copy_fs_field(fs_usage_btree_wrong,
btree, "btree");
b.btree, "btree");

if (!metadata_only) {
copy_fs_field(fs_usage_data_wrong,
data, "data");
b.data, "data");
copy_fs_field(fs_usage_cached_wrong,
cached, "cached");
b.cached, "cached");
copy_fs_field(fs_usage_reserved_wrong,
reserved, "reserved");
b.reserved, "reserved");
copy_fs_field(fs_usage_nr_inodes_wrong,
nr_inodes,"nr_inodes");
b.nr_inodes,"nr_inodes");

for (i = 0; i < BCH_REPLICAS_MAX; i++)
copy_fs_field(fs_usage_persistent_reserved_wrong,
@@ -1417,8 +1417,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
": got %s, should be %s",
iter->pos.inode, iter->pos.offset,
gc.gen,
bch2_data_types[new.data_type],
bch2_data_types[gc.data_type]))
bch2_data_type_str(new.data_type),
bch2_data_type_str(gc.data_type)))
new.data_type = gc.data_type;

#define copy_bucket_field(_errtype, _f) \
@@ -1428,7 +1428,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
": got %u, should be %u", \
iter->pos.inode, iter->pos.offset, \
gc.gen, \
bch2_data_types[gc.data_type], \
bch2_data_type_str(gc.data_type), \
new._f, gc._f)) \
new._f = gc._f; \
@@ -112,7 +112,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
unsigned flags = memalloc_nofs_save();
void *p;

BUG_ON(size > btree_bytes(c));
BUG_ON(size > c->opts.btree_node_size);

*used_mempool = false;
p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
@@ -174,8 +174,8 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)

ptrs = ptrs_end = ((void *) new_whiteouts + bytes);

for (k = unwritten_whiteouts_start(c, b);
k != unwritten_whiteouts_end(c, b);
for (k = unwritten_whiteouts_start(b);
k != unwritten_whiteouts_end(b);
k = bkey_p_next(k))
*--ptrs = k;

@@ -192,7 +192,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
verify_no_dups(b, new_whiteouts,
(void *) ((u64 *) new_whiteouts + b->whiteout_u64s));

memcpy_u64s(unwritten_whiteouts_start(c, b),
memcpy_u64s(unwritten_whiteouts_start(b),
new_whiteouts, b->whiteout_u64s);

btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
@@ -313,7 +313,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
}

bytes = sorting_entire_node
? btree_bytes(c)
? btree_buf_bytes(b)
: __vstruct_bytes(struct btree_node, u64s);

out = btree_bounce_alloc(c, bytes, &used_mempool);
@@ -338,7 +338,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
if (sorting_entire_node) {
u64s = le16_to_cpu(out->keys.u64s);

BUG_ON(bytes != btree_bytes(c));
BUG_ON(bytes != btree_buf_bytes(b));

/*
* Our temporary buffer is the same size as the btree node's
@@ -502,7 +502,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)

bne = want_new_bset(c, b);
if (bne)
bch2_bset_init_next(c, b, bne);
bch2_bset_init_next(b, bne);

bch2_btree_build_aux_trees(b);

@@ -1160,7 +1160,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
ptr_written, b->written);
} else {
for (bne = write_block(b);
bset_byte_offset(b, bne) < btree_bytes(c);
bset_byte_offset(b, bne) < btree_buf_bytes(b);
bne = (void *) bne + block_bytes(c))
btree_err_on(bne->keys.seq == b->data->keys.seq &&
!bch2_journal_seq_is_blacklisted(c,
@@ -1172,7 +1172,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
"found bset signature after last bset");
}

sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool);
sorted->keys.u64s = 0;

set_btree_bset(b, b->set, &b->data->keys);
@@ -1188,7 +1188,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

BUG_ON(b->nr.live_u64s != u64s);

btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted);

if (updated_range)
bch2_btree_node_drop_keys_outside_node(b);
@@ -1284,7 +1284,7 @@ static void btree_node_read_work(struct work_struct *work)
rb->have_ioref = bch2_dev_get_ioref(ca, READ);
bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
bio->bi_iter.bi_sector = rb->pick.ptr.offset;
bio->bi_iter.bi_size = btree_bytes(c);
bio->bi_iter.bi_size = btree_buf_bytes(b);

if (rb->have_ioref) {
bio_set_dev(bio, ca->disk_sb.bdev);
@@ -1512,7 +1512,7 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
}

if (best >= 0) {
memcpy(b->data, ra->buf[best], btree_bytes(c));
memcpy(b->data, ra->buf[best], btree_buf_bytes(b));
ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
} else {
ret = -1;
@@ -1578,7 +1578,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
for (i = 0; i < ra->nr; i++) {
ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
ra->bio[i] = bio_alloc_bioset(NULL,
buf_pages(ra->buf[i], btree_bytes(c)),
buf_pages(ra->buf[i], btree_buf_bytes(b)),
REQ_OP_READ|REQ_SYNC|REQ_META,
GFP_NOFS,
&c->btree_bio);
@@ -1598,7 +1598,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
rb->pick = pick;
rb->bio.bi_iter.bi_sector = pick.ptr.offset;
rb->bio.bi_end_io = btree_node_read_all_replicas_endio;
bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b));

if (rb->have_ioref) {
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
@@ -1665,7 +1665,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
ca = bch_dev_bkey_exists(c, pick.ptr.dev);

bio = bio_alloc_bioset(NULL,
buf_pages(b->data, btree_bytes(c)),
buf_pages(b->data, btree_buf_bytes(b)),
REQ_OP_READ|REQ_SYNC|REQ_META,
GFP_NOFS,
&c->btree_bio);
@@ -1679,7 +1679,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
INIT_WORK(&rb->work, btree_node_read_work);
bio->bi_iter.bi_sector = pick.ptr.offset;
bio->bi_end_io = btree_node_read_endio;
bch2_bio_map(bio, b->data, btree_bytes(c));
bch2_bio_map(bio, b->data, btree_buf_bytes(b));

if (rb->have_ioref) {
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
@@ -2074,8 +2074,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
i->u64s = 0;

sort_iter_add(&sort_iter.iter,
unwritten_whiteouts_start(c, b),
unwritten_whiteouts_end(c, b));
unwritten_whiteouts_start(b),
unwritten_whiteouts_end(b));
SET_BSET_SEPARATE_WHITEOUTS(i, false);

b->whiteout_u64s = 0;
@@ -2251,7 +2251,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)

bne = want_new_bset(c, b);
if (bne)
bch2_bset_init_next(c, b, bne);
bch2_bset_init_next(b, bne);

bch2_btree_build_aux_trees(b);

@@ -1337,7 +1337,7 @@ void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool in

if (path->should_be_locked &&
!trans->restarted &&
(!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
(!dup || !bch2_btree_path_relock_norestart(trans, dup)))
return;

if (dup) {
@@ -819,6 +819,11 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)

/*
* This should not be used in a fastpath, without first trying _do in
* nonblocking mode - it will cause excessive transaction restarts and
* potentially livelocking:
*/
#define drop_locks_do(_trans, _do) \
({ \
bch2_trans_unlock(_trans); \
@@ -631,8 +631,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans,
}

__flatten
bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path)
{
struct get_locks_fail f;

@@ -642,7 +641,7 @@ bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
int __bch2_btree_path_relock(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
{
if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
if (!bch2_btree_path_relock_norestart(trans, path)) {
trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
}
@@ -759,12 +758,39 @@ int bch2_trans_relock(struct btree_trans *trans)
if (unlikely(trans->restarted))
return -((int) trans->restarted);

trans_for_each_path(trans, path, i)
trans_for_each_path(trans, path, i) {
struct get_locks_fail f;

if (path->should_be_locked &&
!bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path);
!btree_path_get_locks(trans, path, false, &f)) {
if (trace_trans_restart_relock_enabled()) {
struct printbuf buf = PRINTBUF;

bch2_bpos_to_text(&buf, path->pos);
prt_printf(&buf, " l=%u seq=%u node seq=",
f.l, path->l[f.l].lock_seq);
if (IS_ERR_OR_NULL(f.b)) {
prt_str(&buf, bch2_err_str(PTR_ERR(f.b)));
} else {
prt_printf(&buf, "%u", f.b->c.lock.seq);

struct six_lock_count c =
bch2_btree_node_lock_counts(trans, NULL, &f.b->c, f.l);
prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);

c = six_lock_counts(&f.b->c.lock);
prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
}

trace_trans_restart_relock(trans, _RET_IP_, buf.buf);
printbuf_exit(&buf);
}

count_event(trans->c, trans_restart_relock);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
}
}

return 0;
}

@@ -778,7 +804,7 @@ int bch2_trans_relock_notrace(struct btree_trans *trans)

trans_for_each_path(trans, path, i)
if (path->should_be_locked &&
!bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
!bch2_btree_path_relock_norestart(trans, path)) {
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
}
return 0;
@@ -312,8 +312,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *,

/* relock: */

bool bch2_btree_path_relock_norestart(struct btree_trans *,
struct btree_path *, unsigned long);
bool bch2_btree_path_relock_norestart(struct btree_trans *, struct btree_path *);
int __bch2_btree_path_relock(struct btree_trans *,
struct btree_path *, unsigned long);

@@ -353,12 +352,6 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,

/* upgrade */

struct get_locks_fail {
unsigned l;
struct btree *b;
};

bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
struct btree_path *, unsigned,
struct get_locks_fail *);
@@ -139,8 +139,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
EBUG_ON(bpos_lt(insert->k.p, b->data->min_key));
EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
EBUG_ON(insert->k.u64s >
bch_btree_keys_u64s_remaining(trans->c, b));
EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b));
EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));

k = bch2_btree_node_iter_peek_all(node_iter, b);
@@ -160,7 +159,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
k->type = KEY_TYPE_deleted;

if (k->needs_whiteout)
push_whiteout(trans->c, b, insert->k.p);
push_whiteout(b, insert->k.p);
k->needs_whiteout = false;

if (k >= btree_bset_last(b)->start) {
@@ -348,9 +347,7 @@ static noinline void journal_transaction_name(struct btree_trans *trans)
static inline int btree_key_can_insert(struct btree_trans *trans,
struct btree *b, unsigned u64s)
{
struct bch_fs *c = trans->c;

if (!bch2_btree_node_insert_fits(c, b, u64s))
if (!bch2_btree_node_insert_fits(b, u64s))
return -BCH_ERR_btree_insert_btree_node_full;

return 0;
@@ -418,7 +415,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
return 0;

new_u64s = roundup_pow_of_two(u64s);
new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
if (unlikely(!new_k))
return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);

@@ -448,9 +445,6 @@ static int run_one_mem_trigger(struct btree_trans *trans,
if (unlikely(flags & BTREE_TRIGGER_NORUN))
return 0;

if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)))
return 0;

if (old_ops->trigger == new_ops->trigger) {
ret = bch2_key_trigger(trans, i->btree_id, i->level,
old, bkey_i_to_s(new),
@@ -586,9 +580,6 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)

static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
int ret = 0;

trans_for_each_update(trans, i) {
/*
* XXX: synchronization of cached update triggers with gc
@@ -596,14 +587,15 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
*/
BUG_ON(i->cached || i->level);

if (gc_visited(c, gc_pos_btree_node(insert_l(trans, i)->b))) {
ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC);
if (btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)) &&
gc_visited(trans->c, gc_pos_btree_node(insert_l(trans, i)->b))) {
int ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC);
if (ret)
break;
return ret;
}
}

return ret;
return 0;
}

static inline int
@@ -680,6 +672,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
return -BCH_ERR_btree_insert_need_mark_replicas;

/* XXX: we only want to run this if deltas are nonzero */
bch2_trans_account_disk_usage_change(trans);

h = trans->hooks;
while (h) {
ret = h->fn(trans, h);
@@ -689,8 +684,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
}

trans_for_each_update(trans, i)
if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
ret = run_one_mem_trigger(trans, i, i->flags);
if (BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS & (1U << i->bkey_type)) {
ret = run_one_mem_trigger(trans, i, BTREE_TRIGGER_ATOMIC|i->flags);
if (ret)
goto fatal_err;
}
@@ -994,6 +989,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
!trans->journal_entries_u64s)
goto out_reset;

memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));

ret = bch2_trans_commit_run_triggers(trans);
if (ret)
goto out_reset;
@@ -430,6 +430,9 @@ struct btree_trans {
struct journal_res journal_res;
u64 *journal_seq;
struct disk_reservation *disk_res;

struct bch_fs_usage_base fs_usage_delta;

unsigned journal_u64s;
unsigned extra_disk_res; /* XXX kill */
struct replicas_delta_list *fs_usage_deltas;
@@ -653,7 +656,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
BIT_ULL(BKEY_TYPE_reflink)| \
BIT_ULL(BKEY_TYPE_btree))

#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
#define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \
(BIT_ULL(BKEY_TYPE_alloc)| \
BIT_ULL(BKEY_TYPE_inodes)| \
BIT_ULL(BKEY_TYPE_stripes)| \
@@ -661,7 +664,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);

#define BTREE_NODE_TYPE_HAS_TRIGGERS \
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS)

static inline bool btree_node_type_needs_gc(enum btree_node_type type)
{
@@ -738,4 +741,9 @@ enum btree_node_sibling {
btree_next_sib,
};

struct get_locks_fail {
unsigned l;
struct btree *b;
};

#endif /* _BCACHEFS_BTREE_TYPES_H */
@@ -159,7 +159,7 @@ static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
{
size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f);

return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
return __vstruct_bytes(struct btree_node, u64s) < btree_buf_bytes(b);
}

/* Btree node freeing/allocation: */
@@ -1097,7 +1097,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
* Always check for space for two keys, even if we won't have to
* split at prior level - it might have been a merge instead:
*/
if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
if (bch2_btree_node_insert_fits(path->l[update_level].b,
BKEY_BTREE_PTR_U64s_MAX * 2))
break;

@@ -1401,7 +1401,7 @@ static void __btree_split_node(struct btree_update *as,

unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s +
nr_keys[i].val_u64s;
if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c))
if (__vstruct_bytes(struct btree_node, u64s) > btree_buf_bytes(b))
n[i]->data->format = b->format;

btree_node_set_format(n[i], n[i]->data->format);
@@ -1703,7 +1703,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t

bch2_btree_node_prep_for_write(trans, path, b);

if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
if (!bch2_btree_node_insert_fits(b, bch2_keylist_u64s(keys))) {
bch2_btree_node_unlock_write(trans, path, b);
goto split;
}
@@ -184,21 +184,19 @@ static inline void btree_node_reset_sib_u64s(struct btree *b)
b->sib_u64s[1] = b->nr.live_u64s;
}

static inline void *btree_data_end(struct bch_fs *c, struct btree *b)
static inline void *btree_data_end(struct btree *b)
{
return (void *) b->data + btree_bytes(c);
return (void *) b->data + btree_buf_bytes(b);
}

static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c,
struct btree *b)
static inline struct bkey_packed *unwritten_whiteouts_start(struct btree *b)
{
return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s);
return (void *) ((u64 *) btree_data_end(b) - b->whiteout_u64s);
}

static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c,
struct btree *b)
static inline struct bkey_packed *unwritten_whiteouts_end(struct btree *b)
{
return btree_data_end(c, b);
return btree_data_end(b);
}

static inline void *write_block(struct btree *b)
@@ -221,13 +219,11 @@ static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
return __btree_addr_written(b, k);
}

static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
struct btree *b,
void *end)
static inline ssize_t __bch2_btree_u64s_remaining(struct btree *b, void *end)
{
ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
b->whiteout_u64s;
ssize_t total = c->opts.btree_node_size >> 3;
ssize_t total = btree_buf_bytes(b) >> 3;

/* Always leave one extra u64 for bch2_varint_decode: */
used++;
@@ -235,10 +231,9 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
return total - used;
}

static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
struct btree *b)
static inline size_t bch2_btree_keys_u64s_remaining(struct btree *b)
{
ssize_t remaining = __bch_btree_u64s_remaining(c, b,
ssize_t remaining = __bch2_btree_u64s_remaining(b,
btree_bkey_last(b, bset_tree_last(b)));

BUG_ON(remaining < 0);
@@ -260,14 +255,13 @@ static inline unsigned btree_write_set_buffer(struct btree *b)
return 8 << BTREE_WRITE_SET_U64s_BITS;
}

static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
struct btree *b)
static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree *b)
{
struct bset_tree *t = bset_tree_last(b);
struct btree_node_entry *bne = max(write_block(b),
(void *) btree_bkey_last(b, bset_tree_last(b)));
ssize_t remaining_space =
__bch_btree_u64s_remaining(c, b, bne->keys.start);
__bch2_btree_u64s_remaining(b, bne->keys.start);

if (unlikely(bset_written(b, bset(b, t)))) {
if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
@@ -281,12 +275,11 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
return NULL;
}

static inline void push_whiteout(struct bch_fs *c, struct btree *b,
struct bpos pos)
static inline void push_whiteout(struct btree *b, struct bpos pos)
{
struct bkey_packed k;

BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
BUG_ON(bch2_btree_keys_u64s_remaining(b) < BKEY_U64s);
EBUG_ON(btree_node_just_written(b));

if (!bkey_pack_pos(&k, pos, b)) {
@@ -299,20 +292,19 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b,
k.needs_whiteout = true;

b->whiteout_u64s += k.u64s;
bkey_p_copy(unwritten_whiteouts_start(c, b), &k);
bkey_p_copy(unwritten_whiteouts_start(b), &k);
}

/*
* write lock must be held on @b (else the dirty bset that we were going to
* insert into could be written out from under us)
*/
static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
struct btree *b, unsigned u64s)
static inline bool bch2_btree_node_insert_fits(struct btree *b, unsigned u64s)
{
if (unlikely(btree_node_need_rewrite(b)))
return false;

return u64s <= bch_btree_keys_u64s_remaining(c, b);
return u64s <= bch2_btree_keys_u64s_remaining(b);
}

void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
@@ -125,13 +125,12 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
struct btree_write_buffered_key *wb,
bool *write_locked, size_t *fast)
{
struct bch_fs *c = trans->c;
struct btree_path *path;
int ret;

EBUG_ON(!wb->journal_seq);
EBUG_ON(!c->btree_write_buffer.flushing.pin.seq);
EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq);
EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);

ret = bch2_btree_iter_traverse(iter);
if (ret)
@@ -155,7 +154,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
*write_locked = true;
}

if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) {
if (unlikely(!bch2_btree_node_insert_fits(path->l[0].b, wb->k.k.u64s))) {
*write_locked = false;
return wb_flush_one_slowpath(trans, iter, wb);
}
@@ -25,7 +25,7 @@

#include <linux/preempt.h>

static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
enum bch_data_type data_type,
s64 sectors)
{
@@ -54,20 +54,20 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
bch2_fs_usage_acc_to_base(c, i);

for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++)
usage->reserved += usage->persistent_reserved[i];
usage->b.reserved += usage->persistent_reserved[i];

for (unsigned i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry_v1 *e =
cpu_replicas_entry(&c->replicas, i);

fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
fs_usage_data_type_to_base(&usage->b, e->data_type, usage->replicas[i]);
}

for_each_member_device(c, ca) {
struct bch_dev_usage dev = bch2_dev_usage_read(ca);

usage->hidden += (dev.d[BCH_DATA_sb].buckets +
dev.d[BCH_DATA_journal].buckets) *
usage->b.hidden += (dev.d[BCH_DATA_sb].buckets +
dev.d[BCH_DATA_journal].buckets) *
ca->mi.bucket_size;
}

@@ -188,15 +188,15 @@ void bch2_fs_usage_to_text(struct printbuf *out,
prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);

prt_printf(out, "hidden:\t\t\t\t%llu\n",
fs_usage->u.hidden);
fs_usage->u.b.hidden);
prt_printf(out, "data:\t\t\t\t%llu\n",
fs_usage->u.data);
fs_usage->u.b.data);
prt_printf(out, "cached:\t\t\t\t%llu\n",
fs_usage->u.cached);
fs_usage->u.b.cached);
prt_printf(out, "reserved:\t\t\t%llu\n",
fs_usage->u.reserved);
fs_usage->u.b.reserved);
prt_printf(out, "nr_inodes:\t\t\t%llu\n",
fs_usage->u.nr_inodes);
fs_usage->u.b.nr_inodes);
prt_printf(out, "online reserved:\t\t%llu\n",
fs_usage->online_reserved);

@@ -225,10 +225,10 @@ static u64 reserve_factor(u64 r)

u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
{
return min(fs_usage->u.hidden +
fs_usage->u.btree +
fs_usage->u.data +
reserve_factor(fs_usage->u.reserved +
return min(fs_usage->u.b.hidden +
fs_usage->u.b.btree +
fs_usage->u.b.data +
reserve_factor(fs_usage->u.b.reserved +
fs_usage->online_reserved),
c->capacity);
}
@@ -240,17 +240,17 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
u64 data, reserved;

ret.capacity = c->capacity -
bch2_fs_usage_read_one(c, &c->usage_base->hidden);
bch2_fs_usage_read_one(c, &c->usage_base->b.hidden);

data = bch2_fs_usage_read_one(c, &c->usage_base->data) +
bch2_fs_usage_read_one(c, &c->usage_base->btree);
reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
data = bch2_fs_usage_read_one(c, &c->usage_base->b.data) +
bch2_fs_usage_read_one(c, &c->usage_base->b.btree);
reserved = bch2_fs_usage_read_one(c, &c->usage_base->b.reserved) +
percpu_u64_get(c->online_reserved);

ret.used = min(ret.capacity, data + reserve_factor(reserved));
ret.free = ret.capacity - ret.used;

ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes);
ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes);

return ret;
}
@@ -284,7 +284,7 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
prt_newline(out);

for (unsigned i = 0; i < BCH_DATA_NR; i++) {
prt_str(out, bch2_data_types[i]);
bch2_prt_data_type(out, i);
prt_tab(out);
prt_u64(out, usage->d[i].buckets);
prt_tab_rjust(out);
@@ -308,9 +308,9 @@ void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
fs_usage = fs_usage_ptr(c, journal_seq, gc);

if (data_type_is_hidden(old->data_type))
fs_usage->hidden -= ca->mi.bucket_size;
fs_usage->b.hidden -= ca->mi.bucket_size;
if (data_type_is_hidden(new->data_type))
fs_usage->hidden += ca->mi.bucket_size;
fs_usage->b.hidden += ca->mi.bucket_size;

u = dev_usage_ptr(ca, journal_seq, gc);

@@ -359,7 +359,7 @@ static inline int __update_replicas(struct bch_fs *c,
if (idx < 0)
return -1;

fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors);
fs_usage->replicas[idx] += sectors;
return 0;
}
@@ -394,7 +394,7 @@ int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k,

preempt_disable();
fs_usage = fs_usage_ptr(c, journal_seq, gc);
fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors);
fs_usage->replicas[idx] += sectors;
preempt_enable();
err:
@@ -523,8 +523,8 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
if (bch2_fs_inconsistent_on(g->data_type &&
g->data_type != data_type, c,
"different types of data in same bucket: %s, %s",
bch2_data_types[g->data_type],
bch2_data_types[data_type])) {
bch2_data_type_str(g->data_type),
bch2_data_type_str(data_type))) {
ret = -EIO;
goto err;
}
@@ -532,7 +532,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size",
ca->dev_idx, b, g->gen,
bch2_data_types[g->data_type ?: data_type],
bch2_data_type_str(g->data_type ?: data_type),
g->dirty_sectors, sectors)) {
ret = -EIO;
goto err;
@@ -575,7 +575,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
"while marking %s",
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
ptr->gen,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
ret = -EIO;
@@ -588,7 +588,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
"while marking %s",
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
ptr->gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
@@ -603,7 +603,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
"while marking %s",
ptr->dev, bucket_nr, b_gen,
*bucket_gen(ca, bucket_nr),
bch2_data_types[bucket_data_type ?: ptr_data_type],
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
ptr->gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
@@ -624,8 +624,8 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s",
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type],
bch2_data_types[ptr_data_type],
bch2_data_type_str(bucket_data_type),
bch2_data_type_str(ptr_data_type),
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
ret = -EIO;
@@ -638,7 +638,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
"while marking %s",
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
bucket_sectors, sectors,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
@@ -677,11 +677,11 @@ void bch2_trans_fs_usage_revert(struct btree_trans *trans,
BUG_ON(__update_replicas(c, dst, &d->r, -d->delta));
}

dst->nr_inodes -= deltas->nr_inodes;
dst->b.nr_inodes -= deltas->nr_inodes;

for (i = 0; i < BCH_REPLICAS_MAX; i++) {
added -= deltas->persistent_reserved[i];
dst->reserved -= deltas->persistent_reserved[i];
dst->b.reserved -= deltas->persistent_reserved[i];
dst->persistent_reserved[i] -= deltas->persistent_reserved[i];
}

@@ -694,48 +694,25 @@ void bch2_trans_fs_usage_revert(struct btree_trans *trans,
percpu_up_read(&c->mark_lock);
}

int bch2_trans_fs_usage_apply(struct btree_trans *trans,
struct replicas_delta_list *deltas)
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
static int warned_disk_usage = 0;
bool warn = false;
u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
struct replicas_delta *d, *d2;
struct replicas_delta *top = (void *) deltas->d + deltas->used;
struct bch_fs_usage *dst;
s64 added = 0, should_not_have_added;
unsigned i;

percpu_down_read(&c->mark_lock);
preempt_disable();
dst = fs_usage_ptr(c, trans->journal_res.seq, false);
struct bch_fs_usage_base *dst = &fs_usage_ptr(c, trans->journal_res.seq, false)->b;
struct bch_fs_usage_base *src = &trans->fs_usage_delta;

for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
switch (d->r.data_type) {
case BCH_DATA_btree:
case BCH_DATA_user:
case BCH_DATA_parity:
added += d->delta;
}

if (__update_replicas(c, dst, &d->r, d->delta))
goto need_mark;
}

dst->nr_inodes += deltas->nr_inodes;

for (i = 0; i < BCH_REPLICAS_MAX; i++) {
added += deltas->persistent_reserved[i];
dst->reserved += deltas->persistent_reserved[i];
dst->persistent_reserved[i] += deltas->persistent_reserved[i];
}
s64 added = src->btree + src->data + src->reserved;

/*
* Not allowed to reduce sectors_available except by getting a
* reservation:
*/
should_not_have_added = added - (s64) disk_res_sectors;
s64 should_not_have_added = added - (s64) disk_res_sectors;
if (unlikely(should_not_have_added > 0)) {
u64 old, new, v = atomic64_read(&c->sectors_available);

@@ -754,6 +731,13 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
this_cpu_sub(*c->online_reserved, added);
}

dst->hidden += src->hidden;
dst->btree += src->btree;
dst->data += src->data;
dst->cached += src->cached;
dst->reserved += src->reserved;
dst->nr_inodes += src->nr_inodes;

preempt_enable();
percpu_up_read(&c->mark_lock);

@@ -761,6 +745,34 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
bch2_trans_inconsistent(trans,
"disk usage increased %lli more than %llu sectors reserved)",
should_not_have_added, disk_res_sectors);
}

int bch2_trans_fs_usage_apply(struct btree_trans *trans,
struct replicas_delta_list *deltas)
{
struct bch_fs *c = trans->c;
struct replicas_delta *d, *d2;
struct replicas_delta *top = (void *) deltas->d + deltas->used;
struct bch_fs_usage *dst;
unsigned i;

percpu_down_read(&c->mark_lock);
preempt_disable();
dst = fs_usage_ptr(c, trans->journal_res.seq, false);

for (d = deltas->d; d != top; d = replicas_delta_next(d))
if (__update_replicas(c, dst, &d->r, d->delta))
goto need_mark;

dst->b.nr_inodes += deltas->nr_inodes;

for (i = 0; i < BCH_REPLICAS_MAX; i++) {
dst->b.reserved += deltas->persistent_reserved[i];
dst->persistent_reserved[i] += deltas->persistent_reserved[i];
}

preempt_enable();
percpu_up_read(&c->mark_lock);
return 0;
need_mark:
/* revert changes: */
@@ -1084,7 +1096,7 @@ static int __trigger_reservation(struct btree_trans *trans,
struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc);

replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved));
fs_usage->reserved += sectors;
fs_usage->b.reserved += sectors;
fs_usage->persistent_reserved[replicas - 1] += sectors;

preempt_enable();
@@ -1130,9 +1142,9 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s",
iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[a->v.data_type],
bch2_data_types[type],
bch2_data_types[type]);
bch2_data_type_str(a->v.data_type),
bch2_data_type_str(type),
bch2_data_type_str(type));
ret = -EIO;
goto err;
}
@@ -356,6 +356,8 @@ int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned,
ret; \
})

void bch2_trans_account_disk_usage_change(struct btree_trans *);

void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);

@@ -385,6 +387,21 @@ static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b)
return false;
}

static inline const char *bch2_data_type_str(enum bch_data_type type)
{
return type < BCH_DATA_NR
? __bch2_data_types[type]
: "(invalid data type)";
}

static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
{
if (type < BCH_DATA_NR)
prt_str(out, __bch2_data_types[type]);
else
prt_printf(out, "(invalid data type %u)", type);
}

/* disk reservations: */

static inline void bch2_disk_reservation_put(struct bch_fs *c,
@@ -45,23 +45,18 @@ struct bch_dev_usage {
} d[BCH_DATA_NR];
};

struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */
struct bch_fs_usage_base {
u64 hidden;
u64 btree;
u64 data;
u64 cached;
u64 reserved;
u64 nr_inodes;
};

/* XXX: add stats for compression ratio */
#if 0
u64 uncompressed;
u64 compressed;
#endif

/* broken out: */

struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */
struct bch_fs_usage_base b;
u64 persistent_reserved[BCH_REPLICAS_MAX];
u64 replicas[];
};

@@ -109,7 +109,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
if (cpu_timeout != MAX_SCHEDULE_TIMEOUT)
mod_timer(&wait.cpu_timer, cpu_timeout + jiffies);

while (1) {
do {
set_current_state(TASK_INTERRUPTIBLE);
if (kthread && kthread_should_stop())
break;
@@ -119,7 +119,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,

schedule();
try_to_freeze();
}
} while (0);

__set_current_state(TASK_RUNNING);
del_timer_sync(&wait.cpu_timer);

@@ -47,6 +47,14 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}

static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type)
{
if (type < BCH_COMPRESSION_TYPE_NR)
prt_str(out, __bch2_compression_types[type]);
else
prt_printf(out, "(invalid compression type %u)", type);
}

int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,

@@ -285,9 +285,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, insert->k.p) ?:
bch2_bkey_set_needs_rebalance(c, insert,
op->opts.background_target,
op->opts.background_compression) ?:
bch2_bkey_set_needs_rebalance(c, insert, &op->opts) ?:
bch2_trans_update(trans, &iter, insert,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, &op->res,
@@ -529,7 +527,7 @@ int bch2_data_update_init(struct btree_trans *trans,
BCH_WRITE_DATA_ENCODED|
BCH_WRITE_MOVE|
m->data_opts.write_flags;
m->op.compression_opt = io_opts.background_compression ?: io_opts.compression;
m->op.compression_opt = background_compression(io_opts);
m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;

bkey_for_each_ptr(ptrs, ptr)

@@ -44,19 +44,19 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
return false;

bio = bio_alloc_bioset(ca->disk_sb.bdev,
buf_pages(n_sorted, btree_bytes(c)),
buf_pages(n_sorted, btree_buf_bytes(b)),
REQ_OP_READ|REQ_META,
GFP_NOFS,
&c->btree_bio);
bio->bi_iter.bi_sector = pick.ptr.offset;
bch2_bio_map(bio, n_sorted, btree_bytes(c));
bch2_bio_map(bio, n_sorted, btree_buf_bytes(b));

submit_bio_wait(bio);

bio_put(bio);
percpu_ref_put(&ca->io_ref);

memcpy(n_ondisk, n_sorted, btree_bytes(c));
memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));

v->written = 0;
if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
@@ -137,7 +137,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
mutex_lock(&c->verify_lock);

if (!c->verify_ondisk) {
c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
c->verify_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
if (!c->verify_ondisk)
goto out;
}
@@ -199,19 +199,19 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
return;
}

n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
n_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
if (!n_ondisk) {
prt_printf(out, "memory allocation failure\n");
goto out;
}

bio = bio_alloc_bioset(ca->disk_sb.bdev,
buf_pages(n_ondisk, btree_bytes(c)),
buf_pages(n_ondisk, btree_buf_bytes(b)),
REQ_OP_READ|REQ_META,
GFP_NOFS,
&c->btree_bio);
bio->bi_iter.bi_sector = pick.ptr.offset;
bch2_bio_map(bio, n_ondisk, btree_bytes(c));
bch2_bio_map(bio, n_ondisk, btree_buf_bytes(b));

ret = submit_bio_wait(bio);
if (ret) {
@@ -293,7 +293,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
out:
if (bio)
bio_put(bio);
kvpfree(n_ondisk, btree_bytes(c));
kvpfree(n_ondisk, btree_buf_bytes(b));
percpu_ref_put(&ca->io_ref);
}

fs/bcachefs/dirent_format.h (new file, 42 lines)
@@ -0,0 +1,42 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_DIRENT_FORMAT_H
#define _BCACHEFS_DIRENT_FORMAT_H

/*
* Dirents (and xattrs) have to implement string lookups; since our b-tree
* doesn't support arbitrary length strings for the key, we instead index by a
* 64 bit hash (currently truncated sha1) of the string, stored in the offset
* field of the key - using linear probing to resolve hash collisions. This also
* provides us with the readdir cookie posix requires.
*
* Linear probing requires us to use whiteouts for deletions, in the event of a
* collision:
*/

struct bch_dirent {
struct bch_val v;

/* Target inode number: */
union {
__le64 d_inum;
struct { /* DT_SUBVOL */
__le32 d_child_subvol;
__le32 d_parent_subvol;
};
};

/*
* Copy of mode bits 12-15 from the target inode - so userspace can get
* the filetype without having to do a stat()
*/
__u8 d_type;

__u8 d_name[];
} __packed __aligned(8);

#define DT_SUBVOL 16
#define BCH_DT_MAX 17

#define BCH_NAME_MAX 512

#endif /* _BCACHEFS_DIRENT_FORMAT_H */
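
The comment at the top of dirent_format.h describes how dirents are indexed: by a 64-bit hash of the name stored in the key's offset field, with linear probing for collisions and whiteouts for deletions. A rough, self-contained sketch of that lookup idea (hypothetical names, an in-memory array standing in for the btree; this is not bcachefs code):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

enum slot_state { SLOT_EMPTY, SLOT_USED, SLOT_WHITEOUT };

struct slot {
	enum slot_state	state;
	uint64_t	hash;
	const char	*name;
};

/*
 * Linear probing: start at hash % size and walk forward.  A whiteout does
 * not terminate the probe - a later slot may still hold the name - which is
 * why deletions must leave whiteouts behind instead of emptying the slot.
 */
const char *dirent_lookup_sketch(struct slot *tbl, size_t size,
				 uint64_t hash, const char *name)
{
	for (size_t i = 0; i < size; i++) {
		struct slot *s = &tbl[(hash + i) % size];

		if (s->state == SLOT_EMPTY)
			return NULL;
		if (s->state == SLOT_USED &&
		    s->hash == hash && !strcmp(s->name, name))
			return s->name;
	}
	return NULL;
}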

@@ -190,7 +190,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
a->v.stripe_redundancy, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[a->v.data_type],
bch2_data_type_str(a->v.data_type),
a->v.dirty_sectors,
a->v.stripe, s.k->p.offset)) {
ret = -EIO;
@@ -200,7 +200,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
iter.pos.inode, iter.pos.offset, a->v.gen,
bch2_data_types[a->v.data_type],
bch2_data_type_str(a->v.data_type),
a->v.dirty_sectors,
s.k->p.offset)) {
ret = -EIO;
@@ -367,7 +367,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
}
}

if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) {
if (flags & BTREE_TRIGGER_ATOMIC) {
struct stripe *m = genradix_ptr(&c->stripes, idx);

if (!m) {

fs/bcachefs/ec_format.h (new file, 19 lines)
@@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_EC_FORMAT_H
#define _BCACHEFS_EC_FORMAT_H

struct bch_stripe {
struct bch_val v;
__le16 sectors;
__u8 algorithm;
__u8 nr_blocks;
__u8 nr_redundant;

__u8 csum_granularity_bits;
__u8 csum_type;
__u8 pad;

struct bch_extent_ptr ptrs[];
} __packed __aligned(8);

#endif /* _BCACHEFS_EC_FORMAT_H */
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "bkey_methods.h"
|
||||
#include "btree_cache.h"
|
||||
#include "btree_gc.h"
|
||||
#include "btree_io.h"
|
||||
#include "btree_iter.h"
|
||||
@ -1018,12 +1019,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct bch_extent_crc_unpacked crc =
|
||||
bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
|
||||
|
||||
prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
|
||||
prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ",
|
||||
crc.compressed_size,
|
||||
crc.uncompressed_size,
|
||||
crc.offset, crc.nonce,
|
||||
bch2_csum_types[crc.csum_type],
|
||||
bch2_compression_types[crc.compression_type]);
|
||||
bch2_csum_types[crc.csum_type]);
|
||||
bch2_prt_compression_type(out, crc.compression_type);
|
||||
break;
|
||||
}
|
||||
case BCH_EXTENT_ENTRY_stripe_ptr: {
|
||||
@ -1334,10 +1335,12 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
|
||||
}
|
||||
|
||||
int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
|
||||
unsigned target, unsigned compression)
|
||||
struct bch_io_opts *opts)
|
||||
{
|
||||
struct bkey_s k = bkey_i_to_s(_k);
|
||||
struct bch_extent_rebalance *r;
|
||||
unsigned target = opts->background_target;
|
||||
unsigned compression = background_compression(*opts);
|
||||
bool needs_rebalance;
|
||||
|
||||
if (!bkey_extent_is_direct_data(k.k))
|
||||
|
@ -708,7 +708,7 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
|
||||
bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
|
||||
unsigned, unsigned);
|
||||
struct bch_io_opts *);
|
||||
|
||||
/* Generic extent code: */
|
||||
|
||||
|
295
fs/bcachefs/extents_format.h
Normal file
295
fs/bcachefs/extents_format.h
Normal file
@ -0,0 +1,295 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_EXTENTS_FORMAT_H
|
||||
#define _BCACHEFS_EXTENTS_FORMAT_H
|
||||
|
||||
/*
 * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
 * preceded by checksum/compression information (bch_extent_crc32 or
 * bch_extent_crc64).
 *
 * One major determining factor in the format of extents is how we handle and
 * represent extents that have been partially overwritten and thus trimmed:
 *
 * If an extent is not checksummed or compressed, when the extent is trimmed we
 * don't have to remember the extent we originally allocated and wrote: we can
 * merely adjust ptr->offset to point to the start of the data that is currently
 * live. The size field in struct bkey records the current (live) size of the
 * extent, and is also used to mean "size of region on disk that we point to" in
 * this case.
 *
 * Thus an extent that is not checksummed or compressed will consist only of a
 * list of bch_extent_ptrs, with none of the fields in
 * bch_extent_crc32/bch_extent_crc64.
 *
 * When an extent is checksummed or compressed, it's not possible to read only
 * the data that is currently live: we have to read the entire extent that was
 * originally written, and then return only the part of the extent that is
 * currently live.
 *
 * Thus, in addition to the current size of the extent in struct bkey, we need
 * to store the size of the originally allocated space - this is the
 * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
 * when the extent is trimmed, instead of modifying the offset field of the
 * pointer, we keep a second smaller offset field - "offset into the original
 * extent of the currently live region".
 *
 * The other major determining factor is replication and data migration:
 *
 * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
 * write, we will initially write all the replicas in the same format, with the
 * same checksum type and compression format - however, when copygc runs later (or
 * tiering/cache promotion, anything that moves data), it is not in general
 * going to rewrite all the pointers at once - one of the replicas may be in a
 * bucket on one device that has very little fragmentation while another lives
 * in a bucket that has become heavily fragmented, and thus is being rewritten
 * sooner than the rest.
 *
 * Thus it will only move a subset of the pointers (or in the case of
 * tiering/cache promotion perhaps add a single pointer without dropping any
 * current pointers), and if the extent has been partially overwritten it must
 * write only the currently live portion (or copygc would not be able to reduce
 * fragmentation!) - which necessitates a different bch_extent_crc format for
 * the new pointer.
 *
 * But in the interests of space efficiency, we don't want to store one
 * bch_extent_crc for each pointer if we don't have to.
 *
 * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
 * bch_extent_ptrs appended arbitrarily one after the other. We determine the
 * type of a given entry with a scheme similar to utf8 (except we're encoding a
 * type, not a size), encoding the type in the position of the first set bit:
 *
 * bch_extent_crc32	- 0b1
 * bch_extent_ptr	- 0b10
 * bch_extent_crc64	- 0b100
 *
 * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
 * bch_extent_crc64 is the least constrained).
 *
 * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
 * until the next bch_extent_crc32/64.
 *
 * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
 * is neither checksummed nor compressed.
 */

#define BCH_EXTENT_ENTRY_TYPES()	\
	x(ptr,		0)		\
	x(crc32,	1)		\
	x(crc64,	2)		\
	x(crc128,	3)		\
	x(stripe_ptr,	4)		\
	x(rebalance,	5)
#define BCH_EXTENT_ENTRY_MAX	6

enum bch_extent_entry_type {
#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
	BCH_EXTENT_ENTRY_TYPES()
#undef x
};
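[Editorial note, not part of the file above: a minimal sketch of how the first-set-bit encoding described in the comment can be decoded. It assumes each entry's leading `type` bitfield has exactly one bit set and that the enum indices correspond to those bit positions; the function name is hypothetical, the tree has its own accessors.]

static inline enum bch_extent_entry_type
example_extent_entry_type(const union bch_extent_entry *e)
{
	/* count trailing zeros: position of the single set type bit */
	return e->type ? __builtin_ctzl(e->type) : BCH_EXTENT_ENTRY_MAX;
}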

/* Compressed/uncompressed size are stored biased by 1: */
struct bch_extent_crc32 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u32			type:2,
				_compressed_size:7,
				_uncompressed_size:7,
				offset:7,
				_unused:1,
				csum_type:4,
				compression_type:4;
	__u32			csum;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u32			csum;
	__u32			compression_type:4,
				csum_type:4,
				_unused:1,
				offset:7,
				_uncompressed_size:7,
				_compressed_size:7,
				type:2;
#endif
} __packed __aligned(8);

#define CRC32_SIZE_MAX		(1U << 7)
#define CRC32_NONCE_MAX		0

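[Editorial note, a worked check of the bound above rather than part of the file: _compressed_size and _uncompressed_size are 7-bit fields stored biased by one, so the representable sizes are 1..2^7 = 128 sectors, hence CRC32_SIZE_MAX = 1U << 7. The crc64 and crc128 variants below widen these fields to 9 and 13 bits, giving maxima of 2^9 = 512 and 2^13 = 8192.]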
struct bch_extent_crc64 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:3,
				_compressed_size:9,
				_uncompressed_size:9,
				offset:9,
				nonce:10,
				csum_type:4,
				compression_type:4,
				csum_hi:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			csum_hi:16,
				compression_type:4,
				csum_type:4,
				nonce:10,
				offset:9,
				_uncompressed_size:9,
				_compressed_size:9,
				type:3;
#endif
	__u64			csum_lo;
} __packed __aligned(8);

#define CRC64_SIZE_MAX		(1U << 9)
#define CRC64_NONCE_MAX		((1U << 10) - 1)

struct bch_extent_crc128 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:4,
				_compressed_size:13,
				_uncompressed_size:13,
				offset:13,
				nonce:13,
				csum_type:4,
				compression_type:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			compression_type:4,
				csum_type:4,
				nonce:13,
				offset:13,
				_uncompressed_size:13,
				_compressed_size:13,
				type:4;
#endif
	struct bch_csum		csum;
} __packed __aligned(8);

#define CRC128_SIZE_MAX		(1U << 13)
#define CRC128_NONCE_MAX	((1U << 13) - 1)

/*
 * @reservation - pointer hasn't been written to, just reserved
 */
struct bch_extent_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:1,
				cached:1,
				unused:1,
				unwritten:1,
				offset:44, /* 8 petabytes */
				dev:8,
				gen:8;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			gen:8,
				dev:8,
				offset:44,
				unwritten:1,
				unused:1,
				cached:1,
				type:1;
#endif
} __packed __aligned(8);

struct bch_extent_stripe_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:5,
				block:8,
				redundancy:4,
				idx:47;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			idx:47,
				redundancy:4,
				block:8,
				type:5;
#endif
};

struct bch_extent_rebalance {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:6,
				unused:34,
				compression:8, /* enum bch_compression_opt */
				target:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			target:16,
				compression:8,
				unused:34,
				type:6;
#endif
};

union bch_extent_entry {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
	unsigned long			type;
#elif __BITS_PER_LONG == 32
	struct {
		unsigned long	pad;
		unsigned long	type;
	};
#else
#error edit for your odd byteorder.
#endif

#define x(f, n) struct bch_extent_##f	f;
	BCH_EXTENT_ENTRY_TYPES()
#undef x
};

struct bch_btree_ptr {
	struct bch_val		v;

	__u64			_data[0];
	struct bch_extent_ptr	start[];
} __packed __aligned(8);

struct bch_btree_ptr_v2 {
	struct bch_val		v;

	__u64			mem_ptr;
	__le64			seq;
	__le16			sectors_written;
	__le16			flags;
	struct bpos		min_key;
	__u64			_data[0];
	struct bch_extent_ptr	start[];
} __packed __aligned(8);

LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0, 1);

struct bch_extent {
	struct bch_val		v;

	__u64			_data[0];
	union bch_extent_entry	start[];
} __packed __aligned(8);

/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
	((sizeof(struct bch_extent_crc128) +			\
	  sizeof(struct bch_extent_ptr)) / sizeof(__u64))

/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX				\
	(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))

/* * Maximum possible size of an entire extent, key + value: */
#define BKEY_EXTENT_U64s_MAX		(BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)

/* Btree pointers don't carry around checksums: */
#define BKEY_BTREE_PTR_VAL_U64s_MAX				\
	((sizeof(struct bch_btree_ptr_v2) +			\
	  sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
#define BKEY_BTREE_PTR_U64s_MAX					\
	(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)

struct bch_reservation {
	struct bch_val		v;

	__le32			generation;
	__u8			nr_replicas;
	__u8			pad[3];
} __packed __aligned(8);

struct bch_inline_data {
	struct bch_val		v;
	u8			data[];
};

#endif /* _BCACHEFS_EXTENTS_FORMAT_H */
@@ -156,7 +156,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
}

#define eytzinger1_for_each(_i, _size)			\
	for ((_i) = eytzinger1_first((_size));		\
	for (unsigned (_i) = eytzinger1_first((_size));	\
	     (_i) != 0;					\
	     (_i) = eytzinger1_next((_i), (_size)))

@@ -227,7 +227,7 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
}

#define eytzinger0_for_each(_i, _size)			\
	for ((_i) = eytzinger0_first((_size));		\
	for (unsigned (_i) = eytzinger0_first((_size));	\
	     (_i) != -1;				\
	     (_i) = eytzinger0_next((_i), (_size)))

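[Editorial note: a hedged sketch of what the macro change above buys at call sites. The loop macro now declares its own unsigned counter, so callers stop declaring one; the caller shown here is hypothetical.]

	/* before: an "unsigned i;" declaration was required at the call site */
	eytzinger0_for_each(i, nr) {
		/* after the change, i is scoped to this loop only */
	}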
@@ -77,6 +77,10 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)

	bch2_inode_opts_get(&opts, c, &inode->ei_inode);

	/* bios must be 512 byte aligned: */
	if ((offset|iter->count) & (SECTOR_SIZE - 1))
		return -EINVAL;

	ret = min_t(loff_t, iter->count,
		    max_t(loff_t, 0, i_size_read(&inode->v) - offset));

@@ -309,39 +309,49 @@ void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
	}
}

void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
				  u64 start, u64 end)
int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
				 u64 *start, u64 end,
				 bool nonblocking)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	s64 i_sectors_delta = 0;
	unsigned i, j;
	int ret = 0;

	if (end <= start)
		return;
	if (end <= *start)
		return 0;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblocking)
				folio_lock(folio);
			else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				ret = -EAGAIN;
				break;
			}

			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			unsigned folio_offset = max(start, folio_start) - folio_start;
			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
			struct bch_folio *s;

			BUG_ON(end <= folio_start);

			folio_lock(folio);
			s = bch2_folio(folio);
			*start = min(end, folio_end);

			struct bch_folio *s = bch2_folio(folio);
			if (s) {
				unsigned folio_offset = max(*start, folio_start) - folio_start;
				unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;

				spin_lock(&s->lock);
				for (j = folio_offset; j < folio_offset + folio_len; j++) {
				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
					bch2_folio_sector_set(folio, s, j,
@@ -356,6 +366,7 @@ void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
	}

	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
	return ret;
}

static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,

@@ -143,7 +143,7 @@ int bch2_folio_set(struct bch_fs *, subvol_inum, struct folio **, unsigned);
void bch2_bio_page_state_set(struct bio *, struct bkey_s_c);

void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64);
void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64);
int bch2_mark_pagecache_reserved(struct bch_inode_info *, u64 *, u64, bool);

int bch2_get_folio_disk_reservation(struct bch_fs *,
				struct bch_inode_info *,

@@ -675,8 +675,11 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,

		bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);

		drop_locks_do(trans,
			(bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
		if (bch2_mark_pagecache_reserved(inode, &hole_start,
						 iter.pos.offset, true))
			drop_locks_do(trans,
				bch2_mark_pagecache_reserved(inode, &hole_start,
							     iter.pos.offset, false));
bkey_err:
		bch2_quota_reservation_put(c, inode, &quota_res);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))

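[Editorial note: the hunk above switches to a two-phase pattern: first try the folio-locking pass nonblocking while btree locks are still held, and only if that fails drop the transaction locks and redo it blocking. A generic sketch of the idea, with hypothetical do_work_*() names standing in for the real calls:]

	int ret = do_work_nonblocking(obj);	/* cheap attempt with locks held */
	if (ret)				/* e.g. -EAGAIN */
		drop_locks_do(trans, do_work_blocking(obj));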
@@ -337,11 +337,12 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
	if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
		create_flags |= BCH_CREATE_SNAPSHOT_RO;

	/* why do we need this lock? */
	down_read(&c->vfs_sb->s_umount);

	if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
	if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) {
		/* sync_inodes_sb enforce s_umount is locked */
		down_read(&c->vfs_sb->s_umount);
		sync_inodes_sb(c->vfs_sb);
		up_read(&c->vfs_sb->s_umount);
	}
retry:
	if (arg.src_ptr) {
		error = user_path_at(arg.dirfd,
@@ -425,8 +426,6 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
		goto retry;
	}
err1:
	up_read(&c->vfs_sb->s_umount);

	return error;
}

@@ -506,22 +506,33 @@ int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
static void __bch2_inode_unpacked_to_text(struct printbuf *out,
					  struct bch_inode_unpacked *inode)
{
	prt_printf(out, "mode=%o ", inode->bi_mode);
	printbuf_indent_add(out, 2);
	prt_printf(out, "mode=%o", inode->bi_mode);
	prt_newline(out);

	prt_str(out, "flags=");
	prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
	prt_printf(out, " (%x)", inode->bi_flags);
	prt_newline(out);

	prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu",
		   inode->bi_journal_seq,
		   inode->bi_size,
		   inode->bi_sectors,
		   inode->bi_version);
	prt_printf(out, "journal_seq=%llu", inode->bi_journal_seq);
	prt_newline(out);

	prt_printf(out, "bi_size=%llu", inode->bi_size);
	prt_newline(out);

	prt_printf(out, "bi_sectors=%llu", inode->bi_sectors);
	prt_newline(out);

	prt_newline(out);
	prt_printf(out, "bi_version=%llu", inode->bi_version);

#define x(_name, _bits)						\
	prt_printf(out, " "#_name "=%llu", (u64) inode->_name);
	prt_printf(out, #_name "=%llu", (u64) inode->_name);	\
	prt_newline(out);
	BCH_INODE_FIELDS_v3()
#undef x
	printbuf_indent_sub(out, 2);
}

void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
@@ -587,7 +598,7 @@ int bch2_trigger_inode(struct btree_trans *trans,
		}
	}

	if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
	if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) {
		BUG_ON(!trans->journal_res.seq);

		bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
@@ -597,7 +608,7 @@ int bch2_trigger_inode(struct btree_trans *trans,
		struct bch_fs *c = trans->c;

		percpu_down_read(&c->mark_lock);
		this_cpu_add(c->usage_gc->nr_inodes, nr);
		this_cpu_add(c->usage_gc->b.nr_inodes, nr);
		percpu_up_read(&c->mark_lock);
	}

166	fs/bcachefs/inode_format.h	Normal file
@ -0,0 +1,166 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_INODE_FORMAT_H
|
||||
#define _BCACHEFS_INODE_FORMAT_H
|
||||
|
||||
#define BLOCKDEV_INODE_MAX 4096
|
||||
#define BCACHEFS_ROOT_INO 4096
|
||||
|
||||
struct bch_inode {
|
||||
struct bch_val v;
|
||||
|
||||
__le64 bi_hash_seed;
|
||||
__le32 bi_flags;
|
||||
__le16 bi_mode;
|
||||
__u8 fields[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_inode_v2 {
|
||||
struct bch_val v;
|
||||
|
||||
__le64 bi_journal_seq;
|
||||
__le64 bi_hash_seed;
|
||||
__le64 bi_flags;
|
||||
__le16 bi_mode;
|
||||
__u8 fields[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_inode_v3 {
|
||||
struct bch_val v;
|
||||
|
||||
__le64 bi_journal_seq;
|
||||
__le64 bi_hash_seed;
|
||||
__le64 bi_flags;
|
||||
__le64 bi_sectors;
|
||||
__le64 bi_size;
|
||||
__le64 bi_version;
|
||||
__u8 fields[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define INODEv3_FIELDS_START_INITIAL 6
|
||||
#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
|
||||
|
||||
struct bch_inode_generation {
|
||||
struct bch_val v;
|
||||
|
||||
__le32 bi_generation;
|
||||
__le32 pad;
|
||||
} __packed __aligned(8);
|
||||
|
||||
/*
|
||||
* bi_subvol and bi_parent_subvol are only set for subvolume roots:
|
||||
*/
|
||||
|
||||
#define BCH_INODE_FIELDS_v2() \
|
||||
x(bi_atime, 96) \
|
||||
x(bi_ctime, 96) \
|
||||
x(bi_mtime, 96) \
|
||||
x(bi_otime, 96) \
|
||||
x(bi_size, 64) \
|
||||
x(bi_sectors, 64) \
|
||||
x(bi_uid, 32) \
|
||||
x(bi_gid, 32) \
|
||||
x(bi_nlink, 32) \
|
||||
x(bi_generation, 32) \
|
||||
x(bi_dev, 32) \
|
||||
x(bi_data_checksum, 8) \
|
||||
x(bi_compression, 8) \
|
||||
x(bi_project, 32) \
|
||||
x(bi_background_compression, 8) \
|
||||
x(bi_data_replicas, 8) \
|
||||
x(bi_promote_target, 16) \
|
||||
x(bi_foreground_target, 16) \
|
||||
x(bi_background_target, 16) \
|
||||
x(bi_erasure_code, 16) \
|
||||
x(bi_fields_set, 16) \
|
||||
x(bi_dir, 64) \
|
||||
x(bi_dir_offset, 64) \
|
||||
x(bi_subvol, 32) \
|
||||
x(bi_parent_subvol, 32)
|
||||
|
||||
#define BCH_INODE_FIELDS_v3() \
|
||||
x(bi_atime, 96) \
|
||||
x(bi_ctime, 96) \
|
||||
x(bi_mtime, 96) \
|
||||
x(bi_otime, 96) \
|
||||
x(bi_uid, 32) \
|
||||
x(bi_gid, 32) \
|
||||
x(bi_nlink, 32) \
|
||||
x(bi_generation, 32) \
|
||||
x(bi_dev, 32) \
|
||||
x(bi_data_checksum, 8) \
|
||||
x(bi_compression, 8) \
|
||||
x(bi_project, 32) \
|
||||
x(bi_background_compression, 8) \
|
||||
x(bi_data_replicas, 8) \
|
||||
x(bi_promote_target, 16) \
|
||||
x(bi_foreground_target, 16) \
|
||||
x(bi_background_target, 16) \
|
||||
x(bi_erasure_code, 16) \
|
||||
x(bi_fields_set, 16) \
|
||||
x(bi_dir, 64) \
|
||||
x(bi_dir_offset, 64) \
|
||||
x(bi_subvol, 32) \
|
||||
x(bi_parent_subvol, 32) \
|
||||
x(bi_nocow, 8)
|
||||
|
||||
/* subset of BCH_INODE_FIELDS */
|
||||
#define BCH_INODE_OPTS() \
|
||||
x(data_checksum, 8) \
|
||||
x(compression, 8) \
|
||||
x(project, 32) \
|
||||
x(background_compression, 8) \
|
||||
x(data_replicas, 8) \
|
||||
x(promote_target, 16) \
|
||||
x(foreground_target, 16) \
|
||||
x(background_target, 16) \
|
||||
x(erasure_code, 16) \
|
||||
x(nocow, 8)
|
||||
|
||||
enum inode_opt_id {
|
||||
#define x(name, ...) \
|
||||
Inode_opt_##name,
|
||||
BCH_INODE_OPTS()
|
||||
#undef x
|
||||
Inode_opt_nr,
|
||||
};
|
||||
|
||||
#define BCH_INODE_FLAGS() \
|
||||
x(sync, 0) \
|
||||
x(immutable, 1) \
|
||||
x(append, 2) \
|
||||
x(nodump, 3) \
|
||||
x(noatime, 4) \
|
||||
x(i_size_dirty, 5) \
|
||||
x(i_sectors_dirty, 6) \
|
||||
x(unlinked, 7) \
|
||||
x(backptr_untrusted, 8)
|
||||
|
||||
/* bits 20+ reserved for packed fields below: */
|
||||
|
||||
enum bch_inode_flags {
|
||||
#define x(t, n) BCH_INODE_##t = 1U << n,
|
||||
BCH_INODE_FLAGS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
enum __bch_inode_flags {
|
||||
#define x(t, n) __BCH_INODE_##t = n,
|
||||
BCH_INODE_FLAGS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
|
||||
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
|
||||
LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
|
||||
|
||||
LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
|
||||
LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
|
||||
|
||||
LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
|
||||
LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31);
|
||||
|
||||
LE64_BITMASK(INODEv3_FIELDS_START,
|
||||
struct bch_inode_v3, bi_flags, 31, 36);
|
||||
LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52);
|
||||
|
||||
#endif /* _BCACHEFS_INODE_FORMAT_H */
|
@@ -442,9 +442,7 @@ case LOGGED_OP_FINSERT_shift_extents:

		op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);

		ret = bch2_bkey_set_needs_rebalance(c, copy,
					opts.background_target,
					opts.background_compression) ?:
		ret = bch2_bkey_set_needs_rebalance(c, copy, &opts) ?:
			bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
			bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
			bch2_logged_op_update(trans, &op->k_i) ?:

@@ -362,9 +362,7 @@ static int bch2_write_index_default(struct bch_write_op *op)
				     bkey_start_pos(&sk.k->k),
				     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

		ret = bch2_bkey_set_needs_rebalance(c, sk.k,
					op->opts.background_target,
					op->opts.background_compression) ?:
		ret = bch2_bkey_set_needs_rebalance(c, sk.k, &op->opts) ?:
			bch2_extent_update(trans, inum, &iter, sk.k,
					&op->res,
					op->new_i_size, &op->i_sectors_delta,
@@ -1447,10 +1445,11 @@ static void __bch2_write(struct bch_write_op *op)
		op->flags |= BCH_WRITE_DONE;

		if (ret < 0) {
			bch_err_inum_offset_ratelimited(c,
				op->pos.inode,
				op->pos.offset << 9,
				"%s(): error: %s", __func__, bch2_err_str(ret));
			if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT))
				bch_err_inum_offset_ratelimited(c,
					op->pos.inode,
					op->pos.offset << 9,
					"%s(): error: %s", __func__, bch2_err_str(ret));
			op->error = ret;
			break;
		}

@@ -27,6 +27,47 @@ static const char * const bch2_journal_errors[] = {
	NULL
};

static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq)
{
	union journal_res_state s = READ_ONCE(j->reservations);
	unsigned i = seq & JOURNAL_BUF_MASK;
	struct journal_buf *buf = j->buf + i;

	prt_printf(out, "seq:");
	prt_tab(out);
	prt_printf(out, "%llu", seq);
	prt_newline(out);
	printbuf_indent_add(out, 2);

	prt_printf(out, "refcount:");
	prt_tab(out);
	prt_printf(out, "%u", journal_state_count(s, i));
	prt_newline(out);

	prt_printf(out, "size:");
	prt_tab(out);
	prt_human_readable_u64(out, vstruct_bytes(buf->data));
	prt_newline(out);

	prt_printf(out, "expires");
	prt_tab(out);
	prt_printf(out, "%li jiffies", buf->expires - jiffies);
	prt_newline(out);

	printbuf_indent_sub(out, 2);
}

static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 24);

	for (u64 seq = journal_last_unwritten_seq(j);
	     seq <= journal_cur_seq(j);
	     seq++)
		bch2_journal_buf_to_text(out, j, seq);
}

static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
	return seq > j->seq_ondisk;
@@ -156,7 +197,7 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
 * We don't close a journal_buf until the next journal_buf is finished writing,
 * and can be opened again - this also initializes the next journal_buf:
 */
static void __journal_entry_close(struct journal *j, unsigned closed_val)
static void __journal_entry_close(struct journal *j, unsigned closed_val, bool trace)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct journal_buf *buf = journal_cur_buf(j);
@@ -185,7 +226,17 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val)
	/* Close out old buffer: */
	buf->data->u64s		= cpu_to_le32(old.cur_entry_offset);

	trace_journal_entry_close(c, vstruct_bytes(buf->data));
	if (trace_journal_entry_close_enabled() && trace) {
		struct printbuf pbuf = PRINTBUF;
		pbuf.atomic++;

		prt_str(&pbuf, "entry size: ");
		prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data));
		prt_newline(&pbuf);
		bch2_prt_task_backtrace(&pbuf, current, 1);
		trace_journal_entry_close(c, pbuf.buf);
		printbuf_exit(&pbuf);
	}

	sectors = vstruct_blocks_plus(buf->data, c->block_bits,
				      buf->u64s_reserved) << c->block_bits;
@ -225,7 +276,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val)
|
||||
void bch2_journal_halt(struct journal *j)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL);
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true);
|
||||
if (!j->err_seq)
|
||||
j->err_seq = journal_cur_seq(j);
|
||||
journal_wake(j);
|
||||
@ -239,7 +290,7 @@ static bool journal_entry_want_write(struct journal *j)
|
||||
|
||||
/* Don't close it yet if we already have a write in flight: */
|
||||
if (ret)
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
|
||||
else if (nr_unwritten_journal_entries(j)) {
|
||||
struct journal_buf *buf = journal_cur_buf(j);
|
||||
|
||||
@ -406,7 +457,7 @@ static void journal_write_work(struct work_struct *work)
|
||||
if (delta > 0)
|
||||
mod_delayed_work(c->io_complete_wq, &j->write_work, delta);
|
||||
else
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
|
||||
unlock:
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
@ -463,13 +514,21 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
|
||||
buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
|
||||
j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
|
||||
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false);
|
||||
ret = journal_entry_open(j);
|
||||
|
||||
if (ret == JOURNAL_ERR_max_in_flight) {
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
|
||||
&j->max_in_flight_start, true);
|
||||
trace_and_count(c, journal_entry_full, c);
|
||||
if (trace_journal_entry_full_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
buf.atomic++;
|
||||
|
||||
bch2_journal_bufs_to_text(&buf, j);
|
||||
trace_journal_entry_full(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
count_event(c, journal_entry_full);
|
||||
}
|
||||
unlock:
|
||||
can_discard = j->can_discard;
|
||||
@ -549,7 +608,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
|
||||
/*
|
||||
* Not enough room in current journal entry, have to flush it:
|
||||
*/
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
|
||||
} else {
|
||||
journal_cur_buf(j)->u64s_reserved += d;
|
||||
}
|
||||
@ -606,7 +665,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
|
||||
struct journal_res res = { 0 };
|
||||
|
||||
if (journal_entry_is_open(j))
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
@ -786,7 +845,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
|
||||
|
||||
if (buf->need_flush_to_write_buffer) {
|
||||
if (seq == journal_cur_seq(j))
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
|
||||
|
||||
union journal_res_state s;
|
||||
s.v = atomic64_read_acquire(&j->reservations.counter);
|
||||
@ -1339,35 +1398,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
}
|
||||
|
||||
prt_newline(out);
|
||||
|
||||
for (u64 seq = journal_cur_seq(j);
|
||||
seq >= journal_last_unwritten_seq(j);
|
||||
--seq) {
|
||||
unsigned i = seq & JOURNAL_BUF_MASK;
|
||||
|
||||
prt_printf(out, "unwritten entry:");
|
||||
prt_tab(out);
|
||||
prt_printf(out, "%llu", seq);
|
||||
prt_newline(out);
|
||||
printbuf_indent_add(out, 2);
|
||||
|
||||
prt_printf(out, "refcount:");
|
||||
prt_tab(out);
|
||||
prt_printf(out, "%u", journal_state_count(s, i));
|
||||
prt_newline(out);
|
||||
|
||||
prt_printf(out, "sectors:");
|
||||
prt_tab(out);
|
||||
prt_printf(out, "%u", j->buf[i].sectors);
|
||||
prt_newline(out);
|
||||
|
||||
prt_printf(out, "expires");
|
||||
prt_tab(out);
|
||||
prt_printf(out, "%li jiffies", j->buf[i].expires - jiffies);
|
||||
prt_newline(out);
|
||||
|
||||
printbuf_indent_sub(out, 2);
|
||||
}
|
||||
prt_printf(out, "unwritten entries:");
|
||||
prt_newline(out);
|
||||
bch2_journal_bufs_to_text(out, j);
|
||||
|
||||
prt_printf(out,
|
||||
"replay done:\t\t%i\n",
|
||||
|
@@ -683,10 +683,7 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
	prt_printf(out, "dev=%u", le32_to_cpu(u->dev));

	for (i = 0; i < nr_types; i++) {
		if (i < BCH_DATA_NR)
			prt_printf(out, " %s", bch2_data_types[i]);
		else
			prt_printf(out, " (unknown data type %u)", i);
		bch2_prt_data_type(out, i);
		prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu",
			   le64_to_cpu(u->d[i].buckets),
			   le64_to_cpu(u->d[i].sectors),

30	fs/bcachefs/logged_ops_format.h	Normal file
@@ -0,0 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H
#define _BCACHEFS_LOGGED_OPS_FORMAT_H

struct bch_logged_op_truncate {
	struct bch_val		v;
	__le32			subvol;
	__le32			pad;
	__le64			inum;
	__le64			new_i_size;
};

enum logged_op_finsert_state {
	LOGGED_OP_FINSERT_start,
	LOGGED_OP_FINSERT_shift_extents,
	LOGGED_OP_FINSERT_finish,
};

struct bch_logged_op_finsert {
	struct bch_val		v;
	__u8			state;
	__u8			pad[3];
	__le32			subvol;
	__le64			inum;
	__le64			dst_offset;
	__le64			src_offset;
	__le64			pos;
};

#endif /* _BCACHEFS_LOGGED_OPS_FORMAT_H */
@@ -6,9 +6,11 @@
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_gc.h"
#include "btree_io.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "compress.h"
#include "disk_groups.h"
#include "ec.h"
#include "errcode.h"
@@ -34,12 +36,46 @@ const char * const bch2_data_ops_strs[] = {
	NULL
};

static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
					  struct bch_io_opts *io_opts,
					  struct data_update_opts *data_opts)
{
	printbuf_tabstop_push(out, 20);
	prt_str(out, "rewrite ptrs:");
	prt_tab(out);
	bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
	prt_newline(out);

	prt_str(out, "kill ptrs: ");
	prt_tab(out);
	bch2_prt_u64_base2(out, data_opts->kill_ptrs);
	prt_newline(out);

	prt_str(out, "target: ");
	prt_tab(out);
	bch2_target_to_text(out, c, data_opts->target);
	prt_newline(out);

	prt_str(out, "compression: ");
	prt_tab(out);
	bch2_compression_opt_to_text(out, background_compression(*io_opts));
	prt_newline(out);

	prt_str(out, "extra replicas: ");
	prt_tab(out);
	prt_u64(out, data_opts->extra_replicas);
}

static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
			       struct bch_io_opts *io_opts,
			       struct data_update_opts *data_opts)
{
	if (trace_move_extent_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		prt_newline(&buf);
		bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
		trace_move_extent(c, buf.buf);
		printbuf_exit(&buf);
	}
@@ -111,6 +147,15 @@ static void move_write(struct moving_io *io)
		return;
	}

	if (trace_move_extent_write_enabled()) {
		struct bch_fs *c = io->write.op.c;
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
		trace_move_extent_write(c, buf.buf);
		printbuf_exit(&buf);
	}

	closure_get(&io->write.ctxt->cl);
	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
	atomic_inc(&io->write.ctxt->write_ios);
@@ -241,9 +286,10 @@ int bch2_move_extent(struct moving_context *ctxt,
	unsigned sectors = k.k->size, pages;
	int ret = -ENOMEM;

	trace_move_extent2(c, k, &io_opts, &data_opts);

	if (ctxt->stats)
		ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
	trace_move_extent2(c, k);

	bch2_data_update_opts_normalize(k, &data_opts);

@@ -759,6 +805,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
			if (!b)
				goto next;

			unsigned sectors = btree_ptr_sectors_written(&b->key);

			ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
			bch2_trans_iter_exit(trans, &iter);

@@ -768,11 +816,10 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
				goto err;

			if (ctxt->rate)
				bch2_ratelimit_increment(ctxt->rate,
							 c->opts.btree_node_size >> 9);
				bch2_ratelimit_increment(ctxt->rate, sectors);
			if (ctxt->stats) {
				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
				atomic64_add(sectors, &ctxt->stats->sectors_seen);
				atomic64_add(sectors, &ctxt->stats->sectors_moved);
			}
		}
next:
@@ -1083,9 +1130,9 @@ int bch2_data_job(struct bch_fs *c,

void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
{
	prt_printf(out, "%s: data type=%s pos=",
		   stats->name,
		   bch2_data_types[stats->data_type]);
	prt_printf(out, "%s: data type==", stats->name);
	bch2_prt_data_type(out, stats->data_type);
	prt_str(out, " pos=");
	bch2_bbpos_to_text(out, stats->pos);
	prt_newline(out);
	printbuf_indent_add(out, 2);

@@ -52,7 +52,7 @@ const char * const bch2_csum_opts[] = {
	NULL
};

const char * const bch2_compression_types[] = {
const char * const __bch2_compression_types[] = {
	BCH_COMPRESSION_TYPES()
	NULL
};
@@ -72,7 +72,7 @@ const char * const bch2_str_hash_opts[] = {
	NULL
};

const char * const bch2_data_types[] = {
const char * const __bch2_data_types[] = {
	BCH_DATA_TYPES()
	NULL
};

@@ -18,11 +18,11 @@ extern const char * const bch2_sb_compat[];
extern const char * const __bch2_btree_ids[];
extern const char * const bch2_csum_types[];
extern const char * const bch2_csum_opts[];
extern const char * const bch2_compression_types[];
extern const char * const __bch2_compression_types[];
extern const char * const bch2_compression_opts[];
extern const char * const bch2_str_hash_types[];
extern const char * const bch2_str_hash_opts[];
extern const char * const bch2_data_types[];
extern const char * const __bch2_data_types[];
extern const char * const bch2_member_states[];
extern const char * const bch2_jset_entry_types[];
extern const char * const bch2_fs_usage_types[];
@@ -564,6 +564,11 @@ struct bch_io_opts {
#undef x
};

static inline unsigned background_compression(struct bch_io_opts opts)
{
	return opts.background_compression ?: opts.compression;
}
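[Editorial note: this helper centralizes a fallback that was previously open-coded at several call sites (see the rebalance.c hunks later in this series). GCC's a ?: b evaluates to a when a is nonzero, else b. Illustrative values only:]

	struct bch_io_opts o = { .compression = 4, .background_compression = 0 };
	/* background_compression(o) == 4: falls back to the foreground setting */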

struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts);
bool bch2_opt_is_inode_opt(enum bch_opt_id);

|
47
fs/bcachefs/quota_format.h
Normal file
47
fs/bcachefs/quota_format.h
Normal file
@ -0,0 +1,47 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_QUOTA_FORMAT_H
|
||||
#define _BCACHEFS_QUOTA_FORMAT_H
|
||||
|
||||
/* KEY_TYPE_quota: */
|
||||
|
||||
enum quota_types {
|
||||
QTYP_USR = 0,
|
||||
QTYP_GRP = 1,
|
||||
QTYP_PRJ = 2,
|
||||
QTYP_NR = 3,
|
||||
};
|
||||
|
||||
enum quota_counters {
|
||||
Q_SPC = 0,
|
||||
Q_INO = 1,
|
||||
Q_COUNTERS = 2,
|
||||
};
|
||||
|
||||
struct bch_quota_counter {
|
||||
__le64 hardlimit;
|
||||
__le64 softlimit;
|
||||
};
|
||||
|
||||
struct bch_quota {
|
||||
struct bch_val v;
|
||||
struct bch_quota_counter c[Q_COUNTERS];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* BCH_SB_FIELD_quota: */
|
||||
|
||||
struct bch_sb_quota_counter {
|
||||
__le32 timelimit;
|
||||
__le32 warnlimit;
|
||||
};
|
||||
|
||||
struct bch_sb_quota_type {
|
||||
__le64 flags;
|
||||
struct bch_sb_quota_counter c[Q_COUNTERS];
|
||||
};
|
||||
|
||||
struct bch_sb_field_quota {
|
||||
struct bch_sb_field field;
|
||||
struct bch_sb_quota_type q[QTYP_NR];
|
||||
} __packed __aligned(8);
|
||||
|
||||
#endif /* _BCACHEFS_QUOTA_FORMAT_H */
|
@@ -177,8 +177,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
		prt_str(&buf, "target=");
		bch2_target_to_text(&buf, c, r->target);
		prt_str(&buf, " compression=");
		struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
		prt_str(&buf, bch2_compression_opts[opt.type]);
		bch2_compression_opt_to_text(&buf, r->compression);
		prt_str(&buf, " ");
		bch2_bkey_val_to_text(&buf, c, k);

@@ -254,13 +253,12 @@ static bool rebalance_pred(struct bch_fs *c, void *arg,

	if (k.k->p.inode) {
		target		= io_opts->background_target;
		compression	= io_opts->background_compression ?: io_opts->compression;
		compression	= background_compression(*io_opts);
	} else {
		const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);

		target		= r ? r->target : io_opts->background_target;
		compression	= r ? r->compression :
			(io_opts->background_compression ?: io_opts->compression);
		compression	= r ? r->compression : background_compression(*io_opts);
	}

	data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression);
@@ -371,6 +369,7 @@ static int do_rebalance(struct moving_context *ctxt)
		    !kthread_should_stop() &&
		    !atomic64_read(&r->work_stats.sectors_seen) &&
		    !atomic64_read(&r->scan_stats.sectors_seen)) {
			bch2_moving_ctxt_flush_all(ctxt);
			bch2_trans_unlock_long(trans);
			rebalance_wait(c);
		}
@@ -385,7 +384,6 @@ static int bch2_rebalance_thread(void *arg)
	struct bch_fs *c = arg;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct moving_context ctxt;
	int ret;

	set_freezable();

@@ -393,8 +391,7 @@ static int bch2_rebalance_thread(void *arg)
			      writepoint_ptr(&c->rebalance_write_point),
			      true);

	while (!kthread_should_stop() &&
	       !(ret = do_rebalance(&ctxt)))
	while (!kthread_should_stop() && !do_rebalance(&ctxt))
		;

	bch2_moving_ctxt_exit(&ctxt);

@@ -280,7 +280,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
			le64_to_cpu(u->v);
		break;
	case BCH_FS_USAGE_inodes:
		c->usage_base->nr_inodes = le64_to_cpu(u->v);
		c->usage_base->b.nr_inodes = le64_to_cpu(u->v);
		break;
	case BCH_FS_USAGE_key_version:
		atomic64_set(&c->key_version,

@@ -292,10 +292,10 @@ static inline void check_indirect_extent_deleting(struct bkey_s new, unsigned *f
	}
}

int bch2_trans_mark_reflink_v(struct btree_trans *trans,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c old, struct bkey_s new,
			      unsigned flags)
int bch2_trigger_reflink_v(struct btree_trans *trans,
			   enum btree_id btree_id, unsigned level,
			   struct bkey_s_c old, struct bkey_s new,
			   unsigned flags)
{
	if ((flags & BTREE_TRIGGER_TRANSACTIONAL) &&
	    (flags & BTREE_TRIGGER_INSERT))
@@ -324,7 +324,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out,
	       min(datalen, 32U), d.v->data);
}

int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans,
int bch2_trigger_indirect_inline_data(struct btree_trans *trans,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c old, struct bkey_s new,
			      unsigned flags)
@@ -486,6 +486,13 @@ s64 bch2_remap_range(struct bch_fs *c,

		bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot);

		if (dst_inum.inum < src_inum.inum) {
			/* Avoid some lock cycle transaction restarts */
			ret = bch2_btree_iter_traverse(&dst_iter);
			if (ret)
				continue;
		}

		dst_done = dst_iter.pos.offset - dst_start.offset;
		src_want = POS(src_start.inode, src_start.offset + dst_done);
		bch2_btree_iter_set_pos(&src_iter, src_want);
@@ -538,9 +545,7 @@ s64 bch2_remap_range(struct bch_fs *c,
				min(src_k.k->p.offset - src_want.offset,
				    dst_end.offset - dst_iter.pos.offset));

		ret =   bch2_bkey_set_needs_rebalance(c, new_dst.k,
					opts.background_target,
					opts.background_compression) ?:
		ret =   bch2_bkey_set_needs_rebalance(c, new_dst.k, &opts) ?:
			bch2_extent_update(trans, dst_inum, &dst_iter,
					new_dst.k, &disk_res,
					new_i_size, i_sectors_delta,

@ -24,14 +24,14 @@ int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c,
|
||||
enum bkey_invalid_flags, struct printbuf *);
|
||||
void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
|
||||
struct bkey_s_c);
|
||||
int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
|
||||
int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned,
|
||||
struct bkey_s_c, struct bkey_s, unsigned);
|
||||
|
||||
#define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \
|
||||
.key_invalid = bch2_reflink_v_invalid, \
|
||||
.val_to_text = bch2_reflink_v_to_text, \
|
||||
.swab = bch2_ptr_swab, \
|
||||
.trigger = bch2_trans_mark_reflink_v, \
|
||||
.trigger = bch2_trigger_reflink_v, \
|
||||
.min_val_size = 8, \
|
||||
})
|
||||
|
||||
@ -39,7 +39,7 @@ int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c,
|
||||
enum bkey_invalid_flags, struct printbuf *);
|
||||
void bch2_indirect_inline_data_to_text(struct printbuf *,
|
||||
struct bch_fs *, struct bkey_s_c);
|
||||
int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
|
||||
int bch2_trigger_indirect_inline_data(struct btree_trans *,
|
||||
enum btree_id, unsigned,
|
||||
struct bkey_s_c, struct bkey_s,
|
||||
unsigned);
|
||||
@ -47,7 +47,7 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
|
||||
#define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \
|
||||
.key_invalid = bch2_indirect_inline_data_invalid, \
|
||||
.val_to_text = bch2_indirect_inline_data_to_text, \
|
||||
.trigger = bch2_trans_mark_indirect_inline_data, \
|
||||
.trigger = bch2_trigger_indirect_inline_data, \
|
||||
.min_val_size = 8, \
|
||||
})
|
||||
|
||||
|
33
fs/bcachefs/reflink_format.h
Normal file
33
fs/bcachefs/reflink_format.h
Normal file
@ -0,0 +1,33 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_REFLINK_FORMAT_H
|
||||
#define _BCACHEFS_REFLINK_FORMAT_H
|
||||
|
||||
struct bch_reflink_p {
|
||||
struct bch_val v;
|
||||
__le64 idx;
|
||||
/*
|
||||
* A reflink pointer might point to an indirect extent which is then
|
||||
* later split (by copygc or rebalance). If we only pointed to part of
|
||||
* the original indirect extent, and then one of the fragments is
|
||||
* outside the range we point to, we'd leak a refcount: so when creating
|
||||
* reflink pointers, we need to store pad values to remember the full
|
||||
* range we were taking a reference on.
|
||||
*/
|
||||
__le32 front_pad;
|
||||
__le32 back_pad;
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_reflink_v {
|
||||
struct bch_val v;
|
||||
__le64 refcount;
|
||||
union bch_extent_entry start[0];
|
||||
__u64 _data[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct bch_indirect_inline_data {
|
||||
struct bch_val v;
|
||||
__le64 refcount;
|
||||
u8 data[];
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_REFLINK_FORMAT_H */
|
@@ -9,6 +9,12 @@
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, size_t size)
{
	return memcmp(l, r, size);
}

/* Replicas tracking - in memory: */

static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
@@ -33,21 +39,16 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
	eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
					   struct bch_replicas_entry_v0 *e)
{
	unsigned i;

	if (e->data_type < BCH_DATA_NR)
		prt_printf(out, "%s", bch2_data_types[e->data_type]);
	else
		prt_printf(out, "(invalid data type %u)", e->data_type);
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u [", e->nr_devs);
	for (i = 0; i < e->nr_devs; i++)
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}
@@ -55,15 +56,10 @@ static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
void bch2_replicas_entry_to_text(struct printbuf *out,
				 struct bch_replicas_entry_v1 *e)
{
	unsigned i;

	if (e->data_type < BCH_DATA_NR)
		prt_printf(out, "%s", bch2_data_types[e->data_type]);
	else
		prt_printf(out, "(invalid data type %u)", e->data_type);
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
	for (i = 0; i < e->nr_devs; i++)
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}
@@ -831,7 +827,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
	sort_cmp_size(cpu_r->entries,
		      cpu_r->nr,
		      cpu_r->entry_size,
		      memcmp, NULL);
		      bch2_memcmp, NULL);

	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry_v1 *e =

@@ -207,7 +207,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,

		u->entry.type	= BCH_JSET_ENTRY_usage;
		u->entry.btree_id = BCH_FS_USAGE_inodes;
		u->v		= cpu_to_le64(c->usage_base->nr_inodes);
		u->v		= cpu_to_le64(c->usage_base->b.nr_inodes);
	}

	{

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "super-io.h"
#include "counters.h"
#include "sb-counters.h"

/* BCH_SB_FIELD_counters */

@@ -1,11 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_COUNTERS_H
#define _BCACHEFS_COUNTERS_H
#ifndef _BCACHEFS_SB_COUNTERS_H
#define _BCACHEFS_SB_COUNTERS_H

#include "bcachefs.h"
#include "super-io.h"

int bch2_sb_counters_to_cpu(struct bch_fs *);
int bch2_sb_counters_from_cpu(struct bch_fs *);

@@ -14,4 +13,4 @@ int bch2_fs_counters_init(struct bch_fs *);

extern const struct bch_sb_field_ops bch_sb_field_ops_counters;

#endif // _BCACHEFS_COUNTERS_H
#endif // _BCACHEFS_SB_COUNTERS_H
98	fs/bcachefs/sb-counters_format.h	Normal file
@@ -0,0 +1,98 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H
|
||||
#define _BCACHEFS_SB_COUNTERS_FORMAT_H
|
||||
|
||||
#define BCH_PERSISTENT_COUNTERS() \
|
||||
x(io_read, 0) \
|
||||
x(io_write, 1) \
|
||||
x(io_move, 2) \
|
||||
x(bucket_invalidate, 3) \
|
||||
x(bucket_discard, 4) \
|
||||
x(bucket_alloc, 5) \
|
||||
x(bucket_alloc_fail, 6) \
|
||||
x(btree_cache_scan, 7) \
|
||||
x(btree_cache_reap, 8) \
|
||||
x(btree_cache_cannibalize, 9) \
|
||||
x(btree_cache_cannibalize_lock, 10) \
|
||||
x(btree_cache_cannibalize_lock_fail, 11) \
|
||||
x(btree_cache_cannibalize_unlock, 12) \
|
||||
x(btree_node_write, 13) \
|
||||
x(btree_node_read, 14) \
|
||||
x(btree_node_compact, 15) \
|
||||
x(btree_node_merge, 16) \
|
||||
x(btree_node_split, 17) \
|
||||
x(btree_node_rewrite, 18) \
|
||||
x(btree_node_alloc, 19) \
|
||||
x(btree_node_free, 20) \
|
||||
x(btree_node_set_root, 21) \
|
||||
x(btree_path_relock_fail, 22) \
|
||||
x(btree_path_upgrade_fail, 23) \
|
||||
x(btree_reserve_get_fail, 24) \
|
||||
x(journal_entry_full, 25) \
|
||||
x(journal_full, 26) \
|
||||
x(journal_reclaim_finish, 27) \
|
||||
x(journal_reclaim_start, 28) \
|
||||
x(journal_write, 29) \
|
||||
x(read_promote, 30) \
|
||||
x(read_bounce, 31) \
|
||||
x(read_split, 33) \
|
||||
x(read_retry, 32) \
|
||||
x(read_reuse_race, 34) \
|
||||
x(move_extent_read, 35) \
|
||||
x(move_extent_write, 36) \
|
||||
x(move_extent_finish, 37) \
|
||||
x(move_extent_fail, 38) \
|
||||
x(move_extent_start_fail, 39) \
|
||||
x(copygc, 40) \
|
||||
x(copygc_wait, 41) \
|
||||
x(gc_gens_end, 42) \
|
||||
x(gc_gens_start, 43) \
|
||||
x(trans_blocked_journal_reclaim, 44) \
|
||||
x(trans_restart_btree_node_reused, 45) \
|
||||
x(trans_restart_btree_node_split, 46) \
|
||||
x(trans_restart_fault_inject, 47) \
|
||||
x(trans_restart_iter_upgrade, 48) \
|
||||
x(trans_restart_journal_preres_get, 49) \
|
||||
x(trans_restart_journal_reclaim, 50) \
|
||||
x(trans_restart_journal_res_get, 51) \
|
||||
x(trans_restart_key_cache_key_realloced, 52) \
|
||||
x(trans_restart_key_cache_raced, 53) \
|
||||
x(trans_restart_mark_replicas, 54) \
|
||||
x(trans_restart_mem_realloced, 55) \
|
||||
x(trans_restart_memory_allocation_failure, 56) \
|
||||
x(trans_restart_relock, 57) \
|
||||
x(trans_restart_relock_after_fill, 58) \
|
||||
x(trans_restart_relock_key_cache_fill, 59) \
|
||||
x(trans_restart_relock_next_node, 60) \
|
||||
x(trans_restart_relock_parent_for_fill, 61) \
|
||||
x(trans_restart_relock_path, 62) \
|
||||
x(trans_restart_relock_path_intent, 63) \
|
||||
x(trans_restart_too_many_iters, 64) \
|
||||
x(trans_restart_traverse, 65) \
|
||||
x(trans_restart_upgrade, 66) \
|
||||
x(trans_restart_would_deadlock, 67) \
|
||||
x(trans_restart_would_deadlock_write, 68) \
|
||||
x(trans_restart_injected, 69) \
|
||||
x(trans_restart_key_cache_upgrade, 70) \
|
||||
x(trans_traverse_all, 71) \
|
||||
x(transaction_commit, 72) \
|
||||
x(write_super, 73) \
|
||||
x(trans_restart_would_deadlock_recursion_limit, 74) \
|
||||
x(trans_restart_write_buffer_flush, 75) \
|
||||
x(trans_restart_split_race, 76) \
|
||||
x(write_buffer_flush_slowpath, 77) \
|
||||
x(write_buffer_flush_sync, 78)
|
||||
|
||||
enum bch_persistent_counters {
|
||||
#define x(t, n, ...) BCH_COUNTER_##t,
|
||||
BCH_PERSISTENT_COUNTERS()
|
||||
#undef x
|
||||
BCH_COUNTER_NR
|
||||
};
|
||||
|
||||
struct bch_sb_field_counters {
|
||||
struct bch_sb_field field;
|
||||
__le64 d[];
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_SB_COUNTERS_FORMAT_H */
|
@@ -251,7 +251,7 @@ static void member_to_text(struct printbuf *out,
	prt_printf(out, "Data allowed:");
	prt_tab(out);
	if (BCH_MEMBER_DATA_ALLOWED(&m))
		prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
		prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
	else
		prt_printf(out, "(none)");
	prt_newline(out);
@@ -259,7 +259,7 @@ static void member_to_text(struct printbuf *out,
	prt_printf(out, "Has data:");
	prt_tab(out);
	if (data_have)
		prt_bitflags(out, bch2_data_types, data_have);
		prt_bitflags(out, __bch2_data_types, data_have);
	else
		prt_printf(out, "(none)");
	prt_newline(out);

@@ -1053,6 +1053,8 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
		n->v.tree	= cpu_to_le32(tree);
		n->v.depth	= cpu_to_le32(depth);
		n->v.btime.lo	= cpu_to_le64(bch2_current_time(c));
		n->v.btime.hi	= 0;

		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
			n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));
@@ -1681,5 +1683,5 @@ int bch2_snapshots_read(struct bch_fs *c)

void bch2_fs_snapshots_exit(struct bch_fs *c)
{
	kfree(rcu_dereference_protected(c->snapshots, true));
	kvfree(rcu_dereference_protected(c->snapshots, true));
}

36	fs/bcachefs/snapshot_format.h	Normal file
@@ -0,0 +1,36 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_SNAPSHOT_FORMAT_H
|
||||
#define _BCACHEFS_SNAPSHOT_FORMAT_H
|
||||
|
||||
struct bch_snapshot {
|
||||
struct bch_val v;
|
||||
__le32 flags;
|
||||
__le32 parent;
|
||||
__le32 children[2];
|
||||
__le32 subvol;
|
||||
/* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */
|
||||
__le32 tree;
|
||||
__le32 depth;
|
||||
__le32 skip[3];
|
||||
bch_le128 btime;
|
||||
};
|
||||
|
||||
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
|
||||
|
||||
/* True if a subvolume points to this snapshot node: */
|
||||
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
|
||||
|
||||
/*
|
||||
* Snapshot trees:
|
||||
*
|
||||
* The snapshot_trees btree gives us persistent indentifier for each tree of
|
||||
* bch_snapshot nodes, and allow us to record and easily find the root/master
|
||||
* subvolume that other snapshots were created from:
|
||||
*/
|
||||
struct bch_snapshot_tree {
|
||||
struct bch_val v;
|
||||
__le32 master_subvol;
|
||||
__le32 root_snapshot;
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_SNAPSHOT_FORMAT_H */
|
35	fs/bcachefs/subvolume_format.h	Normal file
@@ -0,0 +1,35 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_SUBVOLUME_FORMAT_H
|
||||
#define _BCACHEFS_SUBVOLUME_FORMAT_H
|
||||
|
||||
#define SUBVOL_POS_MIN POS(0, 1)
|
||||
#define SUBVOL_POS_MAX POS(0, S32_MAX)
|
||||
#define BCACHEFS_ROOT_SUBVOL 1
|
||||
|
||||
struct bch_subvolume {
|
||||
struct bch_val v;
|
||||
__le32 flags;
|
||||
__le32 snapshot;
|
||||
__le64 inode;
|
||||
/*
|
||||
* Snapshot subvolumes form a tree, separate from the snapshot nodes
|
||||
* tree - if this subvolume is a snapshot, this is the ID of the
|
||||
* subvolume it was created from:
|
||||
*
|
||||
* This is _not_ necessarily the subvolume of the directory containing
|
||||
* this subvolume:
|
||||
*/
|
||||
__le32 parent;
|
||||
__le32 pad;
|
||||
bch_le128 otime;
|
||||
};
|
||||
|
||||
LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1)
|
||||
/*
|
||||
* We need to know whether a subvolume is a snapshot so we can know whether we
|
||||
* can delete it (or whether it should just be rm -rf'd)
|
||||
*/
|
||||
LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2)
|
||||
LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3)
|
||||
|
||||
#endif /* _BCACHEFS_SUBVOLUME_FORMAT_H */
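
The comment on bch_subvolume::parent is the key point here: the subvolume tree is separate from the snapshot-node tree. A rough sketch of decoding these little-endian fields on the read side; the helper is hypothetical and only illustrates the relationship described above:

/* Hypothetical helper, for illustration only; not from this commit. */
static void subvolume_describe(const struct bch_subvolume *s)
{
    u32 snapshot_id = le32_to_cpu(s->snapshot); /* snapshot node this subvolume points at */
    u32 parent      = le32_to_cpu(s->parent);   /* see comment above: source subvolume if this is a snapshot */

    if (BCH_SUBVOLUME_SNAP(s))
        pr_info("snapshot of subvolume %u, snapshot node %u\n", parent, snapshot_id);
}
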
@ -2,7 +2,6 @@

#include "bcachefs.h"
#include "checksum.h"
#include "counters.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
@ -13,6 +12,7 @@
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
#include "sb-counters.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "sb-members.h"
@ -1321,7 +1321,9 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,

prt_printf(out, "Superblock size:");
prt_tab(out);
prt_printf(out, "%zu", vstruct_bytes(sb));
prt_units_u64(out, vstruct_bytes(sb));
prt_str(out, "/");
prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits);
prt_newline(out);

prt_printf(out, "Clean:");
@ -23,7 +23,6 @@
#include "checksum.h"
#include "clock.h"
#include "compress.h"
#include "counters.h"
#include "debug.h"
#include "disk_groups.h"
#include "ec.h"
@ -49,6 +48,7 @@
#include "recovery.h"
#include "replicas.h"
#include "sb-clean.h"
#include "sb-counters.h"
#include "sb-errors.h"
#include "sb-members.h"
#include "snapshot.h"
@ -883,7 +883,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
!(c->online_reserved = alloc_percpu(u64)) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
btree_bytes(c)) ||
c->opts.btree_node_size) ||
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
!(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
sizeof(u64), GFP_KERNEL))) {
@ -1625,7 +1625,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
if (data) {
struct printbuf data_has = PRINTBUF;

prt_bitflags(&data_has, bch2_data_types, data);
prt_bitflags(&data_has, __bch2_data_types, data);
bch_err(ca, "Remove failed, still has data (%s)", data_has.buf);
printbuf_exit(&data_has);
ret = -EBUSY;
@ -21,6 +21,7 @@
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
#include "compress.h"
#include "disk_groups.h"
#include "ec.h"
#include "inode.h"
@ -247,7 +248,7 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)

mutex_lock(&c->btree_cache.lock);
list_for_each_entry(b, &c->btree_cache.live, list)
ret += btree_bytes(c);
ret += btree_buf_bytes(b);

mutex_unlock(&c->btree_cache.lock);
return ret;
@ -330,7 +331,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
prt_newline(out);

for (unsigned i = 0; i < ARRAY_SIZE(s); i++) {
prt_str(out, bch2_compression_types[i]);
bch2_prt_compression_type(out, i);
prt_tab(out);

prt_human_readable_u64(out, s[i].sectors_compressed << 9);
@ -725,8 +726,10 @@ STORE(bch2_fs_opts_dir)
bch2_opt_set_sb(c, opt, v);
bch2_opt_set_by_id(&c->opts, id, v);

if ((id == Opt_background_target ||
id == Opt_background_compression) && v)
if (v &&
(id == Opt_background_target ||
id == Opt_background_compression ||
(id == Opt_compression && !c->opts.background_compression)))
bch2_set_rebalance_needs_scan(c, 0);

ret = size;
@ -883,7 +886,7 @@ static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca)

for (i = 1; i < BCH_DATA_NR; i++)
prt_printf(out, "%-12s:%12llu\n",
bch2_data_types[i],
bch2_data_type_str(i),
percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
}
}
@ -908,7 +911,7 @@ SHOW(bch2_dev)
}

if (attr == &sysfs_has_data) {
prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca));
prt_bitflags(out, __bch2_data_types, bch2_dev_has_data(c, ca));
prt_char(out, '\n');
}

@ -46,7 +46,7 @@ DECLARE_EVENT_CLASS(fs_str,
__assign_str(str, str);
),

TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
TP_printk("%d,%d\n%s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
);

DECLARE_EVENT_CLASS(trans_str,
@ -273,28 +273,14 @@ DEFINE_EVENT(bch_fs, journal_full,
TP_ARGS(c)
);

DEFINE_EVENT(bch_fs, journal_entry_full,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
DEFINE_EVENT(fs_str, journal_entry_full,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

TRACE_EVENT(journal_entry_close,
TP_PROTO(struct bch_fs *c, unsigned bytes),
TP_ARGS(c, bytes),

TP_STRUCT__entry(
__field(dev_t, dev )
__field(u32, bytes )
),

TP_fast_assign(
__entry->dev = c->dev;
__entry->bytes = bytes;
),

TP_printk("%d,%d entry bytes %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->bytes)
DEFINE_EVENT(fs_str, journal_entry_close,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

DEFINE_EVENT(bio, journal_write,
@ -542,7 +528,7 @@ TRACE_EVENT(btree_path_relock_fail,
__entry->level = path->level;
TRACE_BPOS_assign(pos, path->pos);

c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level),
c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level);
__entry->self_read_count = c.n[SIX_LOCK_read];
__entry->self_intent_count = c.n[SIX_LOCK_intent];

@ -827,40 +813,28 @@ TRACE_EVENT(bucket_evacuate,
);

DEFINE_EVENT(fs_str, move_extent,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

DEFINE_EVENT(fs_str, move_extent_read,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

DEFINE_EVENT(fs_str, move_extent_write,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

DEFINE_EVENT(fs_str, move_extent_finish,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

TRACE_EVENT(move_extent_fail,
TP_PROTO(struct bch_fs *c, const char *msg),
TP_ARGS(c, msg),

TP_STRUCT__entry(
__field(dev_t, dev )
__string(msg, msg )
),

TP_fast_assign(
__entry->dev = c->dev;
__assign_str(msg, msg);
),

TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
DEFINE_EVENT(fs_str, move_extent_fail,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

DEFINE_EVENT(fs_str, move_extent_start_fail,
@ -1039,7 +1013,7 @@ TRACE_EVENT(trans_restart_split_race,
__entry->level = b->c.level;
__entry->written = b->written;
__entry->blocks = btree_blocks(trans->c);
__entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b);
__entry->u64s_remaining = bch2_btree_keys_u64s_remaining(b);
),

TP_printk("%s %pS l=%u written %u/%u u64s remaining %u",
@ -1146,8 +1120,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
TP_ARGS(trans, caller_ip, path)
);

struct get_locks_fail;

TRACE_EVENT(trans_restart_upgrade,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
@ -1195,11 +1167,9 @@ TRACE_EVENT(trans_restart_upgrade,
__entry->node_seq)
);

DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
struct btree_path *path),
TP_ARGS(trans, caller_ip, path)
DEFINE_EVENT(trans_str, trans_restart_relock,
TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str),
TP_ARGS(trans, caller_ip, str)
);

DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node,
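
The point of folding these events into the fs_str/trans_str classes is that callers format whatever they want into a string and hand it to the tracepoint, instead of each event carrying its own field list. A rough sketch of how a caller might emit one of the consolidated events, assuming the usual printbuf helpers; this is not code from this commit:

/* Sketch only: emitting a consolidated fs_str event from a caller. */
struct printbuf buf = PRINTBUF;

bch2_bkey_val_to_text(&buf, c, k);      /* format the key being moved */
trace_move_extent_finish(c, buf.buf);   /* the event now just takes (c, str) */
printbuf_exit(&buf);
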
@ -241,12 +241,17 @@ bool bch2_is_zero(const void *_p, size_t n)
return true;
}

void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
void bch2_prt_u64_base2_nbits(struct printbuf *out, u64 v, unsigned nr_bits)
{
while (nr_bits)
prt_char(out, '0' + ((v >> --nr_bits) & 1));
}

void bch2_prt_u64_base2(struct printbuf *out, u64 v)
{
bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
}

void bch2_print_string_as_lines(const char *prefix, const char *lines)
{
const char *p;
@ -1186,7 +1191,9 @@ int bch2_split_devs(const char *_dev_name, darray_str *ret)
{
darray_init(ret);

char *dev_name = kstrdup(_dev_name, GFP_KERNEL), *s = dev_name;
char *dev_name, *s, *orig;

dev_name = orig = kstrdup(_dev_name, GFP_KERNEL);
if (!dev_name)
return -ENOMEM;

@ -1201,10 +1208,10 @@ int bch2_split_devs(const char *_dev_name, darray_str *ret)
}
}

kfree(dev_name);
kfree(orig);
return 0;
err:
bch2_darray_str_exit(ret);
kfree(dev_name);
kfree(orig);
return -ENOMEM;
}
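
The bch2_split_devs() change above keeps a separate orig pointer so that the buffer returned by kstrdup() is always the one handed to kfree(); strsep() advances the cursor pointer it is given, so freeing the cursor after parsing has started would free the wrong address. A standalone sketch of the same ownership pattern, as a hypothetical example rather than the bcachefs code:

/* Hypothetical sketch of the kstrdup()/strsep() ownership pattern. */
static int count_fields(const char *input)
{
    char *orig, *s, *field;
    int n = 0;

    s = orig = kstrdup(input, GFP_KERNEL);
    if (!s)
        return -ENOMEM;

    while ((field = strsep(&s, ":")))   /* strsep() advances s */
        n++;

    kfree(orig);                        /* free the original allocation, not s */
    return n;
}
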
@ -342,7 +342,8 @@ bool bch2_is_zero(const void *, size_t);

u64 bch2_read_flag_list(char *, const char * const[]);

void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned);
void bch2_prt_u64_base2(struct printbuf *, u64);

void bch2_print_string_as_lines(const char *prefix, const char *lines);
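
bch2_prt_u64_binary() becomes bch2_prt_u64_base2_nbits(), and the new bch2_prt_u64_base2() wrapper sizes the output from fls64(). A small sketch of the difference between the two, as hypothetical usage assuming the caller already has a printbuf:

/* Sketch only: fixed-width vs. minimal-width binary output. */
struct printbuf buf = PRINTBUF;

bch2_prt_u64_base2_nbits(&buf, 0x5, 8); /* appends "00000101" */
prt_char(&buf, ' ');
bch2_prt_u64_base2(&buf, 0x5);          /* appends "101": fls64(5) == 3 bits */

printbuf_exit(&buf);
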
@ -590,8 +590,9 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
mutex_unlock(&inode->ei_update_lock);

if (value &&
(opt_id == Opt_background_compression ||
opt_id == Opt_background_target))
(opt_id == Opt_background_target ||
opt_id == Opt_background_compression ||
(opt_id == Opt_compression && !inode_opt_get(c, &inode->ei_inode, background_compression))))
bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum);

return bch2_err_class(ret);
19
fs/bcachefs/xattr_format.h
Normal file
@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_XATTR_FORMAT_H
#define _BCACHEFS_XATTR_FORMAT_H

#define KEY_TYPE_XATTR_INDEX_USER 0
#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1
#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2
#define KEY_TYPE_XATTR_INDEX_TRUSTED 3
#define KEY_TYPE_XATTR_INDEX_SECURITY 4

struct bch_xattr {
struct bch_val v;
__u8 x_type;
__u8 x_name_len;
__le16 x_val_len;
__u8 x_name[];
} __packed __aligned(8);

#endif /* _BCACHEFS_XATTR_FORMAT_H */
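
For orientation on this layout: the xattr name and value are stored inline in the same key, with x_name[] holding x_name_len bytes of name and the value bytes following immediately after. A hedged sketch of how the value might be located, as a hypothetical helper rather than code from this commit:

/* Hypothetical helpers mirroring how the inline layout would be read. */
static const void *xattr_value(const struct bch_xattr *x)
{
    /* name bytes come first, value bytes follow immediately: */
    return x->x_name + x->x_name_len;
}

static unsigned xattr_value_len(const struct bch_xattr *x)
{
    return le16_to_cpu(x->x_val_len);
}
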