bcachefs: Start using bpos.snapshot field

This patch starts treating the bpos.snapshot field like part of the key
in the btree code:

* bpos_successor() and bpos_predecessor() now include the snapshot field
* Keys in btrees that will be using snapshots (extents, inodes, dirents
  and xattrs) now always have their snapshot field set to U32_MAX

The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.

We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and always U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).

This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2021-03-24 18:02:16 -04:00 committed by Kent Overstreet
parent 4cf91b0270
commit e751c01a8e
22 changed files with 252 additions and 74 deletions

View File

@ -142,19 +142,18 @@ struct bpos {
#define KEY_SNAPSHOT_MAX ((__u32)~0U)
#define KEY_SIZE_MAX ((__u32)~0U)
static inline struct bpos POS(__u64 inode, __u64 offset)
static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot)
{
struct bpos ret;
ret.inode = inode;
ret.offset = offset;
ret.snapshot = 0;
return ret;
return (struct bpos) {
.inode = inode,
.offset = offset,
.snapshot = snapshot,
};
}
#define POS_MIN POS(0, 0)
#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX)
#define POS_MIN SPOS(0, 0, 0)
#define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX)
#define POS(_inode, _offset) SPOS(_inode, _offset, 0)
/* Empty placeholder struct, for container_of() */
struct bch_val {
@ -1208,7 +1207,8 @@ enum bcachefs_metadata_version {
bcachefs_metadata_version_new_versioning = 10,
bcachefs_metadata_version_bkey_renumber = 10,
bcachefs_metadata_version_inode_btree_change = 11,
bcachefs_metadata_version_max = 12,
bcachefs_metadata_version_snapshot = 12,
bcachefs_metadata_version_max = 13,
};
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
@ -1749,7 +1749,7 @@ struct btree_node {
/* Closed interval: */
struct bpos min_key;
struct bpos max_key;
struct bch_extent_ptr ptr;
struct bch_extent_ptr _ptr; /* not used anymore */
struct bkey_format format;
union {

View File

@ -617,15 +617,19 @@ const char *bch2_bkey_format_validate(struct bkey_format *f)
return "incorrect number of fields";
for (i = 0; i < f->nr_fields; i++) {
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
u64 field_offset = le64_to_cpu(f->field_offset[i]);
if (f->bits_per_field[i] > 64)
if (f->bits_per_field[i] > unpacked_bits)
return "field too large";
if (field_offset &&
(f->bits_per_field[i] == 64 ||
(field_offset + ((1ULL << f->bits_per_field[i]) - 1) <
field_offset)))
if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
return "offset + bits overflow";
if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
unpacked_mask) <
field_offset)
return "offset + bits overflow";
bits += f->bits_per_field[i];
@ -1126,11 +1130,12 @@ void bch2_bkey_pack_test(void)
struct bkey_packed p;
struct bkey_format test_format = {
.key_u64s = 2,
.key_u64s = 3,
.nr_fields = BKEY_NR_FIELDS,
.bits_per_field = {
13,
64,
32,
},
};

View File

@ -258,24 +258,46 @@ static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
format->bits_per_field[BKEY_FIELD_SNAPSHOT];
}
static inline struct bpos bkey_successor(struct bpos p)
static inline struct bpos bpos_successor(struct bpos p)
{
struct bpos ret = p;
if (!++p.snapshot &&
!++p.offset &&
!++p.inode)
BUG();
if (!++ret.offset)
BUG_ON(!++ret.inode);
return ret;
return p;
}
static inline struct bpos bkey_predecessor(struct bpos p)
static inline struct bpos bpos_predecessor(struct bpos p)
{
struct bpos ret = p;
if (!p.snapshot-- &&
!p.offset-- &&
!p.inode--)
BUG();
if (!ret.offset--)
BUG_ON(!ret.inode--);
return p;
}
return ret;
/*
 * Next position when the snapshot field is not treated as part of the key:
 * the snapshot field of the result is cleared to 0, offset is incremented,
 * and a wrap of offset carries into inode. Hits BUG() if both offset and
 * inode wrap around.
 */
static inline struct bpos bpos_nosnap_successor(struct bpos p)
{
	p.snapshot = 0;

	/* Common case: offset did not wrap, inode is left untouched */
	if (++p.offset)
		return p;

	/* offset wrapped to 0: carry into inode */
	if (++p.inode)
		return p;

	/* Both fields wrapped: there is no successor */
	BUG();
	return p;
}
/*
 * Previous position when the snapshot field is not treated as part of the
 * key: the snapshot field of the result is cleared to 0, offset is
 * decremented, and a borrow from offset == 0 decrements inode. Hits BUG()
 * if both offset and inode were already 0.
 */
static inline struct bpos bpos_nosnap_predecessor(struct bpos p)
{
	p.snapshot = 0;

	/* Common case: offset was nonzero, inode is left untouched */
	if (p.offset--)
		return p;

	/* offset was 0 (now wrapped): borrow from inode */
	if (p.inode--)
		return p;

	/* Both fields were 0: there is no predecessor */
	BUG();
	return p;
}
static inline u64 bkey_start_offset(const struct bkey *k)

View File

@ -119,9 +119,16 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
return "nonzero size field";
}
if (k.k->p.snapshot)
if (type != BKEY_TYPE_btree &&
!btree_type_has_snapshots(type) &&
k.k->p.snapshot)
return "nonzero snapshot";
if (type != BKEY_TYPE_btree &&
btree_type_has_snapshots(type) &&
k.k->p.snapshot != U32_MAX)
return "invalid snapshot field";
if (type != BKEY_TYPE_btree &&
!bkey_cmp(k.k->p, POS_MAX))
return "POS_MAX key";
@ -310,14 +317,15 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
const struct bkey_ops *ops;
struct bkey uk;
struct bkey_s u;
unsigned nr_compat = 5;
int i;
/*
* Do these operations in reverse order in the write path:
*/
for (i = 0; i < 4; i++)
switch (!write ? i : 3 - i) {
for (i = 0; i < nr_compat; i++)
switch (!write ? i : nr_compat - 1 - i) {
case 0:
if (big_endian != CPU_BIG_ENDIAN)
bch2_bkey_swab_key(f, k);
@ -351,6 +359,28 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
}
break;
case 3:
if (version < bcachefs_metadata_version_snapshot &&
(level || btree_type_has_snapshots(btree_id))) {
struct bkey_i *u = packed_to_bkey(k);
if (u) {
u->k.p.snapshot = write
? 0 : U32_MAX;
} else {
u64 min_packed = f->field_offset[BKEY_FIELD_SNAPSHOT];
u64 max_packed = min_packed +
~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
uk = __bch2_bkey_unpack_key(f, k);
uk.p.snapshot = write
? min_packed : min_t(u64, U32_MAX, max_packed);
BUG_ON(!bch2_bkey_pack_key(k, &uk, f));
}
}
break;
case 4:
if (!bkey_packed(k)) {
u = bkey_i_to_s(packed_to_bkey(k));
} else {

View File

@ -1438,7 +1438,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
* to the search key is going to have 0 sectors after the search key.
*
* But this does mean that we can't just search for
* bkey_successor(start_of_range) to get the first extent that overlaps with
* bpos_successor(start_of_range) to get the first extent that overlaps with
* the range we want - if we're unlucky and there's an extent that ends
* exactly where we searched, then there could be a deleted key at the same
* position and we'd get that when we search instead of the preceding extent

View File

@ -1018,7 +1018,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (sib != btree_prev_sib)
swap(n1, n2);
if (bpos_cmp(bkey_successor(n1->key.k.p),
if (bpos_cmp(bpos_successor(n1->key.k.p),
n2->data->min_key)) {
char buf1[200], buf2[200];

View File

@ -64,7 +64,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
struct bpos node_end = b->data->max_key;
struct bpos expected_start = bkey_deleted(&prev->k->k)
? node_start
: bkey_successor(prev->k->k.p);
: bpos_successor(prev->k->k.p);
char buf1[200], buf2[200];
bool update_min = false;
bool update_max = false;
@ -1187,7 +1187,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
BTREE_ITER_PREFETCH);
BTREE_ITER_PREFETCH|
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS);
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) {
@ -1405,7 +1407,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
n1->key.k.p = n1->data->max_key =
bkey_unpack_pos(n1, last);
n2->data->min_key = bkey_successor(n1->data->max_key);
n2->data->min_key = bpos_successor(n1->data->max_key);
memcpy_u64s(vstruct_last(s1),
s2->start, u64s);

View File

@ -612,12 +612,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
BTREE_ERR_MUST_RETRY, c, ca, b, i,
"incorrect level");
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
u64 *p = (u64 *) &bn->ptr;
*p = swab64(*p);
}
if (!write)
compat_btree_node(b->c.level, b->c.btree_id, version,
BSET_BIG_ENDIAN(i), write, bn);
@ -1328,8 +1322,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree))
return -1;
ret = validate_bset(c, NULL, b, i, sectors, WRITE, false) ?:
validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?:
validate_bset(c, NULL, b, i, sectors, WRITE, false);
if (ret) {
bch2_inconsistent_error(c);
dump_stack();
@ -1482,7 +1476,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
validate_before_checksum = true;
/* validate_bset will be modifying: */
if (le16_to_cpu(i->version) <= bcachefs_metadata_version_inode_btree_change)
if (le16_to_cpu(i->version) < bcachefs_metadata_version_current)
validate_before_checksum = true;
/* if we're going to be encrypting, check metadata validity first: */

View File

@ -189,8 +189,8 @@ void bch2_btree_flush_all_writes(struct bch_fs *);
void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
static inline void compat_bformat(unsigned level, enum btree_id btree_id,
unsigned version, unsigned big_endian,
int write, struct bkey_format *f)
unsigned version, unsigned big_endian,
int write, struct bkey_format *f)
{
if (version < bcachefs_metadata_version_inode_btree_change &&
btree_id == BTREE_ID_inodes) {
@ -199,6 +199,16 @@ static inline void compat_bformat(unsigned level, enum btree_id btree_id,
swap(f->field_offset[BKEY_FIELD_INODE],
f->field_offset[BKEY_FIELD_OFFSET]);
}
if (version < bcachefs_metadata_version_snapshot &&
(level || btree_type_has_snapshots(btree_id))) {
u64 max_packed =
~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
f->field_offset[BKEY_FIELD_SNAPSHOT] = write
? 0
: U32_MAX - max_packed;
}
}
static inline void compat_bpos(unsigned level, enum btree_id btree_id,
@ -222,16 +232,24 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
btree_node_type_is_extents(btree_id) &&
bpos_cmp(bn->min_key, POS_MIN) &&
write)
bn->min_key = bkey_predecessor(bn->min_key);
bn->min_key = bpos_nosnap_predecessor(bn->min_key);
if (version < bcachefs_metadata_version_snapshot &&
write)
bn->max_key.snapshot = 0;
compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
if (version < bcachefs_metadata_version_snapshot &&
!write)
bn->max_key.snapshot = U32_MAX;
if (version < bcachefs_metadata_version_inode_btree_change &&
btree_node_type_is_extents(btree_id) &&
bpos_cmp(bn->min_key, POS_MIN) &&
!write)
bn->min_key = bkey_successor(bn->min_key);
bn->min_key = bpos_nosnap_successor(bn->min_key);
}
#endif /* _BCACHEFS_BTREE_IO_H */

View File

@ -18,6 +18,36 @@
static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
/*
 * Iterator-aware successor of @p.
 *
 * With BTREE_ITER_ALL_SNAPSHOTS set on @iter, the position is advanced with
 * bpos_successor(). Otherwise it is advanced with bpos_nosnap_successor() and
 * the snapshot field of the result is set to iter->snapshot.
 *
 * Must not be used on BTREE_ITER_NODES iterators (checked with EBUG_ON).
 */
static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
	EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);

	/* Iterating over keys in all snapshots: step the full position */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
		return bpos_successor(p);

	/* Filtering by snapshot: step without it, then pin to iter's snapshot */
	p = bpos_nosnap_successor(p);
	p.snapshot = iter->snapshot;
	return p;
}
/*
 * Iterator-aware predecessor of @p.
 *
 * With BTREE_ITER_ALL_SNAPSHOTS set on @iter, the position is stepped back
 * with bpos_predecessor(). Otherwise it is stepped back with
 * bpos_nosnap_predecessor() and the snapshot field of the result is set to
 * iter->snapshot.
 *
 * Must not be used on BTREE_ITER_NODES iterators (checked with EBUG_ON).
 */
static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
{
	EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);

	/* Iterating over keys in all snapshots: step the full position */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
		return bpos_predecessor(p);

	/* Filtering by snapshot: step without it, then pin to iter's snapshot */
	p = bpos_nosnap_predecessor(p);
	p.snapshot = iter->snapshot;
	return p;
}
static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
{
return l < BTREE_MAX_DEPTH &&
@ -30,7 +60,7 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
bkey_cmp(pos, POS_MAX))
pos = bkey_successor(pos);
pos = bkey_successor(iter, pos);
return pos;
}
@ -591,10 +621,24 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter,
static void bch2_btree_iter_verify(struct btree_iter *iter)
{
enum btree_iter_type type = btree_iter_type(iter);
unsigned i;
EBUG_ON(iter->btree_id >= BTREE_ID_NR);
BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
iter->pos.snapshot != iter->snapshot);
BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
BUG_ON(type == BTREE_ITER_NODES &&
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
BUG_ON(type != BTREE_ITER_NODES &&
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
!btree_type_has_snapshots(iter->btree_id));
bch2_btree_iter_verify_locks(iter);
for (i = 0; i < BTREE_MAX_DEPTH; i++)
@ -605,6 +649,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
enum btree_iter_type type = btree_iter_type(iter);
BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
iter->pos.snapshot != iter->snapshot);
BUG_ON((type == BTREE_ITER_KEYS ||
type == BTREE_ITER_CACHED) &&
(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
@ -1434,7 +1481,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
* Haven't gotten to the end of the parent node: go back down to
* the next child node
*/
btree_iter_set_search_pos(iter, bkey_successor(iter->pos));
btree_iter_set_search_pos(iter, bpos_successor(iter->pos));
/* Unlock to avoid screwing up our lock invariants: */
btree_node_unlock(iter, iter->level);
@ -1508,7 +1555,7 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)
bool ret = bpos_cmp(pos, POS_MAX) != 0;
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
pos = bkey_successor(pos);
pos = bkey_successor(iter, pos);
bch2_btree_iter_set_pos(iter, pos);
return ret;
}
@ -1519,7 +1566,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
bool ret = bpos_cmp(pos, POS_MIN) != 0;
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
pos = bkey_predecessor(pos);
pos = bkey_predecessor(iter, pos);
bch2_btree_iter_set_pos(iter, pos);
return ret;
}
@ -1535,7 +1582,7 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
* btree, in that case we want iter->pos to reflect that:
*/
if (ret)
btree_iter_set_search_pos(iter, bkey_successor(next_pos));
btree_iter_set_search_pos(iter, bpos_successor(next_pos));
else
bch2_btree_iter_set_pos(iter, POS_MAX);
@ -1548,7 +1595,7 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
bool ret = bpos_cmp(next_pos, POS_MIN) != 0;
if (ret)
btree_iter_set_search_pos(iter, bkey_predecessor(next_pos));
btree_iter_set_search_pos(iter, bpos_predecessor(next_pos));
else
bch2_btree_iter_set_pos(iter, POS_MIN);
@ -1594,13 +1641,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool wi
k = btree_iter_level_peek(iter, &iter->l[0]);
if (next_update &&
bkey_cmp(next_update->k.p, iter->real_pos) <= 0)
bpos_cmp(next_update->k.p, iter->real_pos) <= 0)
k = bkey_i_to_s_c(next_update);
if (likely(k.k)) {
if (bkey_deleted(k.k)) {
btree_iter_set_search_pos(iter,
bkey_successor(k.k->p));
bkey_successor(iter, k.k->p));
continue;
}
@ -1739,7 +1786,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
if (iter->pos.inode == KEY_INODE_MAX)
return bkey_s_c_null;
bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos));
bch2_btree_iter_set_pos(iter, bkey_successor(iter, iter->pos));
}
pos = iter->pos;
@ -1973,6 +2020,14 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
{
struct btree_iter *iter, *best = NULL;
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
!btree_type_has_snapshots(btree_id))
flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
pos.snapshot = btree_type_has_snapshots(btree_id)
? U32_MAX : 0;
/* We always want a fresh iterator for node iterators: */
if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
goto alloc_iter;
@ -2007,11 +2062,14 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NOT_EXTENTS))
!(flags & BTREE_ITER_NOT_EXTENTS) &&
!(flags & BTREE_ITER_ALL_SNAPSHOTS))
flags |= BTREE_ITER_IS_EXTENTS;
iter->flags = flags;
iter->snapshot = pos.snapshot;
if (!(iter->flags & BTREE_ITER_INTENT))
bch2_btree_iter_downgrade(iter);
else if (!iter->locks_want)
@ -2034,6 +2092,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
__bch2_trans_get_iter(trans, btree_id, pos,
BTREE_ITER_NODES|
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS|
flags);
unsigned i;

View File

@ -172,6 +172,9 @@ bool bch2_btree_iter_rewind(struct btree_iter *);
static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
new_pos.snapshot = iter->snapshot;
bkey_init(&iter->k);
iter->k.p = iter->pos = new_pos;
}

View File

@ -216,6 +216,7 @@ enum btree_iter_type {
#define BTREE_ITER_CACHED_NOFILL (1 << 9)
#define BTREE_ITER_CACHED_NOCREATE (1 << 10)
#define BTREE_ITER_NOT_EXTENTS (1 << 11)
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0,
@ -245,6 +246,8 @@ struct btree_iter {
/* what we're searching for/what the iterator actually points to: */
struct bpos real_pos;
struct bpos pos_after_commit;
/* When we're filtering by snapshot, the snapshot ID we're looking for: */
unsigned snapshot;
u16 flags;
u8 idx;
@ -329,7 +332,7 @@ struct bkey_cached {
struct btree_insert_entry {
unsigned trigger_flags;
u8 bkey_type;
u8 btree_id;
enum btree_id btree_id:8;
u8 level;
unsigned trans_triggers_run:1;
unsigned is_extent:1;
@ -610,6 +613,17 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
#define BTREE_ID_HAS_SNAPSHOTS \
((1U << BTREE_ID_extents)| \
(1U << BTREE_ID_inodes)| \
(1U << BTREE_ID_dirents)| \
(1U << BTREE_ID_xattrs))
/*
 * Returns true if @id has its bit set in BTREE_ID_HAS_SNAPSHOTS.
 *
 * The shift uses an unsigned 1 so that it matches the (1U << ...) terms the
 * mask is built from and cannot overflow a signed int.
 */
static inline bool btree_type_has_snapshots(enum btree_id id)
{
	return (1U << id) & BTREE_ID_HAS_SNAPSHOTS;
}
enum btree_trigger_flags {
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */

View File

@ -69,7 +69,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
break;
}
next_node = bkey_successor(k.k->p);
next_node = bpos_successor(k.k->p);
}
#endif
}
@ -289,7 +289,6 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b->data->flags = 0;
SET_BTREE_NODE_ID(b->data, as->btree_id);
SET_BTREE_NODE_LEVEL(b->data, level);
b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
@ -1100,6 +1099,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
struct btree *n2;
struct bset *set1, *set2;
struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
struct bpos n1_pos;
n2 = bch2_btree_node_alloc(as, n1->c.level);
bch2_btree_update_add_new_node(as, n2);
@ -1146,8 +1146,12 @@ static struct btree *__btree_split_node(struct btree_update *as,
n1->nr.packed_keys = nr_packed;
n1->nr.unpacked_keys = nr_unpacked;
btree_set_max(n1, bkey_unpack_pos(n1, prev));
btree_set_min(n2, bkey_successor(n1->key.k.p));
n1_pos = bkey_unpack_pos(n1, prev);
if (as->c->sb.version < bcachefs_metadata_version_snapshot)
n1_pos.snapshot = U32_MAX;
btree_set_max(n1, n1_pos);
btree_set_min(n2, bpos_successor(n1->key.k.p));
bch2_bkey_format_init(&s);
bch2_bkey_format_add_pos(&s, n2->data->min_key);

View File

@ -223,9 +223,17 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
BUG_ON(bch2_debug_check_bkeys &&
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type));
BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos));
if (bch2_debug_check_bkeys) {
const char *invalid = bch2_bkey_invalid(c,
bkey_i_to_s_c(i->k), i->bkey_type);
if (invalid) {
char buf[200];
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
panic("invalid bkey %s on insert: %s\n", buf, invalid);
}
}
BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos));
BUG_ON(i->level != i->iter->level);
BUG_ON(i->btree_id != i->iter->btree_id);
}

View File

@ -222,7 +222,9 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
bch2_trans_init(&trans, i->c, 0, 0);
iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
iter = bch2_trans_get_iter(&trans, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS);
k = bch2_btree_iter_peek(iter);
while (k.k && !(err = bkey_err(k))) {
@ -290,7 +292,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
* all nodes, meh
*/
i->from = bpos_cmp(POS_MAX, b->key.k.p)
? bkey_successor(b->key.k.p)
? bpos_successor(b->key.k.p)
: b->key.k.p;
if (!i->size)

View File

@ -179,7 +179,8 @@ const char *bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
return "value too big";
if (bp.v->min_key.snapshot)
if (c->sb.version < bcachefs_metadata_version_snapshot &&
bp.v->min_key.snapshot)
return "invalid min_key.snapshot";
return bch2_bkey_ptrs_invalid(c, k);
@ -211,8 +212,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
btree_node_type_is_extents(btree_id) &&
bkey_cmp(bp.v->min_key, POS_MIN))
bp.v->min_key = write
? bkey_predecessor(bp.v->min_key)
: bkey_successor(bp.v->min_key);
? bpos_nosnap_predecessor(bp.v->min_key)
: bpos_nosnap_successor(bp.v->min_key);
}
/* KEY_TYPE_extent: */

View File

@ -1318,6 +1318,7 @@ static int check_inode(struct btree_trans *trans,
struct bkey_inode_buf p;
bch2_inode_pack(c, &p, &u);
p.inode.k.p = iter->pos;
ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|

View File

@ -332,6 +332,7 @@ int bch2_inode_write(struct btree_trans *trans,
return PTR_ERR(inode_p);
bch2_inode_pack(trans->c, inode_p, inode);
inode_p->inode.k.p.snapshot = iter->snapshot;
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
return 0;
}

View File

@ -332,6 +332,9 @@ int bch2_extent_update(struct btree_trans *trans,
if (i_sectors_delta || new_i_size) {
bch2_inode_pack(trans->c, &inode_p, &inode_u);
inode_p.inode.k.p.snapshot = iter->snapshot;
bch2_trans_update(trans, inode_iter,
&inode_p.inode.k_i, 0);
}
@ -447,6 +450,8 @@ int bch2_write_index_default(struct bch_write_op *op)
k = bch2_keylist_front(keys);
k->k.p.snapshot = iter->snapshot;
bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
bkey_copy(sk.k, k);
bch2_cut_front(iter->pos, sk.k);

View File

@ -1449,7 +1449,7 @@ void bch2_journal_write(struct closure *cl)
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
validate_before_checksum = true;
if (le32_to_cpu(jset->version) <= bcachefs_metadata_version_inode_btree_change)
if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current)
validate_before_checksum = true;
if (validate_before_checksum &&

View File

@ -998,6 +998,13 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
if (!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix");
ret = -EINVAL;
goto err;
}
if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
bch_info(c, "alloc_v2 feature bit not set, fsck required");
c->opts.fsck = true;
@ -1340,6 +1347,7 @@ int bch2_fs_initialize(struct bch_fs *c)
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(c, &packed_inode, &root_inode);
packed_inode.inode.k.p.snapshot = U32_MAX;
err = "error creating root directory";
ret = bch2_btree_insert(c, BTREE_ID_inodes,

View File

@ -483,6 +483,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
for (i = 0; i < nr; i++) {
bkey_cookie_init(&k.k_i);
k.k.p.offset = test_rand();
k.k.p.snapshot = U32_MAX;
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));